diff --git a/simX/Makefile b/simX/Makefile index 1f665a2c..fb563f13 100644 --- a/simX/Makefile +++ b/simX/Makefile @@ -1,7 +1,7 @@ ################################################################################ # HARPtools by Chad D. Kersey, Summer 2011 # ################################################################################ -CXXFLAGS ?= -std=c++11 -fPIC -O3 # -g -DUSE_DEBUG=3 -DPRINT_ACTIVE_THREADS +CXXFLAGS ?= -std=c++11 -fPIC -O3 -g # -g -DUSE_DEBUG=3 -DPRINT_ACTIVE_THREADS LIB_OBJS=simX.cpp args.cpp mem.cpp core.cpp instruction.cpp enc.cpp util.cpp diff --git a/simX/core.cpp b/simX/core.cpp index 24e07ec0..b8899161 100644 --- a/simX/core.cpp +++ b/simX/core.cpp @@ -46,7 +46,7 @@ trace_inst.vd = -1; \ trace_inst.is_lw = false; \ trace_inst.is_sw = false; \ - trace_inst.mem_addresses = new unsigned[a.getNThds()]; \ + trace_inst.mem_addresses = (unsigned *) malloc(32 * sizeof(unsigned)); \ for (int tid = 0; tid < a.getNThds(); tid++) trace_inst.mem_addresses[tid] = 0xdeadbeef; \ trace_inst.mem_stall_cycles = 0; \ trace_inst.fetch_stall_cycles = 0; \ @@ -163,6 +163,8 @@ void Core::step() { cout << "\n\n\n------------------------------------------------------\n"; + D(3, "Started core::step" << flush); + steps++; cout << "CYCLE: " << steps << '\n'; @@ -179,20 +181,30 @@ void Core::step() // cout << regii << ": " << renameTable[0][regii] << '\n'; // } - cout << '\n'; + cout << '\n' << flush; + cout << "About to call writeback" << endl; this->writeback(); + cout << "About to call load_store" << endl; this->load_store(); + cout << "About to call execute_unit" << endl; this->execute_unit(); + cout << "About to call scheduler" << endl; this->scheduler(); + cout << "About to call decode" << endl; this->decode(); + D(3, "About to call fetch" << flush); this->fetch(); + D(3, "Finished fetch" << flush); if (release_warp) { release_warp = false; stallWarp[release_warp_num] = false; } + + D(3, "released warp" << flush); + D(3, "Finished core::step" << flush); } void Core::getCacheDelays(trace_inst_t * trace_inst) @@ -396,15 +408,19 @@ void Core::fetch() { D(3, "Core step stepping warp " << schedule_w << '[' << w[schedule_w].activeThreads << ']'); w[schedule_w].step(&inst_in_fetch); - D(3, "Now " << w[schedule_w].activeThreads << " active threads in " << schedule_w); + D(3, "Now " << w[schedule_w].activeThreads << " active threads in " << schedule_w << flush); this->getCacheDelays(&inst_in_fetch); + D(3, "Got cache delays" << flush); if (inst_in_fetch.stall_warp) { stallWarp[inst_in_fetch.wid] = true; } + D(3, "staled warps\n" << flush); } + D(3, "About to schedule warp\n" << flush); warpScheduler(); + D(3, "Scheduled warp" << flush); } } else @@ -413,21 +429,25 @@ void Core::fetch() if (inst_in_fetch.fetch_stall_cycles > 0) inst_in_fetch.fetch_stall_cycles--; } + D(3, "Printing trace" << flush); printTrace(&inst_in_fetch, "Fetch"); + D(3, "printed trace" << flush); // #ifdef PRINT_ACTIVE_THREADS + D(3, "About to print active threads" << flush << "\n"); for (unsigned j = 0; j < w[schedule_w].tmask.size(); ++j) { if (w[schedule_w].activeThreads > j && w[schedule_w].tmask[j]) cout << " 1"; else cout << " 0"; if (j != w[schedule_w].tmask.size()-1 || schedule_w != w.size()-1) cout << ','; } + D(3, "\nPrinted active threads" << flush); // #endif - #ifdef PRINT_ACTIVE_THREADS + // #ifdef PRINT_ACTIVE_THREADS cout << endl; - #endif + // #endif } void Core::decode() @@ -522,7 +542,7 @@ void Core::load_store() void Core::execute_unit() { - // cout << "$$$$$$$$$$$$$$$$$$$ EXE START\n"; + cout << "$$$$$$$$$$$$$$$$$$$ EXE START\n" << flush; bool do_nothing = false; // EXEC is always not busy if (inst_in_scheduler.is_lw || inst_in_scheduler.is_sw) @@ -546,6 +566,7 @@ void Core::execute_unit() // cout << "Rename RS2: " << inst_in_scheduler.rs1 << " is " << renameTable[inst_in_scheduler.wid][inst_in_scheduler.rs2] << " wid: " << inst_in_scheduler.wid << '\n'; } + cout << "About to check vs*\n" << flush; if(inst_in_scheduler.vs1 > 0) { scheduler_srcs_ready = scheduler_srcs_ready && vecRenameTable[inst_in_scheduler.vs1]; @@ -554,6 +575,7 @@ void Core::execute_unit() { scheduler_srcs_ready = scheduler_srcs_ready && vecRenameTable[inst_in_scheduler.vs2]; } + cout << "Finished sources\n" << flush; if (scheduler_srcs_ready) { @@ -561,11 +583,15 @@ void Core::execute_unit() // cout << "rename setting rd: " << inst_in_scheduler.rd << " to not useabel wid: " << inst_in_scheduler.wid << '\n'; renameTable[inst_in_scheduler.wid][inst_in_scheduler.rd] = false; } + + cout << "About to check vector wb: " << inst_in_scheduler.vd << "\n" << flush; if(inst_in_scheduler.vd != -1) { vecRenameTable[inst_in_scheduler.vd] = false; } + cout << "Finished wb checking" << "\n" << flush; CPY_TRACE(inst_in_exe, inst_in_scheduler); INIT_TRACE(inst_in_scheduler); + cout << "Finished trace copying and clearning" << "\n" << flush; } else { @@ -583,6 +609,7 @@ void Core::execute_unit() //printTrace(&inst_in_exe, "execute_unit"); // INIT_TRACE(inst_in_exe); + D(3, "EXECUTE END" << flush); } void Core::writeback() diff --git a/simX/instruction.cpp b/simX/instruction.cpp index 88ccd5dc..b3014f01 100644 --- a/simX/instruction.cpp +++ b/simX/instruction.cpp @@ -1105,10 +1105,10 @@ void Instruction::executeOn(Warp &c, trace_inst_t * trace_inst) { { is_vec = true; D(3, "Addition " << rsrc[0] << " " << rsrc[1] << " Dest:" << rdest); - vector> vr1 = c.vreg[rsrc[0]]; - vector> vr2 = c.vreg[rsrc[1]]; - vector> vd = c.vreg[rdest]; - vector> mask = c.vreg[0]; + vector> & vr1 = c.vreg[rsrc[0]]; + vector> & vr2 = c.vreg[rsrc[1]]; + vector> & vd = c.vreg[rdest]; + vector> & mask = c.vreg[0]; if (c.vtype.vsew == 8) { @@ -1166,8 +1166,9 @@ void Instruction::executeOn(Warp &c, trace_inst_t * trace_inst) { } } - D(3, "Vector Register state after addition:"); + D(3, "Vector Register state after addition:" << flush); for(int i=0; i < c.vreg.size(); i++) + { for(int j=0; j< c.vreg[0].size(); j++) { if (c.vtype.vsew == 8) @@ -1184,13 +1185,16 @@ void Instruction::executeOn(Warp &c, trace_inst_t * trace_inst) { std::cout << "reg[" << i << "][" << j << "] = " << *ptr_val << std::endl; } } + } + + D(3, "After vector register state after addition" << flush); } break; case 24: //vmseq { - vector> vr1 = c.vreg[rsrc[0]]; - vector> vr2 = c.vreg[rsrc[1]]; - vector> vd = c.vreg[rdest]; + vector> & vr1 = c.vreg[rsrc[0]]; + vector> & vr2 = c.vreg[rsrc[1]]; + vector> & vd = c.vreg[rdest]; if(c.vtype.vsew == 8){ for(uint8_t i = 0; i < c.vl; i++){ uint8_t *first_ptr = (uint8_t *)vr1[i].val; @@ -1229,9 +1233,9 @@ void Instruction::executeOn(Warp &c, trace_inst_t * trace_inst) { break; case 25: //vmsne { - vector> vr1 = c.vreg[rsrc[0]]; - vector> vr2 = c.vreg[rsrc[1]]; - vector> vd = c.vreg[rdest]; + vector> & vr1 = c.vreg[rsrc[0]]; + vector> & vr2 = c.vreg[rsrc[1]]; + vector> & vd = c.vreg[rdest]; if(c.vtype.vsew == 8){ for(uint8_t i = 0; i < c.vl; i++){ uint8_t *first_ptr = (uint8_t *)vr1[i].val; @@ -1270,9 +1274,9 @@ void Instruction::executeOn(Warp &c, trace_inst_t * trace_inst) { break; case 26: //vmsltu { - vector> vr1 = c.vreg[rsrc[0]]; - vector> vr2 = c.vreg[rsrc[1]]; - vector> vd = c.vreg[rdest]; + vector> & vr1 = c.vreg[rsrc[0]]; + vector> & vr2 = c.vreg[rsrc[1]]; + vector> & vd = c.vreg[rdest]; if(c.vtype.vsew == 8){ for(uint8_t i = 0; i < c.vl; i++){ uint8_t *first_ptr = (uint8_t *)vr1[i].val; @@ -1311,9 +1315,9 @@ void Instruction::executeOn(Warp &c, trace_inst_t * trace_inst) { break; case 27: //vmslt { - vector> vr1 = c.vreg[rsrc[0]]; - vector> vr2 = c.vreg[rsrc[1]]; - vector> vd = c.vreg[rdest]; + vector> & vr1 = c.vreg[rsrc[0]]; + vector> & vr2 = c.vreg[rsrc[1]]; + vector> & vd = c.vreg[rdest]; if(c.vtype.vsew == 8){ for(int8_t i = 0; i < c.vl; i++){ int8_t *first_ptr = (int8_t *)vr1[i].val; @@ -1351,9 +1355,9 @@ void Instruction::executeOn(Warp &c, trace_inst_t * trace_inst) { break; case 28: //vmsleu { - vector> vr1 = c.vreg[rsrc[0]]; - vector> vr2 = c.vreg[rsrc[1]]; - vector> vd = c.vreg[rdest]; + vector> & vr1 = c.vreg[rsrc[0]]; + vector> & vr2 = c.vreg[rsrc[1]]; + vector> & vd = c.vreg[rdest]; if(c.vtype.vsew == 8){ for(uint8_t i = 0; i < c.vl; i++){ uint8_t *first_ptr = (uint8_t *)vr1[i].val; @@ -1391,9 +1395,9 @@ void Instruction::executeOn(Warp &c, trace_inst_t * trace_inst) { break; case 29: //vmsle { - vector> vr1 = c.vreg[rsrc[0]]; - vector> vr2 = c.vreg[rsrc[1]]; - vector> vd = c.vreg[rdest]; + vector> & vr1 = c.vreg[rsrc[0]]; + vector> & vr2 = c.vreg[rsrc[1]]; + vector> & vd = c.vreg[rdest]; if(c.vtype.vsew == 8){ for(int8_t i = 0; i < c.vl; i++){ int8_t *first_ptr = (int8_t *)vr1[i].val; @@ -1431,9 +1435,9 @@ void Instruction::executeOn(Warp &c, trace_inst_t * trace_inst) { break; case 30: //vmsgtu { - vector> vr1 = c.vreg[rsrc[0]]; - vector> vr2 = c.vreg[rsrc[1]]; - vector> vd = c.vreg[rdest]; + vector> & vr1 = c.vreg[rsrc[0]]; + vector> & vr2 = c.vreg[rsrc[1]]; + vector> & vd = c.vreg[rdest]; if(c.vtype.vsew == 8){ for(uint8_t i = 0; i < c.vl; i++){ uint8_t *first_ptr = (uint8_t *)vr1[i].val; @@ -1471,9 +1475,9 @@ void Instruction::executeOn(Warp &c, trace_inst_t * trace_inst) { break; case 31: //vmsgt { - vector> vr1 = c.vreg[rsrc[0]]; - vector> vr2 = c.vreg[rsrc[1]]; - vector> vd = c.vreg[rdest]; + vector> & vr1 = c.vreg[rsrc[0]]; + vector> & vr2 = c.vreg[rsrc[1]]; + vector> & vd = c.vreg[rdest]; if(c.vtype.vsew == 8){ for(int8_t i = 0; i < c.vl; i++){ int8_t *first_ptr = (int8_t *)vr1[i].val; @@ -1522,9 +1526,9 @@ void Instruction::executeOn(Warp &c, trace_inst_t * trace_inst) { case 24: //vmandnot { D(3, "vmandnot"); - vector> vr1 = c.vreg[rsrc[0]]; - vector> vr2 = c.vreg[rsrc[1]]; - vector> vd = c.vreg[rdest]; + vector> & vr1 = c.vreg[rsrc[0]]; + vector> & vr2 = c.vreg[rsrc[1]]; + vector> & vd = c.vreg[rdest]; if(c.vtype.vsew == 8){ for(uint8_t i = 0; i < c.vl; i++){ uint8_t *first_ptr = (uint8_t *)vr1[i].val; @@ -1584,9 +1588,9 @@ void Instruction::executeOn(Warp &c, trace_inst_t * trace_inst) { case 25: //vmand { D(3, "vmand"); - vector> vr1 = c.vreg[rsrc[0]]; - vector> vr2 = c.vreg[rsrc[1]]; - vector> vd = c.vreg[rdest]; + vector> & vr1 = c.vreg[rsrc[0]]; + vector> & vr2 = c.vreg[rsrc[1]]; + vector> & vd = c.vreg[rdest]; if(c.vtype.vsew == 8){ for(uint8_t i = 0; i < c.vl; i++){ uint8_t *first_ptr = (uint8_t *)vr1[i].val; @@ -1645,9 +1649,9 @@ void Instruction::executeOn(Warp &c, trace_inst_t * trace_inst) { case 26: //vmor { D(3, "vmor"); - vector> vr1 = c.vreg[rsrc[0]]; - vector> vr2 = c.vreg[rsrc[1]]; - vector> vd = c.vreg[rdest]; + vector> & vr1 = c.vreg[rsrc[0]]; + vector> & vr2 = c.vreg[rsrc[1]]; + vector> & vd = c.vreg[rdest]; if(c.vtype.vsew == 8){ for(uint8_t i = 0; i < c.vl; i++){ uint8_t *first_ptr = (uint8_t *)vr1[i].val; @@ -1706,9 +1710,9 @@ void Instruction::executeOn(Warp &c, trace_inst_t * trace_inst) { case 27: //vmxor { D(3, "vmxor"); - vector> vr1 = c.vreg[rsrc[0]]; - vector> vr2 = c.vreg[rsrc[1]]; - vector> vd = c.vreg[rdest]; + vector> & vr1 = c.vreg[rsrc[0]]; + vector> & vr2 = c.vreg[rsrc[1]]; + vector> & vd = c.vreg[rdest]; if(c.vtype.vsew == 8){ uint8_t *result_ptr; for(uint8_t i = 0; i < c.vl; i++){ @@ -1767,9 +1771,9 @@ void Instruction::executeOn(Warp &c, trace_inst_t * trace_inst) { case 28: //vmornot { D(3, "vmornot"); - vector> vr1 = c.vreg[rsrc[0]]; - vector> vr2 = c.vreg[rsrc[1]]; - vector> vd = c.vreg[rdest]; + vector> & vr1 = c.vreg[rsrc[0]]; + vector> & vr2 = c.vreg[rsrc[1]]; + vector> & vd = c.vreg[rdest]; if(c.vtype.vsew == 8){ for(uint8_t i = 0; i < c.vl; i++){ uint8_t *first_ptr = (uint8_t *)vr1[i].val; @@ -1825,9 +1829,9 @@ void Instruction::executeOn(Warp &c, trace_inst_t * trace_inst) { case 29: //vmnand { D(3, "vmnand"); - vector> vr1 = c.vreg[rsrc[0]]; - vector> vr2 = c.vreg[rsrc[1]]; - vector> vd = c.vreg[rdest]; + vector> & vr1 = c.vreg[rsrc[0]]; + vector> & vr2 = c.vreg[rsrc[1]]; + vector> & vd = c.vreg[rdest]; if(c.vtype.vsew == 8){ for(uint8_t i = 0; i < c.vl; i++){ uint8_t *first_ptr = (uint8_t *)vr1[i].val; @@ -1887,9 +1891,9 @@ void Instruction::executeOn(Warp &c, trace_inst_t * trace_inst) { case 30: //vmnor { D(3, "vmnor"); - vector> vr1 = c.vreg[rsrc[0]]; - vector> vr2 = c.vreg[rsrc[1]]; - vector> vd = c.vreg[rdest]; + vector> & vr1 = c.vreg[rsrc[0]]; + vector> & vr2 = c.vreg[rsrc[1]]; + vector> & vd = c.vreg[rdest]; if(c.vtype.vsew == 8){ uint8_t *result_ptr; @@ -1951,9 +1955,9 @@ void Instruction::executeOn(Warp &c, trace_inst_t * trace_inst) { D(3, "vmxnor"); uint8_t *result_ptr; - vector> vr1 = c.vreg[rsrc[0]]; - vector> vr2 = c.vreg[rsrc[1]]; - vector> vd = c.vreg[rdest]; + vector> & vr1 = c.vreg[rsrc[0]]; + vector> & vr2 = c.vreg[rsrc[1]]; + vector> & vd = c.vreg[rdest]; if(c.vtype.vsew == 8){ for(uint8_t i = 0; i < c.vl; i++){ uint8_t *first_ptr = (uint8_t *)vr1[i].val; @@ -2053,6 +2057,11 @@ void Instruction::executeOn(Warp &c, trace_inst_t * trace_inst) { } } break; + default: + { + cout << "default???\n" << flush; + + } } break; case VL: @@ -2064,63 +2073,73 @@ void Instruction::executeOn(Warp &c, trace_inst_t * trace_inst) { D(3, "src: " << rsrc[0] << " " << reg[rsrc[0]]); D(3, "dest" << rdest); D(3, "width" << vlsWidth); - vector> vd = c.vreg[rdest]; + vector> & vd = c.vreg[rdest]; - switch(vlsWidth) { - case 6: //load word and unit strided (not checking for unit stride) - for(Word i = 0; i < c.vl; i++) { - memAddr = ((reg[rsrc[0]]) & 0xFFFFFFFC) + (i*c.vtype.vsew/8); - data_read = c.core->mem.read(memAddr, c.supervisorMode); - D(3, "Mem addr: " << std::hex << memAddr << " Data read " << data_read); - int * result_ptr = (int *) vd[i].val; - *result_ptr = data_read; - - trace_inst->is_lw = true; - trace_inst->mem_addresses[i] = memAddr; - } - /*for(Word i = c.vl; i < VLMAX; i++){ - int * result_ptr = (int *) vd[i].val; - *result_ptr = 0; - }*/ - - D(3, "Vector Register state after addition:"); - for(int i=0; i < 32; i++) + switch(vlsWidth) + { + case 6: //load word and unit strided (not checking for unit stride) { - for(int j=0; j< c.vl; j++) - { - cout << "starting iter" << endl; - if (c.vtype.vsew == 8) - { - uint8_t * ptr_val = (uint8_t *) c.vreg[i][j].val; - std::cout << "reg[" << i << "][" << j << "] = " << *ptr_val << std::endl; - } else if (c.vtype.vsew == 16) - { - uint16_t * ptr_val = (uint16_t *) c.vreg[i][j].val; - std::cout << "reg[" << i << "][" << j << "] = " << *ptr_val << std::endl; - } else if (c.vtype.vsew == 32) - { - uint32_t * ptr_val = (uint32_t *) c.vreg[i][j].val; - std::cout << "reg[" << i << "][" << j << "] = " << *ptr_val << std::endl; - } + for(Word i = 0; i < c.vl; i++) { + memAddr = ((reg[rsrc[0]]) & 0xFFFFFFFC) + (i*c.vtype.vsew/8); + data_read = c.core->mem.read(memAddr, c.supervisorMode); + D(3, "Mem addr: " << std::hex << memAddr << " Data read " << data_read); + int * result_ptr = (int *) vd[i].val; + *result_ptr = data_read; - cout << "Finished iter" << endl; + trace_inst->is_lw = true; + trace_inst->mem_addresses[i] = memAddr; } - } + /*for(Word i = c.vl; i < VLMAX; i++){ + int * result_ptr = (int *) vd[i].val; + *result_ptr = 0; + }*/ - cout << "Finished loop" << endl; + D(3, "Vector Register state ----:"); + // for(int i=0; i < 32; i++) + // { + // for(int j=0; j< c.vl; j++) + // { + // cout << "starting iter" << endl; + // if (c.vtype.vsew == 8) + // { + // uint8_t * ptr_val = (uint8_t *) c.vreg[i][j].val; + // std::cout << "reg[" << i << "][" << j << "] = " << *ptr_val << std::endl; + // } else if (c.vtype.vsew == 16) + // { + // uint16_t * ptr_val = (uint16_t *) c.vreg[i][j].val; + // std::cout << "reg[" << i << "][" << j << "] = " << *ptr_val << std::endl; + // } else if (c.vtype.vsew == 32) + // { + // uint32_t * ptr_val = (uint32_t *) c.vreg[i][j].val; + // std::cout << "reg[" << i << "][" << j << "] = " << *ptr_val << std::endl; + // } + + // cout << "Finished iter" << endl; + // } + // } + + cout << "Finished loop" << endl; + } + cout << "aaaaaaaaaaaaaaaaaaaaaa" << endl; + break; + default: + { + cout << "Serious default??\n" << flush; + } break; } - cout << "hhhhhhhhhhhhhhh" << endl; break; } break; case VS: is_vec = true; VLMAX = (c.vtype.vlmul * c.VLEN)/c.vtype.vsew; - for(Word i = 0; i < c.vl; i++) { + for(Word i = 0; i < c.vl; i++) + { + cout << "iter" << endl; ++c.stores; memAddr = reg[rsrc[0]] + (i*c.vtype.vsew/8); - std::cout << "STORE MEM ADDRESS: " << std::hex << memAddr << "\n"; + std::cout << "STORE MEM ADDRESS *** : " << std::hex << memAddr << "\n"; trace_inst->is_sw = true; @@ -2131,16 +2150,20 @@ void Instruction::executeOn(Warp &c, trace_inst_t * trace_inst) { case 6: //store word and unit strided (not checking for unit stride) { uint32_t * ptr_val = (uint32_t *) c.vreg[vs3][i].val; + D(3, "value: " << flush << (*ptr_val) << flush); c.core->mem.write(memAddr, *ptr_val, c.supervisorMode, 4); - D(3, "store: " << memAddr << " value:" << *ptr_val); + D(3, "store: " << memAddr << " value:" << *ptr_val << flush); } break; default: - cout << "ERROR: UNSUPPORTED S INST\n"; + cout << "ERROR: UNSUPPORTED S INST\n" << flush; exit(1); } - c.memAccesses.push_back(Warp::MemAccess(true, memAddr)); + cout << "Loop finished" << endl; + // c.memAccesses.push_back(Warp::MemAccess(true, memAddr)); } + + cout << "After for loop" << endl; break; default: cout << "pc: " << hex << (c.pc-4) << "\n"; @@ -2148,13 +2171,14 @@ void Instruction::executeOn(Warp &c, trace_inst_t * trace_inst) { exit(1); } - cout << "outside case" << endl; + // break; + cout << "outside case" << endl << flush; } - std::cout << "finished instruction" << endl; + std::cout << "finished instruction" << endl << flush; - D(3, "End instruction execute."); + D(3, "End instruction execute." << flush); c.activeThreads = nextActiveThreads;