Cleaned up simX debug output and VCD tracing for much better performance

This commit is contained in:
fares
2019-11-23 10:32:00 -05:00
parent 852d844618
commit 69139e47bb
5 changed files with 153 additions and 151 deletions

View File

@@ -46,6 +46,7 @@
trace_inst.vd = -1; \
trace_inst.is_lw = false; \
trace_inst.is_sw = false; \
if (trace_inst.mem_addresses != NULL) free(trace_inst.mem_addresses); \
trace_inst.mem_addresses = (unsigned *) malloc(32 * sizeof(unsigned)); \
for (int tid = 0; tid < a.getNThds(); tid++) trace_inst.mem_addresses[tid] = 0xdeadbeef; \
trace_inst.mem_stall_cycles = 0; \
@@ -79,19 +80,19 @@ using namespace std;
void printTrace(trace_inst_t * trace, const char * stage_name)
{
cout << "********************************** " << stage_name << " *********************************\n";
cout << "valid: " << trace->valid_inst << '\n';
cout << "PC: " << hex << trace->pc << dec << '\n';
cout << "wid: " << trace->wid << '\n';
cout << "rd: " << trace->rd << "\trs1: " << trace->rs1 << "\trs2: " << trace->rs2 << '\n';
cout << "is_lw: " << trace->is_lw << '\n';
cout << "is_sw: " << trace->is_sw << '\n';
cout << "fetch_stall_cycles: " << trace->fetch_stall_cycles << '\n';
cout << "mem_stall_cycles: " << trace->mem_stall_cycles << '\n';
D(3, "********************************** " << stage_name << " *********************************");
D(3, "valid: " << trace->valid_inst);
D(3, "PC: " << hex << trace->pc << dec);
D(3, "wid: " << trace->wid);
D(3, "rd: " << trace->rd << "\trs1: " << trace->rs1 << "\trs2: " << trace->rs2);
D(3, "is_lw: " << trace->is_lw);
D(3, "is_sw: " << trace->is_sw);
D(3, "fetch_stall_cycles: " << trace->fetch_stall_cycles);
D(3, "mem_stall_cycles: " << trace->mem_stall_cycles);
cout << "stall_warp: " << trace->stall_warp << '\n';
cout << "wspawn: " << trace->wspawn << '\n';
cout << "stalled: " << trace->stalled << '\n';
D(3, "stall_warp: " << trace->stall_warp);
D(3, "wspawn: " << trace->wspawn);
D(3, "stalled: " << trace->stalled);
}
#ifdef EMU_INSTRUMENTATION
@@ -133,9 +134,9 @@ Core::Core(const ArchDef &a, Decoder &d, MemoryUnit &mem, Word id):
cache_simulator = new Vcache_simX;
m_trace = new VerilatedVcdC;
cache_simulator->trace(m_trace, 99);
m_trace->open("simXtrace.vcd");
// m_trace = new VerilatedVcdC;
// cache_simulator->trace(m_trace, 99);
// m_trace->open("simXtrace.vcd");
cache_simulator->reset = 1;
cache_simulator->clk = 0;
@@ -166,36 +167,35 @@ void Core::step()
D(3, "Started core::step" << flush);
steps++;
cout << "CYCLE: " << steps << '\n';
D(3, "CYCLE: " << steps);
cout << "Stalled Warps:\n";
D(3, "Stalled Warps:");
for (int widd = 0; widd < a.getNWarps(); widd++)
{
cout << stallWarp[widd] << " ";
D(3, stallWarp[widd] << " ");
}
cout << '\n';
// cout << "Rename table\n";
// for (int regii = 0; regii < 32; regii++)
// {
// cout << regii << ": " << renameTable[0][regii] << '\n';
// }
cout << '\n' << flush;
// cout << '\n' << flush;
cout << "About to call writeback" << endl;
// cout << "About to call writeback" << endl;
this->writeback();
cout << "About to call load_store" << endl;
// cout << "About to call load_store" << endl;
this->load_store();
cout << "About to call execute_unit" << endl;
// cout << "About to call execute_unit" << endl;
this->execute_unit();
cout << "About to call scheduler" << endl;
// cout << "About to call scheduler" << endl;
this->scheduler();
cout << "About to call decode" << endl;
// cout << "About to call decode" << endl;
this->decode();
D(3, "About to call fetch" << flush);
// D(3, "About to call fetch" << flush);
this->fetch();
D(3, "Finished fetch" << flush);
// D(3, "Finished fetch" << flush);
if (release_warp)
{
@@ -250,7 +250,7 @@ void Core::getCacheDelays(trace_inst_t * trace_inst)
cache_simulator->clk = 1;
cache_simulator->eval();
m_trace->dump(2*curr_cycle);
// m_trace->dump(2*curr_cycle);
cache_simulator->in_icache_pc_addr = trace_inst->pc;
cache_simulator->in_icache_valid_pc_addr = 1;
@@ -266,7 +266,7 @@ void Core::getCacheDelays(trace_inst_t * trace_inst)
// DCache end
cache_simulator->clk = 0;
cache_simulator->eval();
m_trace->dump(2*curr_cycle+1);
// m_trace->dump(2*curr_cycle+1);
curr_cycle++;
@@ -308,7 +308,7 @@ void Core::getCacheDelays(trace_inst_t * trace_inst)
cache_simulator->clk = 1;
cache_simulator->eval();
m_trace->dump(2*curr_cycle);
// m_trace->dump(2*curr_cycle);
//////// Feed input
if (cache_simulator->out_icache_stall)
@@ -343,7 +343,7 @@ void Core::getCacheDelays(trace_inst_t * trace_inst)
cache_simulator->clk = 0;
cache_simulator->eval();
m_trace->dump(2*curr_cycle+1);
// m_trace->dump(2*curr_cycle+1);
curr_cycle++;
@@ -390,9 +390,9 @@ void Core::warpScheduler()
void Core::fetch()
{
#ifdef PRINT_ACTIVE_THREADS
cout << endl << "Threads:";
#endif
// #ifdef PRINT_ACTIVE_THREADS
D(3, "Threads:");
// #endif
// D(-1, "Found schedule: " << foundSchedule);
@@ -436,8 +436,14 @@ void Core::fetch()
// #ifdef PRINT_ACTIVE_THREADS
D(3, "About to print active threads" << flush << "\n");
for (unsigned j = 0; j < w[schedule_w].tmask.size(); ++j) {
if (w[schedule_w].activeThreads > j && w[schedule_w].tmask[j]) cout << " 1";
else cout << " 0";
if (w[schedule_w].activeThreads > j && w[schedule_w].tmask[j])
{
D(3, " 1");
}
else
{
D(3, " 0");
}
if (j != w[schedule_w].tmask.size()-1 || schedule_w != w.size()-1) cout << ',';
}
D(3, "\nPrinted active threads" << flush);
@@ -446,7 +452,6 @@ void Core::fetch()
// #ifdef PRINT_ACTIVE_THREADS
cout << endl;
// #endif
}
@@ -542,7 +547,7 @@ void Core::load_store()
void Core::execute_unit()
{
cout << "$$$$$$$$$$$$$$$$$$$ EXE START\n" << flush;
D(3, "$$$$$$$$$$$$$$$$$$$ EXE START\n" << flush);
bool do_nothing = false;
// EXEC is always not busy
if (inst_in_scheduler.is_lw || inst_in_scheduler.is_sw)
@@ -566,7 +571,7 @@ void Core::execute_unit()
// cout << "Rename RS2: " << inst_in_scheduler.rs1 << " is " << renameTable[inst_in_scheduler.wid][inst_in_scheduler.rs2] << " wid: " << inst_in_scheduler.wid << '\n';
}
cout << "About to check vs*\n" << flush;
// cout << "About to check vs*\n" << flush;
if(inst_in_scheduler.vs1 > 0)
{
scheduler_srcs_ready = scheduler_srcs_ready && vecRenameTable[inst_in_scheduler.vs1];
@@ -575,7 +580,7 @@ void Core::execute_unit()
{
scheduler_srcs_ready = scheduler_srcs_ready && vecRenameTable[inst_in_scheduler.vs2];
}
cout << "Finished sources\n" << flush;
// cout << "Finished sources\n" << flush;
if (scheduler_srcs_ready)
{
@@ -584,14 +589,14 @@ void Core::execute_unit()
renameTable[inst_in_scheduler.wid][inst_in_scheduler.rd] = false;
}
cout << "About to check vector wb: " << inst_in_scheduler.vd << "\n" << flush;
// cout << "About to check vector wb: " << inst_in_scheduler.vd << "\n" << flush;
if(inst_in_scheduler.vd != -1) {
vecRenameTable[inst_in_scheduler.vd] = false;
}
cout << "Finished wb checking" << "\n" << flush;
// cout << "Finished wb checking" << "\n" << flush;
CPY_TRACE(inst_in_exe, inst_in_scheduler);
INIT_TRACE(inst_in_scheduler);
cout << "Finished trace copying and clearning" << "\n" << flush;
// cout << "Finished trace copying and clearing" << "\n" << flush;
}
else
{
@@ -650,7 +655,7 @@ void Core::writeback()
{
if (serviced_exe)
{
cout << "$$$$$$$$$$$$$$$$$$$$ Stalling LSU because EXE is being used\n";
D(3, "$$$$$$$$$$$$$$$$$$$$ Stalling LSU because EXE is being used");
inst_in_lsu.stalled = true;
}
else
@@ -679,7 +684,7 @@ bool Core::running() const {
for (unsigned i = 0; i < w.size(); ++i)
if (w[i].running())
{
cout << "Warp ID " << i << " is running\n";
D(3, "Warp ID " << i << " is running");
return true;
}
return false;
@@ -692,7 +697,7 @@ void Core::printStats() const {
cerr << "Total steps: " << steps << endl;
for (unsigned i = 0; i < w.size(); ++i) {
cout << "=== Warp " << i << " ===" << endl;
// cout << "=== Warp " << i << " ===" << endl;
w[i].printStats();
}
}
@@ -742,7 +747,7 @@ void Warp::step(trace_inst_t * trace_inst) {
// ++steps;
D(3, "in step pc=0x" << hex << pc);
cout << "help: in PC: " << hex << pc << dec << '\n';
D(3, "help: in PC: " << hex << pc << dec);
// std::cout << "pc: " << hex << pc << "\n";