From 70dadca9fe16ed3e6a9f21bc5302f4c9899ce67f Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 23 May 2020 00:22:56 -0400 Subject: [PATCH] fix scheduler rename_table X values - reverted valid bits --- driver/rtlsim/Makefile | 18 ++++++++++-------- hw/Makefile | 19 +++++++++++++++---- hw/rtl/VX_scheduler.v | 25 +++++++++---------------- hw/simulate/simulator.cpp | 25 +++++++++++++++---------- 4 files changed, 49 insertions(+), 38 deletions(-) diff --git a/driver/rtlsim/Makefile b/driver/rtlsim/Makefile index c29005ec..4647433d 100644 --- a/driver/rtlsim/Makefile +++ b/driver/rtlsim/Makefile @@ -4,13 +4,15 @@ CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors CFLAGS += -I../../include -I../../../hw/simulate -I../../../runtime # control RTL debug print states -DBG_PRINT = -DDBG_PRINT_CORE_ICACHE \ - -DDBG_PRINT_CORE_DCACHE \ - -DDBG_PRINT_CACHE_BANK \ - -DDBG_PRINT_CACHE_SNP \ - -DDBG_PRINT_CACHE_MSRQ \ - -DDBG_PRINT_DRAM \ - -DDBG_PRINT_OPAE +DBG_PRINT_FLAGS = -DDBG_PRINT_CORE_ICACHE \ + -DDBG_PRINT_CORE_DCACHE \ + -DDBG_PRINT_CACHE_BANK \ + -DDBG_PRINT_CACHE_SNP \ + -DDBG_PRINT_CACHE_MSRQ \ + -DDBG_PRINT_DRAM \ + -DDBG_PRINT_OPAE + +#DBG_PRINT=$(DBG_PRINT_FLAGS) #MULTICORE += -DNUM_CLUSTERS=2 -DNUM_CORES=4 #MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=4 @@ -43,7 +45,7 @@ VL_FLAGS += -DGLOBAL_BLOCK_SIZE=64 # Debugigng ifdef DEBUG - VL_FLAGS += --trace -DVL_DEBUG=1 $(DBG_PRINT) + VL_FLAGS += --trace $(DBG_PRINT) CFLAGS += -DVCD_OUTPUT else CFLAGS += -DNDEBUG diff --git a/hw/Makefile b/hw/Makefile index 7bb9e223..bfc86950 100644 --- a/hw/Makefile +++ b/hw/Makefile @@ -16,11 +16,22 @@ VF += -DGLOBAL_BLOCK_SIZE=64 #MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=4 MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=2 +# control RTL debug print states +DBG_PRINT_FLAGS = -DDBG_PRINT_CORE_ICACHE \ + -DDBG_PRINT_CORE_DCACHE \ + -DDBG_PRINT_CACHE_BANK \ + -DDBG_PRINT_CACHE_SNP \ + -DDBG_PRINT_CACHE_MSRQ \ + -DDBG_PRINT_DRAM \ + -DDBG_PRINT_OPAE + +#DBG_PRINT=$(DBG_PRINT_FLAGS) +
INCLUDE = -I./rtl/ -I./rtl/libs -I./rtl/interfaces -I./rtl/pipe_regs -I./rtl/cache -I./rtl/simulate SRCS += ./simulate/testbench.cpp ./simulate/simulator.cpp -DBG += --trace -DVL_DEBUG=1 +DBG += --trace THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu_count() // 2))') @@ -33,16 +44,16 @@ gen-s: build_config verilator $(VF) -DNDEBUG -cc Vortex_Socket.v -CFLAGS '$(CF) -DNDEBUG' gen-sd: build_config - verilator $(VF) -cc Vortex_Socket.v -CFLAGS '$(CF) -g -O0 -DVCD_OUTPUT' $(DBG) + verilator $(VF) -cc Vortex_Socket.v $(DBG_PRINT) -CFLAGS '$(CF) -g -O0 -DVCD_OUTPUT' $(DBG) gen-st: build_config - verilator $(VF) -cc Vortex_Socket.v -CFLAGS '$(CF) -DNDEBUG -O2' --threads $(THREADS) + verilator $(VF) -DNDEBUG -cc Vortex_Socket.v -CFLAGS '$(CF) -DNDEBUG -O2' --threads $(THREADS) gen-m: build_config verilator $(VF) -DNDEBUG -cc Vortex_Socket.v $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG $(MULTICORE)' gen-md: build_config - verilator $(VF) -cc Vortex_Socket.v $(MULTICORE) -CFLAGS '$(CF) -g -O0 -DVCD_OUTPUT $(MULTICORE)' $(DBG) + verilator $(VF) -cc Vortex_Socket.v $(MULTICORE) $(DBG_PRINT) -CFLAGS '$(CF) -g -O0 -DVCD_OUTPUT $(MULTICORE)' $(DBG) gen-mt: build_config verilator $(VF) -DNDEBUG -cc Vortex_Socket.v $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG -O2 $(MULTICORE)' --threads $(THREADS) diff --git a/hw/rtl/VX_scheduler.v b/hw/rtl/VX_scheduler.v index 2dfbb2cb..a1e85d79 100644 --- a/hw/rtl/VX_scheduler.v +++ b/hw/rtl/VX_scheduler.v @@ -17,14 +17,13 @@ module VX_scheduler ( assign is_empty = count_valid == 0; reg[31:0][`NUM_THREADS-1:0] rename_table[`NUM_WARPS-1:0]; - reg[31:0] valid_table [`NUM_WARPS-1:0]; wire valid_wb = (writeback_if.wb != 0) && (| writeback_if.valid) && (writeback_if.rd != 0); wire wb_inc = (bckE_req_if.wb != 0) && (bckE_req_if.rd != 0); - wire rs1_rename = (rename_table[bckE_req_if.warp_num][bckE_req_if.rs1] != 0) && valid_table[bckE_req_if.warp_num][bckE_req_if.rs1]; - wire rs2_rename = 
(rename_table[bckE_req_if.warp_num][bckE_req_if.rs2] != 0) && valid_table[bckE_req_if.warp_num][bckE_req_if.rs2]; - wire rd_rename = (rename_table[bckE_req_if.warp_num][bckE_req_if.rd ] != 0) && valid_table[bckE_req_if.warp_num][bckE_req_if.rd ]; + wire rs1_rename = (rename_table[bckE_req_if.warp_num][bckE_req_if.rs1] != 0); + wire rs2_rename = (rename_table[bckE_req_if.warp_num][bckE_req_if.rs2] != 0); + wire rd_rename = (rename_table[bckE_req_if.warp_num][bckE_req_if.rd ] != 0); wire is_store = (bckE_req_if.mem_write != `BYTE_EN_NO); wire is_load = (bckE_req_if.mem_read != `BYTE_EN_NO); @@ -35,7 +34,7 @@ module VX_scheduler ( wire is_csr = bckE_req_if.is_csr; wire is_exec = !is_mem && !is_gpu && !is_csr; - wire using_rs2 = (bckE_req_if.rs2_src == `RS2_REG) || is_store || bckE_req_if.is_barrier || bckE_req_if.is_wspawn; + wire using_rs2 = (bckE_req_if.rs2_src == `RS2_REG) || is_store || bckE_req_if.is_barrier || bckE_req_if.is_wspawn; wire rs1_rename_qual = ((rs1_rename) && (bckE_req_if.rs1 != 0)); wire rs2_rename_qual = ((rs2_rename) && (bckE_req_if.rs2 != 0 && using_rs2)); @@ -44,37 +43,31 @@ module VX_scheduler ( wire rename_valid = rs1_rename_qual || rs2_rename_qual || rd_rename_qual; assign schedule_delay = (| bckE_req_if.valid) - && ((rename_valid ) + && ((rename_valid) || (memory_delay && is_mem) || (gpr_stage_delay && (is_mem || is_exec)) || (exec_delay && is_exec)); integer i, w; - wire[`NUM_THREADS-1:0] old_rename_mask = rename_table[writeback_if.warp_num][writeback_if.rd]; - wire[`NUM_THREADS-1:0] invalidate_mask = (~writeback_if.valid); - - wire[`NUM_THREADS-1:0] valid_wb_new_mask = old_rename_mask & invalidate_mask; - wire valid_wb_new_valid = valid_wb_new_mask != 0; + wire[`NUM_THREADS-1:0] old_rename_mask = rename_table[writeback_if.warp_num][writeback_if.rd]; + wire[`NUM_THREADS-1:0] invalidate_mask = ~writeback_if.valid; + wire[`NUM_THREADS-1:0] valid_wb_new_mask = old_rename_mask & invalidate_mask; always @(posedge clk) begin if (reset) begin for 
(w = 0; w < `NUM_WARPS; w=w+1) begin for (i = 0; i < 32; i++) begin - // rename_table[w][i] <= 0; - valid_table[w][i] <= 0; + rename_table[w][i] <= 0; end end end else begin if (valid_wb) begin rename_table[writeback_if.warp_num][writeback_if.rd] <= valid_wb_new_mask; - valid_table [writeback_if.warp_num][writeback_if.rd] <= valid_wb_new_valid; - end if (!schedule_delay && wb_inc) begin rename_table[bckE_req_if.warp_num][bckE_req_if.rd] <= bckE_req_if.valid; - valid_table [bckE_req_if.warp_num][bckE_req_if.rd] <= 1'b1; end if (valid_wb diff --git a/hw/simulate/simulator.cpp b/hw/simulate/simulator.cpp index 5d3a2c64..27ecb0d7 100644 --- a/hw/simulate/simulator.cpp +++ b/hw/simulate/simulator.cpp @@ -10,16 +10,20 @@ double sc_time_stamp() { Simulator::Simulator() { // force random values for unitialized signals - const char* args[] = {"", "+verilator+rand+reset+2", "+verilator+seed+0"}; + const char* args[] = {"", "+verilator+rand+reset+1", "+verilator+seed+0"}; Verilated::commandArgs(3, args); -#ifndef NDEBUG - Verilated::debug(1); -#endif - ram_ = nullptr; vortex_ = new VVortex_Socket(); + // initial values + vortex_->dram_req_ready = 0; + vortex_->dram_rsp_valid = 0; + vortex_->io_req_ready = 0; + vortex_->io_rsp_valid = 0; + vortex_->snp_req_valid = 0; + vortex_->snp_rsp_ready = 0; + #ifdef VCD_OUTPUT Verilated::traceEverOn(true); trace_ = new VerilatedVcdC; @@ -47,7 +51,7 @@ void Simulator::print_stats(std::ostream& out) { void Simulator::dbus_driver() { if (ram_ == nullptr) { - vortex_->dram_req_ready = false; + vortex_->dram_req_ready = 0; return; } @@ -126,7 +130,8 @@ void Simulator::io_driver() { char c = (char)data_write; std::cout << c; } - vortex_->io_req_ready = true; + vortex_->io_req_ready = 1; + vortex_->io_rsp_valid = 01; } void Simulator::reset() { @@ -180,8 +185,8 @@ void Simulator::flush_caches(uint32_t mem_addr, uint32_t size) { // submit snoop requests for the needed blocks vortex_->snp_req_addr = aligned_addr_start; - vortex_->snp_req_valid = 
true; - vortex_->snp_rsp_ready = true; + vortex_->snp_req_valid = 1; + vortex_->snp_rsp_ready = 1; for (;;) { this->step(); if (vortex_->snp_rsp_valid) { @@ -192,7 +197,7 @@ void Simulator::flush_caches(uint32_t mem_addr, uint32_t size) { ++outstanding_snp_reqs; vortex_->snp_req_addr += 1; if (vortex_->snp_req_addr >= aligned_addr_end) { - vortex_->snp_req_valid = false; + vortex_->snp_req_valid = 0; } } if (!vortex_->snp_req_valid