fix scheduler rename_table X values - reverted valid bits

This commit is contained in:
Blaise Tine
2020-05-23 00:22:56 -04:00
parent 1512138a15
commit 70dadca9fe
4 changed files with 49 additions and 38 deletions

View File

@@ -4,7 +4,7 @@ CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors
CFLAGS += -I../../include -I../../../hw/simulate -I../../../runtime CFLAGS += -I../../include -I../../../hw/simulate -I../../../runtime
# control RTL debug print states # control RTL debug print states
DBG_PRINT = -DDBG_PRINT_CORE_ICACHE \ DBG_PRINT_FLAGS = -DDBG_PRINT_CORE_ICACHE \
-DDBG_PRINT_CORE_DCACHE \ -DDBG_PRINT_CORE_DCACHE \
-DDBG_PRINT_CACHE_BANK \ -DDBG_PRINT_CACHE_BANK \
-DDBG_PRINT_CACHE_SNP \ -DDBG_PRINT_CACHE_SNP \
@@ -12,6 +12,8 @@ DBG_PRINT = -DDBG_PRINT_CORE_ICACHE \
-DDBG_PRINT_DRAM \ -DDBG_PRINT_DRAM \
-DDBG_PRINT_OPAE -DDBG_PRINT_OPAE
#DBG_PRINT=$(DBG_PRINT_FLAGS)
#MULTICORE += -DNUM_CLUSTERS=2 -DNUM_CORES=4 #MULTICORE += -DNUM_CLUSTERS=2 -DNUM_CORES=4
#MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=4 #MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=4
MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=2 MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=2
@@ -43,7 +45,7 @@ VL_FLAGS += -DGLOBAL_BLOCK_SIZE=64
# Debugging # Debugging
ifdef DEBUG ifdef DEBUG
VL_FLAGS += --trace -DVL_DEBUG=1 $(DBG_PRINT) VL_FLAGS += --trace $(DBG_PRINT)
CFLAGS += -DVCD_OUTPUT CFLAGS += -DVCD_OUTPUT
else else
CFLAGS += -DNDEBUG CFLAGS += -DNDEBUG

View File

@@ -16,11 +16,22 @@ VF += -DGLOBAL_BLOCK_SIZE=64
#MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=4 #MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=4
MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=2 MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=2
# control RTL debug print states
DBG_PRINT_FLAGS = -DDBG_PRINT_CORE_ICACHE \
-DDBG_PRINT_CORE_DCACHE \
-DDBG_PRINT_CACHE_BANK \
-DDBG_PRINT_CACHE_SNP \
-DDBG_PRINT_CACHE_MSRQ \
-DDBG_PRINT_DRAM \
-DDBG_PRINT_OPAE
#DBG_PRINT=$(DBG_PRINT_FLAGS)
INCLUDE = -I./rtl/ -I./rtl/libs -I./rtl/interfaces -I./rtl/pipe_regs -I./rtl/cache -I./rtl/simulate INCLUDE = -I./rtl/ -I./rtl/libs -I./rtl/interfaces -I./rtl/pipe_regs -I./rtl/cache -I./rtl/simulate
SRCS += ./simulate/testbench.cpp ./simulate/simulator.cpp SRCS += ./simulate/testbench.cpp ./simulate/simulator.cpp
DBG += --trace -DVL_DEBUG=1 DBG += --trace
THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu_count() // 2))') THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu_count() // 2))')
@@ -33,16 +44,16 @@ gen-s: build_config
verilator $(VF) -DNDEBUG -cc Vortex_Socket.v -CFLAGS '$(CF) -DNDEBUG' verilator $(VF) -DNDEBUG -cc Vortex_Socket.v -CFLAGS '$(CF) -DNDEBUG'
gen-sd: build_config gen-sd: build_config
verilator $(VF) -cc Vortex_Socket.v -CFLAGS '$(CF) -g -O0 -DVCD_OUTPUT' $(DBG) verilator $(VF) -cc Vortex_Socket.v $(DBG_PRINT) -CFLAGS '$(CF) -g -O0 -DVCD_OUTPUT' $(DBG)
gen-st: build_config gen-st: build_config
verilator $(VF) -cc Vortex_Socket.v -CFLAGS '$(CF) -DNDEBUG -O2' --threads $(THREADS) verilator $(VF) -DNDEBUG -cc Vortex_Socket.v -CFLAGS '$(CF) -DNDEBUG -O2' --threads $(THREADS)
gen-m: build_config gen-m: build_config
verilator $(VF) -DNDEBUG -cc Vortex_Socket.v $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG $(MULTICORE)' verilator $(VF) -DNDEBUG -cc Vortex_Socket.v $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG $(MULTICORE)'
gen-md: build_config gen-md: build_config
verilator $(VF) -cc Vortex_Socket.v $(MULTICORE) -CFLAGS '$(CF) -g -O0 -DVCD_OUTPUT $(MULTICORE)' $(DBG) verilator $(VF) -cc Vortex_Socket.v $(MULTICORE) $(DBG_PRINT) -CFLAGS '$(CF) -g -O0 -DVCD_OUTPUT $(MULTICORE)' $(DBG)
gen-mt: build_config gen-mt: build_config
verilator $(VF) -DNDEBUG -cc Vortex_Socket.v $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG -O2 $(MULTICORE)' --threads $(THREADS) verilator $(VF) -DNDEBUG -cc Vortex_Socket.v $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG -O2 $(MULTICORE)' --threads $(THREADS)

View File

@@ -17,14 +17,13 @@ module VX_scheduler (
assign is_empty = count_valid == 0; assign is_empty = count_valid == 0;
reg[31:0][`NUM_THREADS-1:0] rename_table[`NUM_WARPS-1:0]; reg[31:0][`NUM_THREADS-1:0] rename_table[`NUM_WARPS-1:0];
reg[31:0] valid_table [`NUM_WARPS-1:0];
wire valid_wb = (writeback_if.wb != 0) && (| writeback_if.valid) && (writeback_if.rd != 0); wire valid_wb = (writeback_if.wb != 0) && (| writeback_if.valid) && (writeback_if.rd != 0);
wire wb_inc = (bckE_req_if.wb != 0) && (bckE_req_if.rd != 0); wire wb_inc = (bckE_req_if.wb != 0) && (bckE_req_if.rd != 0);
wire rs1_rename = (rename_table[bckE_req_if.warp_num][bckE_req_if.rs1] != 0) && valid_table[bckE_req_if.warp_num][bckE_req_if.rs1]; wire rs1_rename = (rename_table[bckE_req_if.warp_num][bckE_req_if.rs1] != 0);
wire rs2_rename = (rename_table[bckE_req_if.warp_num][bckE_req_if.rs2] != 0) && valid_table[bckE_req_if.warp_num][bckE_req_if.rs2]; wire rs2_rename = (rename_table[bckE_req_if.warp_num][bckE_req_if.rs2] != 0);
wire rd_rename = (rename_table[bckE_req_if.warp_num][bckE_req_if.rd ] != 0) && valid_table[bckE_req_if.warp_num][bckE_req_if.rd ]; wire rd_rename = (rename_table[bckE_req_if.warp_num][bckE_req_if.rd ] != 0);
wire is_store = (bckE_req_if.mem_write != `BYTE_EN_NO); wire is_store = (bckE_req_if.mem_write != `BYTE_EN_NO);
wire is_load = (bckE_req_if.mem_read != `BYTE_EN_NO); wire is_load = (bckE_req_if.mem_read != `BYTE_EN_NO);
@@ -52,29 +51,23 @@ module VX_scheduler (
integer i, w; integer i, w;
wire[`NUM_THREADS-1:0] old_rename_mask = rename_table[writeback_if.warp_num][writeback_if.rd]; wire[`NUM_THREADS-1:0] old_rename_mask = rename_table[writeback_if.warp_num][writeback_if.rd];
wire[`NUM_THREADS-1:0] invalidate_mask = (~writeback_if.valid); wire[`NUM_THREADS-1:0] invalidate_mask = ~writeback_if.valid;
wire[`NUM_THREADS-1:0] valid_wb_new_mask = old_rename_mask & invalidate_mask; wire[`NUM_THREADS-1:0] valid_wb_new_mask = old_rename_mask & invalidate_mask;
wire valid_wb_new_valid = valid_wb_new_mask != 0;
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin
for (w = 0; w < `NUM_WARPS; w=w+1) begin for (w = 0; w < `NUM_WARPS; w=w+1) begin
for (i = 0; i < 32; i++) begin for (i = 0; i < 32; i++) begin
// rename_table[w][i] <= 0; rename_table[w][i] <= 0;
valid_table[w][i] <= 0;
end end
end end
end else begin end else begin
if (valid_wb) begin if (valid_wb) begin
rename_table[writeback_if.warp_num][writeback_if.rd] <= valid_wb_new_mask; rename_table[writeback_if.warp_num][writeback_if.rd] <= valid_wb_new_mask;
valid_table [writeback_if.warp_num][writeback_if.rd] <= valid_wb_new_valid;
end end
if (!schedule_delay && wb_inc) begin if (!schedule_delay && wb_inc) begin
rename_table[bckE_req_if.warp_num][bckE_req_if.rd] <= bckE_req_if.valid; rename_table[bckE_req_if.warp_num][bckE_req_if.rd] <= bckE_req_if.valid;
valid_table [bckE_req_if.warp_num][bckE_req_if.rd] <= 1'b1;
end end
if (valid_wb if (valid_wb

View File

@@ -10,16 +10,20 @@ double sc_time_stamp() {
Simulator::Simulator() { Simulator::Simulator() {
// force random values for uninitialized signals // force random values for uninitialized signals
const char* args[] = {"", "+verilator+rand+reset+2", "+verilator+seed+0"}; const char* args[] = {"", "+verilator+rand+reset+1", "+verilator+seed+0"};
Verilated::commandArgs(3, args); Verilated::commandArgs(3, args);
#ifndef NDEBUG
Verilated::debug(1);
#endif
ram_ = nullptr; ram_ = nullptr;
vortex_ = new VVortex_Socket(); vortex_ = new VVortex_Socket();
// initial values
vortex_->dram_req_ready = 0;
vortex_->dram_rsp_valid = 0;
vortex_->io_req_ready = 0;
vortex_->io_rsp_valid = 0;
vortex_->snp_req_valid = 0;
vortex_->snp_rsp_ready = 0;
#ifdef VCD_OUTPUT #ifdef VCD_OUTPUT
Verilated::traceEverOn(true); Verilated::traceEverOn(true);
trace_ = new VerilatedVcdC; trace_ = new VerilatedVcdC;
@@ -47,7 +51,7 @@ void Simulator::print_stats(std::ostream& out) {
void Simulator::dbus_driver() { void Simulator::dbus_driver() {
if (ram_ == nullptr) { if (ram_ == nullptr) {
vortex_->dram_req_ready = false; vortex_->dram_req_ready = 0;
return; return;
} }
@@ -126,7 +130,8 @@ void Simulator::io_driver() {
char c = (char)data_write; char c = (char)data_write;
std::cout << c; std::cout << c;
} }
vortex_->io_req_ready = true; vortex_->io_req_ready = 1;
vortex_->io_rsp_valid = 01;
} }
void Simulator::reset() { void Simulator::reset() {
@@ -180,8 +185,8 @@ void Simulator::flush_caches(uint32_t mem_addr, uint32_t size) {
// submit snoop requests for the needed blocks // submit snoop requests for the needed blocks
vortex_->snp_req_addr = aligned_addr_start; vortex_->snp_req_addr = aligned_addr_start;
vortex_->snp_req_valid = true; vortex_->snp_req_valid = 1;
vortex_->snp_rsp_ready = true; vortex_->snp_rsp_ready = 1;
for (;;) { for (;;) {
this->step(); this->step();
if (vortex_->snp_rsp_valid) { if (vortex_->snp_rsp_valid) {
@@ -192,7 +197,7 @@ void Simulator::flush_caches(uint32_t mem_addr, uint32_t size) {
++outstanding_snp_reqs; ++outstanding_snp_reqs;
vortex_->snp_req_addr += 1; vortex_->snp_req_addr += 1;
if (vortex_->snp_req_addr >= aligned_addr_end) { if (vortex_->snp_req_addr >= aligned_addr_end) {
vortex_->snp_req_valid = false; vortex_->snp_req_valid = 0;
} }
} }
if (!vortex_->snp_req_valid if (!vortex_->snp_req_valid