fix scheduler rename_table X values - reverted valid bits
This commit is contained in:
@@ -4,13 +4,15 @@ CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors
|
|||||||
CFLAGS += -I../../include -I../../../hw/simulate -I../../../runtime
|
CFLAGS += -I../../include -I../../../hw/simulate -I../../../runtime
|
||||||
|
|
||||||
# control RTL debug print states
|
# control RTL debug print states
|
||||||
DBG_PRINT = -DDBG_PRINT_CORE_ICACHE \
|
DBG_PRINT_FLAGS = -DDBG_PRINT_CORE_ICACHE \
|
||||||
-DDBG_PRINT_CORE_DCACHE \
|
-DDBG_PRINT_CORE_DCACHE \
|
||||||
-DDBG_PRINT_CACHE_BANK \
|
-DDBG_PRINT_CACHE_BANK \
|
||||||
-DDBG_PRINT_CACHE_SNP \
|
-DDBG_PRINT_CACHE_SNP \
|
||||||
-DDBG_PRINT_CACHE_MSRQ \
|
-DDBG_PRINT_CACHE_MSRQ \
|
||||||
-DDBG_PRINT_DRAM \
|
-DDBG_PRINT_DRAM \
|
||||||
-DDBG_PRINT_OPAE
|
-DDBG_PRINT_OPAE
|
||||||
|
|
||||||
|
#DBG_PRINT=$(DBG_PRINT_FLAGS)
|
||||||
|
|
||||||
#MULTICORE += -DNUM_CLUSTERS=2 -DNUM_CORES=4
|
#MULTICORE += -DNUM_CLUSTERS=2 -DNUM_CORES=4
|
||||||
#MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=4
|
#MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=4
|
||||||
@@ -43,7 +45,7 @@ VL_FLAGS += -DGLOBAL_BLOCK_SIZE=64
|
|||||||
|
|
||||||
# Debugging
|
# Debugging
|
||||||
ifdef DEBUG
|
ifdef DEBUG
|
||||||
VL_FLAGS += --trace -DVL_DEBUG=1 $(DBG_PRINT)
|
VL_FLAGS += --trace $(DBG_PRINT)
|
||||||
CFLAGS += -DVCD_OUTPUT
|
CFLAGS += -DVCD_OUTPUT
|
||||||
else
|
else
|
||||||
CFLAGS += -DNDEBUG
|
CFLAGS += -DNDEBUG
|
||||||
|
|||||||
19
hw/Makefile
19
hw/Makefile
@@ -16,11 +16,22 @@ VF += -DGLOBAL_BLOCK_SIZE=64
|
|||||||
#MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=4
|
#MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=4
|
||||||
MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=2
|
MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=2
|
||||||
|
|
||||||
|
# control RTL debug print states
|
||||||
|
DBG_PRINT_FLAGS = -DDBG_PRINT_CORE_ICACHE \
|
||||||
|
-DDBG_PRINT_CORE_DCACHE \
|
||||||
|
-DDBG_PRINT_CACHE_BANK \
|
||||||
|
-DDBG_PRINT_CACHE_SNP \
|
||||||
|
-DDBG_PRINT_CACHE_MSRQ \
|
||||||
|
-DDBG_PRINT_DRAM \
|
||||||
|
-DDBG_PRINT_OPAE
|
||||||
|
|
||||||
|
#DBG_PRINT=$(DBG_PRINT_FLAGS)
|
||||||
|
|
||||||
INCLUDE = -I./rtl/ -I./rtl/libs -I./rtl/interfaces -I./rtl/pipe_regs -I./rtl/cache -I./rtl/simulate
|
INCLUDE = -I./rtl/ -I./rtl/libs -I./rtl/interfaces -I./rtl/pipe_regs -I./rtl/cache -I./rtl/simulate
|
||||||
|
|
||||||
SRCS += ./simulate/testbench.cpp ./simulate/simulator.cpp
|
SRCS += ./simulate/testbench.cpp ./simulate/simulator.cpp
|
||||||
|
|
||||||
DBG += --trace -DVL_DEBUG=1
|
DBG += --trace
|
||||||
|
|
||||||
THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu_count() // 2))')
|
THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu_count() // 2))')
|
||||||
|
|
||||||
@@ -33,16 +44,16 @@ gen-s: build_config
|
|||||||
verilator $(VF) -DNDEBUG -cc Vortex_Socket.v -CFLAGS '$(CF) -DNDEBUG'
|
verilator $(VF) -DNDEBUG -cc Vortex_Socket.v -CFLAGS '$(CF) -DNDEBUG'
|
||||||
|
|
||||||
gen-sd: build_config
|
gen-sd: build_config
|
||||||
verilator $(VF) -cc Vortex_Socket.v -CFLAGS '$(CF) -g -O0 -DVCD_OUTPUT' $(DBG)
|
verilator $(VF) -cc Vortex_Socket.v $(DBG_PRINT) -CFLAGS '$(CF) -g -O0 -DVCD_OUTPUT' $(DBG)
|
||||||
|
|
||||||
gen-st: build_config
|
gen-st: build_config
|
||||||
verilator $(VF) -cc Vortex_Socket.v -CFLAGS '$(CF) -DNDEBUG -O2' --threads $(THREADS)
|
verilator $(VF) -DNDEBUG -cc Vortex_Socket.v -CFLAGS '$(CF) -DNDEBUG -O2' --threads $(THREADS)
|
||||||
|
|
||||||
gen-m: build_config
|
gen-m: build_config
|
||||||
verilator $(VF) -DNDEBUG -cc Vortex_Socket.v $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG $(MULTICORE)'
|
verilator $(VF) -DNDEBUG -cc Vortex_Socket.v $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG $(MULTICORE)'
|
||||||
|
|
||||||
gen-md: build_config
|
gen-md: build_config
|
||||||
verilator $(VF) -cc Vortex_Socket.v $(MULTICORE) -CFLAGS '$(CF) -g -O0 -DVCD_OUTPUT $(MULTICORE)' $(DBG)
|
verilator $(VF) -cc Vortex_Socket.v $(MULTICORE) $(DBG_PRINT) -CFLAGS '$(CF) -g -O0 -DVCD_OUTPUT $(MULTICORE)' $(DBG)
|
||||||
|
|
||||||
gen-mt: build_config
|
gen-mt: build_config
|
||||||
verilator $(VF) -DNDEBUG -cc Vortex_Socket.v $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG -O2 $(MULTICORE)' --threads $(THREADS)
|
verilator $(VF) -DNDEBUG -cc Vortex_Socket.v $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG -O2 $(MULTICORE)' --threads $(THREADS)
|
||||||
|
|||||||
@@ -17,14 +17,13 @@ module VX_scheduler (
|
|||||||
assign is_empty = count_valid == 0;
|
assign is_empty = count_valid == 0;
|
||||||
|
|
||||||
reg[31:0][`NUM_THREADS-1:0] rename_table[`NUM_WARPS-1:0];
|
reg[31:0][`NUM_THREADS-1:0] rename_table[`NUM_WARPS-1:0];
|
||||||
reg[31:0] valid_table [`NUM_WARPS-1:0];
|
|
||||||
|
|
||||||
wire valid_wb = (writeback_if.wb != 0) && (| writeback_if.valid) && (writeback_if.rd != 0);
|
wire valid_wb = (writeback_if.wb != 0) && (| writeback_if.valid) && (writeback_if.rd != 0);
|
||||||
wire wb_inc = (bckE_req_if.wb != 0) && (bckE_req_if.rd != 0);
|
wire wb_inc = (bckE_req_if.wb != 0) && (bckE_req_if.rd != 0);
|
||||||
|
|
||||||
wire rs1_rename = (rename_table[bckE_req_if.warp_num][bckE_req_if.rs1] != 0) && valid_table[bckE_req_if.warp_num][bckE_req_if.rs1];
|
wire rs1_rename = (rename_table[bckE_req_if.warp_num][bckE_req_if.rs1] != 0);
|
||||||
wire rs2_rename = (rename_table[bckE_req_if.warp_num][bckE_req_if.rs2] != 0) && valid_table[bckE_req_if.warp_num][bckE_req_if.rs2];
|
wire rs2_rename = (rename_table[bckE_req_if.warp_num][bckE_req_if.rs2] != 0);
|
||||||
wire rd_rename = (rename_table[bckE_req_if.warp_num][bckE_req_if.rd ] != 0) && valid_table[bckE_req_if.warp_num][bckE_req_if.rd ];
|
wire rd_rename = (rename_table[bckE_req_if.warp_num][bckE_req_if.rd ] != 0);
|
||||||
|
|
||||||
wire is_store = (bckE_req_if.mem_write != `BYTE_EN_NO);
|
wire is_store = (bckE_req_if.mem_write != `BYTE_EN_NO);
|
||||||
wire is_load = (bckE_req_if.mem_read != `BYTE_EN_NO);
|
wire is_load = (bckE_req_if.mem_read != `BYTE_EN_NO);
|
||||||
@@ -35,7 +34,7 @@ module VX_scheduler (
|
|||||||
wire is_csr = bckE_req_if.is_csr;
|
wire is_csr = bckE_req_if.is_csr;
|
||||||
wire is_exec = !is_mem && !is_gpu && !is_csr;
|
wire is_exec = !is_mem && !is_gpu && !is_csr;
|
||||||
|
|
||||||
wire using_rs2 = (bckE_req_if.rs2_src == `RS2_REG) || is_store || bckE_req_if.is_barrier || bckE_req_if.is_wspawn;
|
wire using_rs2 = (bckE_req_if.rs2_src == `RS2_REG) || is_store || bckE_req_if.is_barrier || bckE_req_if.is_wspawn;
|
||||||
|
|
||||||
wire rs1_rename_qual = ((rs1_rename) && (bckE_req_if.rs1 != 0));
|
wire rs1_rename_qual = ((rs1_rename) && (bckE_req_if.rs1 != 0));
|
||||||
wire rs2_rename_qual = ((rs2_rename) && (bckE_req_if.rs2 != 0 && using_rs2));
|
wire rs2_rename_qual = ((rs2_rename) && (bckE_req_if.rs2 != 0 && using_rs2));
|
||||||
@@ -44,37 +43,31 @@ module VX_scheduler (
|
|||||||
wire rename_valid = rs1_rename_qual || rs2_rename_qual || rd_rename_qual;
|
wire rename_valid = rs1_rename_qual || rs2_rename_qual || rd_rename_qual;
|
||||||
|
|
||||||
assign schedule_delay = (| bckE_req_if.valid)
|
assign schedule_delay = (| bckE_req_if.valid)
|
||||||
&& ((rename_valid )
|
&& ((rename_valid)
|
||||||
|| (memory_delay && is_mem)
|
|| (memory_delay && is_mem)
|
||||||
|| (gpr_stage_delay && (is_mem || is_exec))
|
|| (gpr_stage_delay && (is_mem || is_exec))
|
||||||
|| (exec_delay && is_exec));
|
|| (exec_delay && is_exec));
|
||||||
|
|
||||||
integer i, w;
|
integer i, w;
|
||||||
|
|
||||||
wire[`NUM_THREADS-1:0] old_rename_mask = rename_table[writeback_if.warp_num][writeback_if.rd];
|
wire[`NUM_THREADS-1:0] old_rename_mask = rename_table[writeback_if.warp_num][writeback_if.rd];
|
||||||
wire[`NUM_THREADS-1:0] invalidate_mask = (~writeback_if.valid);
|
wire[`NUM_THREADS-1:0] invalidate_mask = ~writeback_if.valid;
|
||||||
|
wire[`NUM_THREADS-1:0] valid_wb_new_mask = old_rename_mask & invalidate_mask;
|
||||||
wire[`NUM_THREADS-1:0] valid_wb_new_mask = old_rename_mask & invalidate_mask;
|
|
||||||
wire valid_wb_new_valid = valid_wb_new_mask != 0;
|
|
||||||
|
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (reset) begin
|
if (reset) begin
|
||||||
for (w = 0; w < `NUM_WARPS; w=w+1) begin
|
for (w = 0; w < `NUM_WARPS; w=w+1) begin
|
||||||
for (i = 0; i < 32; i++) begin
|
for (i = 0; i < 32; i++) begin
|
||||||
// rename_table[w][i] <= 0;
|
rename_table[w][i] <= 0;
|
||||||
valid_table[w][i] <= 0;
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end else begin
|
end else begin
|
||||||
if (valid_wb) begin
|
if (valid_wb) begin
|
||||||
rename_table[writeback_if.warp_num][writeback_if.rd] <= valid_wb_new_mask;
|
rename_table[writeback_if.warp_num][writeback_if.rd] <= valid_wb_new_mask;
|
||||||
valid_table [writeback_if.warp_num][writeback_if.rd] <= valid_wb_new_valid;
|
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
if (!schedule_delay && wb_inc) begin
|
if (!schedule_delay && wb_inc) begin
|
||||||
rename_table[bckE_req_if.warp_num][bckE_req_if.rd] <= bckE_req_if.valid;
|
rename_table[bckE_req_if.warp_num][bckE_req_if.rd] <= bckE_req_if.valid;
|
||||||
valid_table [bckE_req_if.warp_num][bckE_req_if.rd] <= 1'b1;
|
|
||||||
end
|
end
|
||||||
|
|
||||||
if (valid_wb
|
if (valid_wb
|
||||||
|
|||||||
@@ -10,16 +10,20 @@ double sc_time_stamp() {
|
|||||||
|
|
||||||
Simulator::Simulator() {
|
Simulator::Simulator() {
|
||||||
// force random values for uninitialized signals
|
// force random values for uninitialized signals
|
||||||
const char* args[] = {"", "+verilator+rand+reset+2", "+verilator+seed+0"};
|
const char* args[] = {"", "+verilator+rand+reset+1", "+verilator+seed+0"};
|
||||||
Verilated::commandArgs(3, args);
|
Verilated::commandArgs(3, args);
|
||||||
|
|
||||||
#ifndef NDEBUG
|
|
||||||
Verilated::debug(1);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
ram_ = nullptr;
|
ram_ = nullptr;
|
||||||
vortex_ = new VVortex_Socket();
|
vortex_ = new VVortex_Socket();
|
||||||
|
|
||||||
|
// initial values
|
||||||
|
vortex_->dram_req_ready = 0;
|
||||||
|
vortex_->dram_rsp_valid = 0;
|
||||||
|
vortex_->io_req_ready = 0;
|
||||||
|
vortex_->io_rsp_valid = 0;
|
||||||
|
vortex_->snp_req_valid = 0;
|
||||||
|
vortex_->snp_rsp_ready = 0;
|
||||||
|
|
||||||
#ifdef VCD_OUTPUT
|
#ifdef VCD_OUTPUT
|
||||||
Verilated::traceEverOn(true);
|
Verilated::traceEverOn(true);
|
||||||
trace_ = new VerilatedVcdC;
|
trace_ = new VerilatedVcdC;
|
||||||
@@ -47,7 +51,7 @@ void Simulator::print_stats(std::ostream& out) {
|
|||||||
|
|
||||||
void Simulator::dbus_driver() {
|
void Simulator::dbus_driver() {
|
||||||
if (ram_ == nullptr) {
|
if (ram_ == nullptr) {
|
||||||
vortex_->dram_req_ready = false;
|
vortex_->dram_req_ready = 0;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -126,7 +130,8 @@ void Simulator::io_driver() {
|
|||||||
char c = (char)data_write;
|
char c = (char)data_write;
|
||||||
std::cout << c;
|
std::cout << c;
|
||||||
}
|
}
|
||||||
vortex_->io_req_ready = true;
|
vortex_->io_req_ready = 1;
|
||||||
|
vortex_->io_rsp_valid = 01;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Simulator::reset() {
|
void Simulator::reset() {
|
||||||
@@ -180,8 +185,8 @@ void Simulator::flush_caches(uint32_t mem_addr, uint32_t size) {
|
|||||||
|
|
||||||
// submit snoop requests for the needed blocks
|
// submit snoop requests for the needed blocks
|
||||||
vortex_->snp_req_addr = aligned_addr_start;
|
vortex_->snp_req_addr = aligned_addr_start;
|
||||||
vortex_->snp_req_valid = true;
|
vortex_->snp_req_valid = 1;
|
||||||
vortex_->snp_rsp_ready = true;
|
vortex_->snp_rsp_ready = 1;
|
||||||
for (;;) {
|
for (;;) {
|
||||||
this->step();
|
this->step();
|
||||||
if (vortex_->snp_rsp_valid) {
|
if (vortex_->snp_rsp_valid) {
|
||||||
@@ -192,7 +197,7 @@ void Simulator::flush_caches(uint32_t mem_addr, uint32_t size) {
|
|||||||
++outstanding_snp_reqs;
|
++outstanding_snp_reqs;
|
||||||
vortex_->snp_req_addr += 1;
|
vortex_->snp_req_addr += 1;
|
||||||
if (vortex_->snp_req_addr >= aligned_addr_end) {
|
if (vortex_->snp_req_addr >= aligned_addr_end) {
|
||||||
vortex_->snp_req_valid = false;
|
vortex_->snp_req_valid = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!vortex_->snp_req_valid
|
if (!vortex_->snp_req_valid
|
||||||
|
|||||||
Reference in New Issue
Block a user