diff --git a/hw/rtl/VX_commit.v b/hw/rtl/VX_commit.v index 5958f75a..3c55c6ee 100644 --- a/hw/rtl/VX_commit.v +++ b/hw/rtl/VX_commit.v @@ -8,7 +8,8 @@ module VX_commit #( // inputs VX_commit_if alu_commit_if, - VX_commit_if lsu_commit_if, + VX_commit_if ld_commit_if, + VX_commit_if st_commit_if, VX_commit_if mul_commit_if, VX_commit_if csr_commit_if, VX_commit_if fpu_commit_if, @@ -18,37 +19,44 @@ module VX_commit #( VX_writeback_if writeback_if, VX_cmt_to_csr_if cmt_to_csr_if ); - localparam CMTW = $clog2(`NUM_THREADS+1); + localparam CMTW = $clog2(3*`NUM_THREADS+1); // CSRs update wire alu_commit_fire = alu_commit_if.valid && alu_commit_if.ready; - wire lsu_commit_fire = lsu_commit_if.valid && lsu_commit_if.ready; + wire ld_commit_fire = ld_commit_if.valid && ld_commit_if.ready; + wire st_commit_fire = st_commit_if.valid && st_commit_if.ready; wire csr_commit_fire = csr_commit_if.valid && csr_commit_if.ready; wire mul_commit_fire = mul_commit_if.valid && mul_commit_if.ready; wire fpu_commit_fire = fpu_commit_if.valid && fpu_commit_if.ready; wire gpu_commit_fire = gpu_commit_if.valid && gpu_commit_if.ready; wire commit_fire = alu_commit_fire - || lsu_commit_fire + || ld_commit_fire + || st_commit_fire || csr_commit_fire || mul_commit_fire || fpu_commit_fire || gpu_commit_fire; - wire [`NUM_THREADS-1:0] commit_tmask = alu_commit_fire ? alu_commit_if.tmask: - lsu_commit_fire ? lsu_commit_if.tmask: - csr_commit_fire ? csr_commit_if.tmask: - mul_commit_fire ? mul_commit_if.tmask: - fpu_commit_fire ? fpu_commit_if.tmask: - gpu_commit_if.tmask; + wire [`NUM_THREADS-1:0] commit_tmask1, commit_tmask2, commit_tmask3; + + assign commit_tmask1 = alu_commit_fire ? alu_commit_if.tmask: + ld_commit_fire ? ld_commit_if.tmask: + csr_commit_fire ? csr_commit_if.tmask: + mul_commit_fire ? mul_commit_if.tmask: + fpu_commit_fire ? fpu_commit_if.tmask: + 0; + + assign commit_tmask2 = st_commit_fire ? st_commit_if.tmask : 0; + assign commit_tmask3 = gpu_commit_fire ? gpu_commit_if.tmask : 0; wire [CMTW-1:0] commit_size; VX_countones #( - .N(`NUM_THREADS) - ) commit_ctr ( - .valids(commit_tmask), + .N(3*`NUM_THREADS) + ) commit_ctr1 ( + .valids({commit_tmask3, commit_tmask2, commit_tmask1}), .count (commit_size) ); @@ -64,22 +72,28 @@ module VX_commit #( .reset (reset), .alu_commit_if (alu_commit_if), - .lsu_commit_if (lsu_commit_if), + .ld_commit_if (ld_commit_if), .csr_commit_if (csr_commit_if), .mul_commit_if (mul_commit_if), - .fpu_commit_if (fpu_commit_if), - .gpu_commit_if (gpu_commit_if), + .fpu_commit_if (fpu_commit_if), .writeback_if (writeback_if) ); + // store and gpu commits don't writeback + assign st_commit_if.ready = 1'b1; + assign gpu_commit_if.ready = 1'b1; + `ifdef DBG_PRINT_PIPELINE always @(posedge clk) begin if (alu_commit_if.valid && alu_commit_if.ready) begin $display("%t: core%0d-commit: wid=%0d, PC=%0h, ex=ALU, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, alu_commit_if.wid, alu_commit_if.PC, alu_commit_if.tmask, alu_commit_if.wb, alu_commit_if.rd, alu_commit_if.data); end - if (lsu_commit_if.valid && lsu_commit_if.ready) begin - $display("%t: core%0d-commit: wid=%0d, PC=%0h, ex=LSU, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, lsu_commit_if.wid, lsu_commit_if.PC, lsu_commit_if.tmask, lsu_commit_if.wb, lsu_commit_if.rd, lsu_commit_if.data); + if (ld_commit_if.valid && ld_commit_if.ready) begin + $display("%t: core%0d-commit: wid=%0d, PC=%0h, ex=LSU, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, ld_commit_if.wid, ld_commit_if.PC, ld_commit_if.tmask, ld_commit_if.wb, ld_commit_if.rd, ld_commit_if.data); + end + if (st_commit_if.valid && st_commit_if.ready) begin + $display("%t: core%0d-commit: wid=%0d, PC=%0h, ex=LSU, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, st_commit_if.wid, st_commit_if.PC, st_commit_if.tmask, st_commit_if.wb, st_commit_if.rd, st_commit_if.data); end if (csr_commit_if.valid && csr_commit_if.ready) begin $display("%t: core%0d-commit: wid=%0d, PC=%0h, ex=CSR, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, csr_commit_if.wid, csr_commit_if.PC, csr_commit_if.tmask, csr_commit_if.wb, csr_commit_if.rd, csr_commit_if.data); diff --git a/hw/rtl/VX_execute.v b/hw/rtl/VX_execute.v index 026b73c5..c4ae3292 100644 --- a/hw/rtl/VX_execute.v +++ b/hw/rtl/VX_execute.v @@ -31,7 +31,8 @@ module VX_execute #( VX_branch_ctl_if branch_ctl_if, VX_warp_ctl_if warp_ctl_if, VX_commit_if alu_commit_if, - VX_commit_if lsu_commit_if, + VX_commit_if ld_commit_if, + VX_commit_if st_commit_if, VX_commit_if csr_commit_if, VX_commit_if mul_commit_if, VX_commit_if fpu_commit_if, @@ -63,7 +64,8 @@ module VX_execute #( .dcache_req_if (dcache_req_if), .dcache_rsp_if (dcache_rsp_if), .lsu_req_if (lsu_req_if), - .lsu_commit_if (lsu_commit_if) + .ld_commit_if (ld_commit_if), + .st_commit_if (st_commit_if) ); VX_csr_unit #( diff --git a/hw/rtl/VX_lsu_unit.v b/hw/rtl/VX_lsu_unit.v index 5347626b..d54cb853 100644 --- a/hw/rtl/VX_lsu_unit.v +++ b/hw/rtl/VX_lsu_unit.v @@ -16,7 +16,8 @@ module VX_lsu_unit #( VX_lsu_req_if lsu_req_if, // outputs - VX_commit_if lsu_commit_if + VX_commit_if ld_commit_if, + VX_commit_if st_commit_if ); wire [`NUM_THREADS-1:0] req_tmask; wire req_rw; @@ -135,7 +136,7 @@ module VX_lsu_unit #( end end - wire stall_out = ~lsu_commit_if.ready && lsu_commit_if.valid; + wire stall_out = ~ld_commit_if.ready && ld_commit_if.valid; wire store_stall = valid_in && req_rw && stall_out; // Core Request @@ -168,18 +169,23 @@ module VX_lsu_unit #( end end - wire is_store_req = valid_in && ~lsuq_full && req_rw && dcache_req_if.ready; + // send store commit + + wire is_store_rsp = valid_in && ~lsuq_full && req_rw && dcache_req_if.ready; + + assign st_commit_if.valid = is_store_rsp; + assign st_commit_if.wid = req_wid; + assign st_commit_if.tmask = req_tmask; + assign st_commit_if.PC = req_pc; + assign st_commit_if.rd = 0; + assign st_commit_if.wb = 0; + assign st_commit_if.data = 0; + `UNUSED_VAR (st_commit_if.ready) + + // send load commit + wire is_load_rsp = (| dcache_rsp_if.valid); - - wire mem_rsp_stall = is_load_rsp && is_store_req; // arbitration prioritizes stores - - wire arb_valid = is_store_req || is_load_rsp; - wire [`NW_BITS-1:0] arb_wid = is_store_req ? req_wid : rsp_wid; - wire [`NUM_THREADS-1:0] arb_tmask = is_store_req ? req_tmask : dcache_rsp_if.valid; - wire [31:0] arb_PC = is_store_req ? req_pc : rsp_pc; - wire [`NR_BITS-1:0] arb_rd = is_store_req ? 0 : rsp_rd; - wire arb_wb = is_store_req ? 0 : rsp_wb; - + VX_generic_register #( .N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)), .R(1) @@ -188,12 +194,12 @@ module VX_lsu_unit #( .reset (reset), .stall (stall_out), .flush (1'b0), - .in ({arb_valid, arb_wid, arb_tmask, arb_PC, arb_rd, arb_wb, rsp_data}), - .out ({lsu_commit_if.valid, lsu_commit_if.wid, lsu_commit_if.tmask, lsu_commit_if.PC, lsu_commit_if.rd, lsu_commit_if.wb, lsu_commit_if.data}) + .in ({is_load_rsp, rsp_wid, dcache_rsp_if.valid, rsp_pc, rsp_rd, rsp_wb, rsp_data}), + .out ({ld_commit_if.valid, ld_commit_if.wid, ld_commit_if.tmask, ld_commit_if.PC, ld_commit_if.rd, ld_commit_if.wb, ld_commit_if.data}) ); // Can accept new cache response? - assign dcache_rsp_if.ready = ~(stall_out || mem_rsp_stall); + assign dcache_rsp_if.ready = ~stall_out; // scope registration `SCOPE_ASSIGN (dcache_req_fire, dcache_req_if.valid & {`NUM_THREADS{dcache_req_if.ready}}); diff --git a/hw/rtl/VX_pipeline.v b/hw/rtl/VX_pipeline.v index 90659c4e..c9746a7b 100644 --- a/hw/rtl/VX_pipeline.v +++ b/hw/rtl/VX_pipeline.v @@ -113,8 +113,9 @@ module VX_pipeline #( VX_wstall_if wstall_if(); VX_join_if join_if(); VX_commit_if alu_commit_if(); - VX_commit_if lsu_commit_if(); - VX_commit_if csr_commit_if(); + VX_commit_if ld_commit_if(); + VX_commit_if st_commit_if(); + VX_commit_if csr_commit_if(); VX_commit_if mul_commit_if(); VX_commit_if fpu_commit_if(); VX_commit_if gpu_commit_if(); @@ -191,7 +192,8 @@ module VX_pipeline #( .warp_ctl_if (warp_ctl_if), .branch_ctl_if (branch_ctl_if), .alu_commit_if (alu_commit_if), - .lsu_commit_if (lsu_commit_if), + .ld_commit_if (ld_commit_if), + .st_commit_if (st_commit_if), .csr_commit_if (csr_commit_if), .mul_commit_if (mul_commit_if), .fpu_commit_if (fpu_commit_if), @@ -208,7 +210,8 @@ module VX_pipeline #( .reset (reset), .alu_commit_if (alu_commit_if), - .lsu_commit_if (lsu_commit_if), + .ld_commit_if (ld_commit_if), + .st_commit_if (st_commit_if), .csr_commit_if (csr_commit_if), .mul_commit_if (mul_commit_if), .fpu_commit_if (fpu_commit_if), diff --git a/hw/rtl/VX_writeback.v b/hw/rtl/VX_writeback.v index 4eb8c998..98befe09 100644 --- a/hw/rtl/VX_writeback.v +++ b/hw/rtl/VX_writeback.v @@ -8,17 +8,16 @@ module VX_writeback #( // inputs VX_commit_if alu_commit_if, - VX_commit_if lsu_commit_if, + VX_commit_if ld_commit_if, VX_commit_if csr_commit_if, VX_commit_if mul_commit_if, - VX_commit_if fpu_commit_if, - VX_commit_if gpu_commit_if, + VX_commit_if fpu_commit_if, // outputs VX_writeback_if writeback_if ); wire alu_valid = alu_commit_if.valid && alu_commit_if.wb; - wire lsu_valid = lsu_commit_if.valid && lsu_commit_if.wb; + wire ld_valid = ld_commit_if.valid /*&& ld_commit_if.wb*/; wire csr_valid = csr_commit_if.valid && csr_commit_if.wb; wire mul_valid = mul_commit_if.valid && mul_commit_if.wb; wire fpu_valid = fpu_commit_if.valid && fpu_commit_if.wb; @@ -31,42 +30,42 @@ module VX_writeback #( wire [`NUM_THREADS-1:0][31:0] wb_data; assign wb_valid = alu_valid ? alu_commit_if.valid : - lsu_valid ? lsu_commit_if.valid : + ld_valid ? ld_commit_if.valid : csr_valid ? csr_commit_if.valid : mul_valid ? mul_commit_if.valid : fpu_valid ? fpu_commit_if.valid : 0; assign wb_wid = alu_valid ? alu_commit_if.wid : - lsu_valid ? lsu_commit_if.wid : + ld_valid ? ld_commit_if.wid : csr_valid ? csr_commit_if.wid : mul_valid ? mul_commit_if.wid : fpu_valid ? fpu_commit_if.wid : 0; assign wb_PC = alu_valid ? alu_commit_if.PC : - lsu_valid ? lsu_commit_if.PC : + ld_valid ? ld_commit_if.PC : csr_valid ? csr_commit_if.PC : mul_valid ? mul_commit_if.PC : fpu_valid ? fpu_commit_if.PC : 0; assign wb_tmask = alu_valid ? alu_commit_if.tmask : - lsu_valid ? lsu_commit_if.tmask : + ld_valid ? ld_commit_if.tmask : csr_valid ? csr_commit_if.tmask : mul_valid ? mul_commit_if.tmask : fpu_valid ? fpu_commit_if.tmask : 0; assign wb_rd = alu_valid ? alu_commit_if.rd : - lsu_valid ? lsu_commit_if.rd : + ld_valid ? ld_commit_if.rd : csr_valid ? csr_commit_if.rd : mul_valid ? mul_commit_if.rd : fpu_valid ? fpu_commit_if.rd : 0; assign wb_data = alu_valid ? alu_commit_if.data : - lsu_valid ? lsu_commit_if.data : + ld_valid ? ld_commit_if.data : csr_valid ? csr_commit_if.data : mul_valid ? mul_commit_if.data : fpu_valid ? fpu_commit_if.data : @@ -88,11 +87,10 @@ module VX_writeback #( ); assign alu_commit_if.ready = !stall; - assign lsu_commit_if.ready = !stall && !alu_valid; - assign csr_commit_if.ready = !stall && !alu_valid && !lsu_valid; - assign mul_commit_if.ready = !stall && !alu_valid && !lsu_valid && !csr_valid; - assign fpu_commit_if.ready = !stall && !alu_valid && !lsu_valid && !csr_valid && !mul_valid; - assign gpu_commit_if.ready = 1'b1; + assign ld_commit_if.ready = !stall && !alu_valid; + assign csr_commit_if.ready = !stall && !alu_valid && !ld_valid; + assign mul_commit_if.ready = !stall && !alu_valid && !ld_valid && !csr_valid; + assign fpu_commit_if.ready = !stall && !alu_valid && !ld_valid && !csr_valid && !mul_valid; // special workaround to get RISC-V tests Pass/Fail status reg [31:0] last_wb_value [`NUM_REGS-1:0] /* verilator public */; diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index 6925101e..caeb10a1 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -835,10 +835,12 @@ end wire dwbq_pop = dram_req_valid && dram_req_ready; - wire [`LINE_ADDR_WIDTH-1:0] dwbq_addr = do_writeback_st3 ? {readtag_st3, addr_st3[`LINE_SELECT_BITS-1:0]} : - addr_st3; + wire writeback = WRITE_ENABLE && do_writeback_st3; - wire [BANK_LINE_SIZE-1:0] dwbq_byteen = do_writeback_st3 ? dirtyb_st3 : {BANK_LINE_SIZE{1'b1}}; + wire [`LINE_ADDR_WIDTH-1:0] dwbq_addr = writeback ? {readtag_st3, addr_st3[`LINE_SELECT_BITS-1:0]} : + addr_st3; + + wire [BANK_LINE_SIZE-1:0] dwbq_byteen = writeback ? dirtyb_st3 : {BANK_LINE_SIZE{1'b1}}; if (DRAM_ENABLE) begin VX_generic_queue #( @@ -850,8 +852,8 @@ end .reset (reset), .push (dwbq_push), .pop (dwbq_pop), - .data_in ({do_writeback_st3, dwbq_byteen, dwbq_addr, readdata_st3}), - .data_out({dram_req_rw, dram_req_byteen, dram_req_addr, dram_req_data}), + .data_in ({writeback, dwbq_byteen, dwbq_addr, readdata_st3}), + .data_out({dram_req_rw, dram_req_byteen, dram_req_addr, dram_req_data}), .empty (dwbq_empty), .full (dwbq_full), `UNUSED_PIN (size) @@ -864,6 +866,7 @@ end `UNUSED_VAR (readtag_st3) `UNUSED_VAR (dirtyb_st3) `UNUSED_VAR (readdata_st3) + `UNUSED_VAR (writeback) `UNUSED_VAR (dram_req_ready) assign dwbq_empty = 1; assign dwbq_full = 0; @@ -895,8 +898,8 @@ end if (FLUSH_ENABLE) begin VX_generic_queue #( - .DATAW (SNP_TAG_WIDTH), - .SIZE (SNPQ_SIZE), + .DATAW (SNP_TAG_WIDTH), + .SIZE (SNPQ_SIZE), .BUFFERED(1) ) snp_rsp_queue ( .clk (clk), @@ -933,7 +936,7 @@ end `SCOPE_ASSIGN (valid_st2, valid_st2); `SCOPE_ASSIGN (valid_st3, valid_st3); - `SCOPE_ASSIGN (is_mshr_st0, is_mshr_st0); + `SCOPE_ASSIGN (is_mshr_st0, is_mshr_st0); `SCOPE_ASSIGN (miss_st1, miss_st1); `SCOPE_ASSIGN (dirty_st1, dirty_st1); diff --git a/hw/rtl/cache/VX_cache_core_rsp_merge.v b/hw/rtl/cache/VX_cache_core_rsp_merge.v index 6fd0fc9a..56c28612 100644 --- a/hw/rtl/cache/VX_cache_core_rsp_merge.v +++ b/hw/rtl/cache/VX_cache_core_rsp_merge.v @@ -34,10 +34,11 @@ module VX_cache_core_rsp_merge #( reg [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_unqual; reg [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data_unqual; reg [NUM_BANKS-1:0] core_rsp_bank_select; - reg [CORE_TAG_ID_BITS-1:0] sel_tag_id; - + if (CORE_TAG_ID_BITS != 0) begin + reg [CORE_TAG_ID_BITS-1:0] sel_tag_id; + always @(*) begin core_rsp_valid_unqual = 0; core_rsp_tag_unqual = 'x; diff --git a/hw/rtl/interfaces/VX_cmt_to_csr_if.v b/hw/rtl/interfaces/VX_cmt_to_csr_if.v index ce2549d6..366bde1e 100644 --- a/hw/rtl/interfaces/VX_cmt_to_csr_if.v +++ b/hw/rtl/interfaces/VX_cmt_to_csr_if.v @@ -6,7 +6,7 @@ interface VX_cmt_to_csr_if (); wire valid; - wire [$clog2(`NUM_THREADS+1)-1:0] commit_size; + wire [$clog2(3*`NUM_THREADS+1)-1:0] commit_size; endinterface diff --git a/hw/rtl/interfaces/VX_fpu_to_cmt_if.v b/hw/rtl/interfaces/VX_fpu_to_cmt_if.v deleted file mode 100644 index 34e269d5..00000000 --- a/hw/rtl/interfaces/VX_fpu_to_cmt_if.v +++ /dev/null @@ -1,23 +0,0 @@ -`ifndef VX_FPU_TO_CMT_IF -`define VX_FPU_TO_CMT_IF - -`include "VX_define.vh" - -interface VX_fpu_to_cmt_if (); - - wire valid; - - wire [`NW_BITS-1:0] wid; - wire [`NUM_THREADS-1:0] tmask; - wire [31:0] PC; - wire [`NUM_THREADS-1:0][31:0] data; - wire [`NR_BITS-1:0] rd; - wire wb; - wire has_fflags; - fflags_t [`NUM_THREADS-1:0] fflags; - - wire ready; - -endinterface - -`endif \ No newline at end of file