From 5758ef9ebf33d362d3be6de589da86fbe6053951 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 29 Nov 2020 18:41:36 -0800 Subject: [PATCH] generic_register reset network optimization --- hw/rtl/VX_alu_unit.v | 3 +- hw/rtl/VX_csr_io_arb.v | 1 + hw/rtl/VX_csr_unit.v | 3 +- hw/rtl/VX_fpu_unit.v | 3 +- hw/rtl/VX_gpu_unit.v | 3 +- hw/rtl/VX_io_arb.v | 1 + hw/rtl/VX_lsu_unit.v | 6 ++- hw/rtl/VX_mem_arb.v | 1 + hw/rtl/VX_mul_unit.v | 3 +- hw/rtl/VX_opd_collect.v | 3 +- hw/rtl/VX_warp_sched.v | 3 +- hw/rtl/VX_writeback.v | 3 +- hw/rtl/cache/VX_bank.v | 52 +++++++++++++------------- hw/rtl/cache/VX_cache_core_rsp_merge.v | 1 + hw/rtl/cache/VX_cache_dram_req_arb.v | 1 + hw/rtl/cache/VX_data_access.v | 4 +- hw/rtl/cache/VX_snp_forwarder.v | 1 + hw/rtl/cache/VX_snp_rsp_arb.v | 1 + hw/rtl/fp_cores/VX_fp_noncomp.v | 9 +++-- hw/rtl/libs/VX_cam_buffer.v | 4 +- hw/rtl/libs/VX_generic_register.v | 26 +++++++++---- 21 files changed, 84 insertions(+), 48 deletions(-) diff --git a/hw/rtl/VX_alu_unit.v b/hw/rtl/VX_alu_unit.v index 24eebc52..6a63e434 100644 --- a/hw/rtl/VX_alu_unit.v +++ b/hw/rtl/VX_alu_unit.v @@ -97,7 +97,8 @@ module VX_alu_unit #( wire stall_out = ~alu_commit_if.ready && alu_commit_if.valid; VX_generic_register #( - .N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + `BR_BITS + 32 + 33) + .N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + `BR_BITS + 32 + 33), + .R(1) ) pipe_reg ( .clk (clk), .reset (reset), diff --git a/hw/rtl/VX_csr_io_arb.v b/hw/rtl/VX_csr_io_arb.v index 4155e9bd..f0d88ba2 100644 --- a/hw/rtl/VX_csr_io_arb.v +++ b/hw/rtl/VX_csr_io_arb.v @@ -64,6 +64,7 @@ module VX_csr_io_arb #( VX_generic_register #( .N(1 + 32), + .R(1), .PASSTHRU(NUM_REQUESTS <= 2) ) pipe_reg ( .clk (clk), diff --git a/hw/rtl/VX_csr_unit.v b/hw/rtl/VX_csr_unit.v index 1689121a..05f4daf4 100644 --- a/hw/rtl/VX_csr_unit.v +++ b/hw/rtl/VX_csr_unit.v @@ -99,7 +99,8 @@ module VX_csr_unit #( wire stall_out = ~csr_pipe_rsp_if.ready && csr_pipe_rsp_if.valid; VX_generic_register #( - .N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + 1 + `CSR_ADDR_BITS + 1 + 32 + 32) + .N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + 1 + `CSR_ADDR_BITS + 1 + 32 + 32), + .R(1) ) pipe_reg ( .clk (clk), .reset (reset), diff --git a/hw/rtl/VX_fpu_unit.v b/hw/rtl/VX_fpu_unit.v index c95ae8d4..5199baed 100644 --- a/hw/rtl/VX_fpu_unit.v +++ b/hw/rtl/VX_fpu_unit.v @@ -150,7 +150,8 @@ module VX_fpu_unit #( wire stall_out = ~fpu_commit_if.ready && fpu_commit_if.valid; VX_generic_register #( - .N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + `FFG_BITS) + .N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + `FFG_BITS), + .R(1) ) pipe_reg ( .clk (clk), .reset (reset), diff --git a/hw/rtl/VX_gpu_unit.v b/hw/rtl/VX_gpu_unit.v index cfbc704f..26905ce1 100644 --- a/hw/rtl/VX_gpu_unit.v +++ b/hw/rtl/VX_gpu_unit.v @@ -76,7 +76,8 @@ module VX_gpu_unit #( wire stall = ~gpu_commit_if.ready && gpu_commit_if.valid; VX_generic_register #( - .N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + `GPU_TMC_SIZE + `GPU_WSPAWN_SIZE + `GPU_SPLIT_SIZE + `GPU_BARRIER_SIZE) + .N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + `GPU_TMC_SIZE + `GPU_WSPAWN_SIZE + `GPU_SPLIT_SIZE + `GPU_BARRIER_SIZE), + .R(1) ) pipe_reg ( .clk (clk), .reset (reset), diff --git a/hw/rtl/VX_io_arb.v b/hw/rtl/VX_io_arb.v index 1742e7a5..ab9ff6b6 100644 --- a/hw/rtl/VX_io_arb.v +++ b/hw/rtl/VX_io_arb.v @@ -68,6 +68,7 @@ module VX_io_arb #( VX_generic_register #( .N(`NUM_THREADS + TAG_OUT_WIDTH + (`NUM_THREADS * ADDR_WIDTH) + 1 + (`NUM_THREADS * WORD_SIZE) + (`NUM_THREADS * WORD_WIDTH)), + .R(`NUM_THREADS), .PASSTHRU(NUM_REQUESTS <= 2) ) pipe_reg ( .clk (clk), diff --git a/hw/rtl/VX_lsu_unit.v b/hw/rtl/VX_lsu_unit.v index 9c02c59d..1194aa40 100644 --- a/hw/rtl/VX_lsu_unit.v +++ b/hw/rtl/VX_lsu_unit.v @@ -74,7 +74,8 @@ module VX_lsu_unit #( wire stall_in; VX_generic_register #( - .N(1 + `NW_BITS + `NUM_THREADS + 32 + 1 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 2 + (`NUM_THREADS * (30 + 2 + 4 + 32))) + .N(1 + `NW_BITS + `NUM_THREADS + 32 + 1 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 2 + (`NUM_THREADS * (30 + 2 + 4 + 32))), + .R(1) ) pipe_reg0 ( .clk (clk), .reset (reset), @@ -180,7 +181,8 @@ module VX_lsu_unit #( wire arb_wb = is_store_req ? 0 : rsp_wb; VX_generic_register #( - .N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)) + .N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)), + .R(1) ) pipe_reg1 ( .clk (clk), .reset (reset), diff --git a/hw/rtl/VX_mem_arb.v b/hw/rtl/VX_mem_arb.v index 8f7b6a46..713e37e1 100644 --- a/hw/rtl/VX_mem_arb.v +++ b/hw/rtl/VX_mem_arb.v @@ -63,6 +63,7 @@ module VX_mem_arb #( VX_generic_register #( .N(1 + TAG_OUT_WIDTH + ADDR_WIDTH + 1 + DATA_SIZE + DATA_WIDTH), + .R(1), .PASSTHRU(NUM_REQUESTS <= 2) ) pipe_reg ( .clk (clk), diff --git a/hw/rtl/VX_mul_unit.v b/hw/rtl/VX_mul_unit.v index 39c0432b..41841880 100644 --- a/hw/rtl/VX_mul_unit.v +++ b/hw/rtl/VX_mul_unit.v @@ -144,7 +144,8 @@ module VX_mul_unit #( wire [`NUM_THREADS-1:0][31:0] result = mul_valid_out ? mul_result : div_result; VX_generic_register #( - .N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)) + .N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)), + .R(1) ) pipe_reg ( .clk (clk), .reset (reset), diff --git a/hw/rtl/VX_opd_collect.v b/hw/rtl/VX_opd_collect.v index 30f35448..8b4210ae 100644 --- a/hw/rtl/VX_opd_collect.v +++ b/hw/rtl/VX_opd_collect.v @@ -47,7 +47,8 @@ module VX_opd_collect #( wire stall_out = valid_out && ~ready_out; VX_generic_register #( - .N(1+INSTW+OPDSW) + .N(1 + INSTW + OPDSW), + .R(1) ) pipe_reg ( .clk (clk), .reset (reset), diff --git a/hw/rtl/VX_warp_sched.v b/hw/rtl/VX_warp_sched.v index 12fdf6b8..a1e503de 100644 --- a/hw/rtl/VX_warp_sched.v +++ b/hw/rtl/VX_warp_sched.v @@ -238,7 +238,8 @@ module VX_warp_sched #( assign scheduled_warp = schedule_valid && ~stall_out; VX_generic_register #( - .N(1 + `NUM_THREADS + 32 + `NW_BITS) + .N(1 + `NUM_THREADS + 32 + `NW_BITS), + .R(1) ) pipe_reg ( .clk (clk), .reset (reset), diff --git a/hw/rtl/VX_writeback.v b/hw/rtl/VX_writeback.v index d771f167..4eb8c998 100644 --- a/hw/rtl/VX_writeback.v +++ b/hw/rtl/VX_writeback.v @@ -76,7 +76,8 @@ module VX_writeback #( always @(*) assert(writeback_if.ready); // the writeback currently has no backpressure from issue stage VX_generic_register #( - .N(1 + `NW_BITS + 32 + `NUM_THREADS + `NR_BITS + (`NUM_THREADS * 32)) + .N(1 + `NW_BITS + 32 + `NUM_THREADS + `NR_BITS + (`NUM_THREADS * 32)), + .R(1) ) pipe_reg ( .clk (clk), .reset (reset), diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index e1fa64e0..a8ea5bab 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -408,17 +408,18 @@ if (DRAM_ENABLE) begin // we have a miss in msrq or in stage 3 for the current address wire mshr_pending_hazard_st0 = mshr_pending_hazard_unqual_st0 - || ((miss_st3 || force_miss_st3) && (addr_st3 == addr_st0)); + || (valid_st3 && (miss_st3 || force_miss_st3) && (addr_st3 == addr_st0)); VX_generic_register #( - .N(1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `REQ_INST_META_WIDTH + 1 + `BANK_LINE_WIDTH) + .N(1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `REQ_INST_META_WIDTH + 1 + `BANK_LINE_WIDTH), + .R(1) ) pipe_reg0 ( .clk (clk), .reset (reset), .stall (pipeline_stall), .flush (1'b0), - .in ({is_mshr_st0, is_snp_st0, snp_inv_st0, mshr_pending_hazard_st0, valid_st0, addr_st0, wsel_st0, writeword_st0, inst_meta_st0, is_fill_st0, writedata_st0}), - .out ({is_mshr_st1, is_snp_st1, snp_inv_st1, mshr_pending_hazard_st1, valid_st1, addr_st1, wsel_st1, writeword_st1, inst_meta_st1, is_fill_st1, writedata_st1}) + .in ({valid_st0, is_mshr_st0, is_snp_st0, snp_inv_st0, mshr_pending_hazard_st0, addr_st0, wsel_st0, writeword_st0, inst_meta_st0, is_fill_st0, writedata_st0}), + .out ({valid_st1, is_mshr_st1, is_snp_st1, snp_inv_st1, mshr_pending_hazard_st1, addr_st1, wsel_st1, writeword_st1, inst_meta_st1, is_fill_st1, writedata_st1}) ); `ifdef DBG_CACHE_REQ_INFO @@ -432,8 +433,8 @@ if (DRAM_ENABLE) begin assign {tag_st1, mem_rw_st1, mem_byteen_st1, tid_st1} = inst_meta_st1; // we have a matching previous request that missed alreedy - wire st2_pending_hazard_st1 = (miss_st2 || force_miss_st2) && (addr_st2 == addr_st1); - wire st3_pending_hazard_st1 = (miss_st3 || force_miss_st3) && (addr_st3 == addr_st1); + wire st2_pending_hazard_st1 = valid_st2 && (miss_st2 || force_miss_st2) && (addr_st2 == addr_st1); + wire st3_pending_hazard_st1 = valid_st3 && (miss_st3 || force_miss_st3) && (addr_st3 == addr_st1); // force miss to ensure commit order when a new request has pending previous requests to same block // also force a miss for msrq requests when previous requests got a miss @@ -483,14 +484,15 @@ if (DRAM_ENABLE) begin assign misses = miss_st1; VX_generic_register #( - .N(1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `TAG_SELECT_BITS + 1 + 1 + `BANK_LINE_WIDTH + WORD_SIZE + `REQ_INST_META_WIDTH) + .N(1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `TAG_SELECT_BITS + 1 + 1 + `BANK_LINE_WIDTH + WORD_SIZE + `REQ_INST_META_WIDTH), + .R(1) ) pipe_reg1 ( .clk (clk), .reset (reset), .stall (pipeline_stall), .flush (1'b0), - .in ({is_mshr_st1, writeen_st1, force_miss_st1, is_snp_st1, snp_inv_st1, is_fill_st1, valid_st1, addr_st1, wsel_st1, writeword_st1, readtag_st1, miss_st1, dirty_st1, writedata_st1, mem_byteen_st1, inst_meta_st1}), - .out ({is_mshr_st2, writeen_st2, force_miss_st2, is_snp_st2, snp_inv_st2, is_fill_st2, valid_st2, addr_st2, wsel_st2, writeword_st2, readtag_st2, miss_st2, dirty_st2, writedata_st2, mem_byteen_st2, inst_meta_st2}) + .in ({valid_st1, is_mshr_st1, writeen_st1, force_miss_st1, is_snp_st1, snp_inv_st1, is_fill_st1, addr_st1, wsel_st1, writeword_st1, readtag_st1, miss_st1, dirty_st1, writedata_st1, mem_byteen_st1, inst_meta_st1}), + .out ({valid_st2, is_mshr_st2, writeen_st2, force_miss_st2, is_snp_st2, snp_inv_st2, is_fill_st2, addr_st2, wsel_st2, writeword_st2, readtag_st2, miss_st2, dirty_st2, writedata_st2, mem_byteen_st2, inst_meta_st2}) ); end else begin @@ -509,7 +511,6 @@ end else begin assign inst_meta_st1= inst_meta_st0; assign snp_inv_st1 = snp_inv_st0; assign addr_st1 = addr_st0; - assign mem_byteen_st1 = 0; assign dirty_st1 = 0; assign readtag_st1 = 0; assign miss_st1 = 0; @@ -545,7 +546,7 @@ end end `endif - assign is_mshr_miss_st2 = (miss_st2 || force_miss_st2) && is_mshr_st2; + assign is_mshr_miss_st2 = valid_st2 && is_mshr_st2 && (miss_st2 || force_miss_st2); VX_data_access #( .BANK_ID (BANK_ID), @@ -602,7 +603,7 @@ end wire send_snp_rsp_st3; wire incoming_fill_st3; - wire send_core_rsp_st2 = valid_st2 && !is_fill_st2 && !is_snp_st2 && !miss_st2 && !force_miss_st2; + wire send_core_rsp_st2 = !is_fill_st2 && !is_snp_st2 && !miss_st2 && !force_miss_st2; // check if a matching fill request is comming wire incoming_fill_dfp_st2 = dram_rsp_fire && (addr_st2 == dram_rsp_addr); @@ -612,28 +613,29 @@ end || incoming_fill_st0_st2 || incoming_fill_st1_st2; - wire send_fill_req_st2 = valid_st2 && miss_st2 + wire send_fill_req_st2 = miss_st2 && (!force_miss_st2 || (is_mshr_st2 && addr_st2 != addr_st3)) && !incoming_fill_st2; - wire do_writeback_st2 = valid_st2 && dirty_st2 + wire do_writeback_st2 = dirty_st2 && (is_fill_st2 || (!force_miss_st2 && is_snp_st2)); wire send_dwb_req_st2 = send_fill_req_st2 || do_writeback_st2; - wire send_snp_rsp_st2 = valid_st2 && is_snp_st2 && !force_miss_st2; + wire send_snp_rsp_st2 = is_snp_st2 && !force_miss_st2; VX_generic_register #( - .N(1+ 1+ 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `WORD_WIDTH + `BANK_LINE_WIDTH + `TAG_SELECT_BITS + 1 + 1 + BANK_LINE_SIZE + `REQ_INST_META_WIDTH) + .N(1 + 1+ 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `WORD_WIDTH + `BANK_LINE_WIDTH + `TAG_SELECT_BITS + 1 + 1 + BANK_LINE_SIZE + `REQ_INST_META_WIDTH), + .R(1) ) pipe_reg2 ( .clk (clk), .reset (reset), .stall (pipeline_stall), .flush (1'b0), - .in ({is_mshr_st2, incoming_fill_st2, send_core_rsp_st2, send_dwb_req_st2, do_writeback_st2, send_snp_rsp_st2, force_miss_st2, is_snp_st2, snp_inv_st2, valid_st2, addr_st2, wsel_st2, writeword_st2, readword_st2, readdata_st2, readtag_st2, miss_st2, dirtyb_st2, inst_meta_st2}), - .out ({is_mshr_st3, incoming_fill_st3, send_core_rsp_st3, send_dwb_req_st3, do_writeback_st3, send_snp_rsp_st3, force_miss_st3, is_snp_st3, snp_inv_st3, valid_st3, addr_st3, wsel_st3, writeword_st3, readword_st3, readdata_st3, readtag_st3, miss_st3, dirtyb_st3, inst_meta_st3}) + .in ({valid_st2, send_core_rsp_st2, send_dwb_req_st2, do_writeback_st2, send_snp_rsp_st2, incoming_fill_st2, force_miss_st2, is_mshr_st2, is_snp_st2, snp_inv_st2, addr_st2, wsel_st2, writeword_st2, readword_st2, readdata_st2, readtag_st2, miss_st2, dirtyb_st2, inst_meta_st2}), + .out ({valid_st3, send_core_rsp_st3, send_dwb_req_st3, do_writeback_st3, send_snp_rsp_st3, incoming_fill_st3, force_miss_st3, is_mshr_st3, is_snp_st3, snp_inv_st3, addr_st3, wsel_st3, writeword_st3, readword_st3, readdata_st3, readtag_st3, miss_st3, dirtyb_st3, inst_meta_st3}) ); `ifdef DBG_CACHE_REQ_INFO @@ -644,7 +646,7 @@ end end `endif - assign is_mshr_miss_st3 = (miss_st3 || force_miss_st3) && is_mshr_st3; + assign is_mshr_miss_st3 = valid_st3 && is_mshr_st3 && (miss_st3 || force_miss_st3); // Enqueue to miss reserv if it's a valid miss @@ -653,7 +655,7 @@ end wire req_rw_st3; wire[WORD_SIZE-1:0] req_byteen_st3; - wire mshr_push_unqual = miss_st3 || force_miss_st3; + wire mshr_push_unqual = valid_st3 && (miss_st3 || force_miss_st3); assign mshr_push_stall = mshr_push_unqual && mshr_full; wire mshr_push = mshr_push_unqual @@ -672,7 +674,7 @@ end // push missed requests as 'ready' if it was a forced miss but actually had a hit // or the fill request is comming for the missed block - wire mshr_init_ready_state_st3 = !miss_st3 || incoming_fill_st3; + wire mshr_init_ready_state_st3 = valid_st3 && (!miss_st3 || incoming_fill_st3); VX_cache_miss_resrv #( .BANK_ID (BANK_ID), @@ -763,7 +765,7 @@ end wire cwbq_empty, cwbq_full; - wire cwbq_push_unqual = send_core_rsp_st3 && !req_rw_st3; + wire cwbq_push_unqual = valid_st3 && send_core_rsp_st3 && !req_rw_st3; assign cwbq_push_stall = cwbq_push_unqual && cwbq_full; wire cwbq_push = cwbq_push_unqual @@ -799,7 +801,7 @@ end wire dwbq_empty, dwbq_full; - wire dwbq_push_unqual = send_dwb_req_st3; + wire dwbq_push_unqual = valid_st3 && send_dwb_req_st3; assign dwbq_push_stall = dwbq_push_unqual && dwbq_full; @@ -854,7 +856,7 @@ end wire snpq_empty, snpq_full; - wire snpq_push_unqual = send_snp_rsp_st3; + wire snpq_push_unqual = valid_st3 && send_snp_rsp_st3; assign snpq_push_stall = snpq_push_unqual && snpq_full; @@ -922,7 +924,7 @@ end `ifdef DBG_PRINT_CACHE_BANK wire incoming_fill_dfp_st3 = dram_rsp_fire && (addr_st3 == dram_rsp_addr); always @(posedge clk) begin - if (miss_st3 && (incoming_fill_st3 || incoming_fill_dfp_st3)) begin + if (valid_st3 && miss_st3 && (incoming_fill_st3 || incoming_fill_dfp_st3)) begin $display("%t: incoming fill - addr=%0h, st3=%b, dfp=%b", $time, `LINE_TO_BYTE_ADDR(addr_st3, BANK_ID), incoming_fill_st3, incoming_fill_dfp_st3); assert(!is_mshr_st3); end diff --git a/hw/rtl/cache/VX_cache_core_rsp_merge.v b/hw/rtl/cache/VX_cache_core_rsp_merge.v index 3529eeda..a00d1a96 100644 --- a/hw/rtl/cache/VX_cache_core_rsp_merge.v +++ b/hw/rtl/cache/VX_cache_core_rsp_merge.v @@ -93,6 +93,7 @@ module VX_cache_core_rsp_merge #( VX_generic_register #( .N(NUM_REQUESTS + (NUM_REQUESTS *`WORD_WIDTH) + (`CORE_REQ_TAG_COUNT * CORE_TAG_WIDTH)), + .R(NUM_REQUESTS), .PASSTHRU(NUM_BANKS <= 2) ) pipe_reg ( .clk (clk), diff --git a/hw/rtl/cache/VX_cache_dram_req_arb.v b/hw/rtl/cache/VX_cache_dram_req_arb.v index df502fe2..b4a8c015 100644 --- a/hw/rtl/cache/VX_cache_dram_req_arb.v +++ b/hw/rtl/cache/VX_cache_dram_req_arb.v @@ -48,6 +48,7 @@ module VX_cache_dram_req_arb #( VX_generic_register #( .N(1 + `DRAM_ADDR_WIDTH + 1 + BANK_LINE_SIZE + `BANK_LINE_WIDTH), + .R(1), .PASSTHRU(NUM_BANKS <= 2) ) pipe_reg ( .clk (clk), diff --git a/hw/rtl/cache/VX_data_access.v b/hw/rtl/cache/VX_data_access.v index 9ab9c0de..f4dd1100 100644 --- a/hw/rtl/cache/VX_data_access.v +++ b/hw/rtl/cache/VX_data_access.v @@ -104,8 +104,8 @@ module VX_data_access #( end assign write_enable = valid_in - && writeen_in - && !stall; + && writeen_in + && !stall; assign dirtyb_out = read_dirtyb_out; assign readdata_out = read_data; diff --git a/hw/rtl/cache/VX_snp_forwarder.v b/hw/rtl/cache/VX_snp_forwarder.v index 985e0f71..7b7e70c5 100644 --- a/hw/rtl/cache/VX_snp_forwarder.v +++ b/hw/rtl/cache/VX_snp_forwarder.v @@ -179,6 +179,7 @@ module VX_snp_forwarder #( VX_generic_register #( .N(1 + `LOG2UP(SNRQ_SIZE)), + .R(1), .PASSTHRU(NUM_REQUESTS <= 2) ) pipe_reg ( .clk (clk), diff --git a/hw/rtl/cache/VX_snp_rsp_arb.v b/hw/rtl/cache/VX_snp_rsp_arb.v index 695c142e..fa3f80df 100644 --- a/hw/rtl/cache/VX_snp_rsp_arb.v +++ b/hw/rtl/cache/VX_snp_rsp_arb.v @@ -36,6 +36,7 @@ module VX_snp_rsp_arb #( VX_generic_register #( .N(1 + SNP_TAG_WIDTH), + .R(1), .PASSTHRU(NUM_BANKS <= 2) ) pipe_reg ( .clk (clk), diff --git a/hw/rtl/fp_cores/VX_fp_noncomp.v b/hw/rtl/fp_cores/VX_fp_noncomp.v index a08b3d48..74f43f54 100644 --- a/hw/rtl/fp_cores/VX_fp_noncomp.v +++ b/hw/rtl/fp_cores/VX_fp_noncomp.v @@ -88,7 +88,8 @@ module VX_fp_noncomp #( wire tmp_ab_equal = (dataa[i] == datab[i]) | (tmp_a_type[4] & tmp_b_type[4]); VX_generic_register #( - .N(1 + 1 + 8 + 23 + $bits(fp_type_t) + $bits(fp_type_t) + 1 + 1) + .N(1 + 1 + 8 + 23 + $bits(fp_type_t) + $bits(fp_type_t) + 1 + 1), + .R(0) ) pipe_reg0 ( .clk (clk), .reset (reset), @@ -100,7 +101,8 @@ module VX_fp_noncomp #( end VX_generic_register #( - .N(1 + TAGW + `FPU_BITS + `FRM_BITS + (2 * `NUM_THREADS * 32)) + .N(1 + TAGW + `FPU_BITS + `FRM_BITS + (2 * `NUM_THREADS * 32)), + .R(1) ) pipe_reg1 ( .clk (clk), .reset (reset), @@ -250,7 +252,8 @@ module VX_fp_noncomp #( || (op_type_r == `FPU_CMP); // CMP VX_generic_register #( - .N(1 + TAGW + (LANES * 32) + 1 + (LANES * `FFG_BITS)) + .N(1 + TAGW + (LANES * 32) + 1 + (LANES * `FFG_BITS)), + .R(1) ) pipe_reg2 ( .clk (clk), .reset (reset), diff --git a/hw/rtl/libs/VX_cam_buffer.v b/hw/rtl/libs/VX_cam_buffer.v index e93612d8..d74c1452 100644 --- a/hw/rtl/libs/VX_cam_buffer.v +++ b/hw/rtl/libs/VX_cam_buffer.v @@ -54,7 +54,9 @@ module VX_cam_buffer #( end else begin for (integer i = 0; i < CPORTS; i++) begin if (release_slot[i]) begin - assert(0 == free_slots[release_addr[i]]) else $error("%t: releasing invalid slot at port %d", $time, release_addr[i]); + assert(0 == free_slots[release_addr[i]]) else begin + $display("%t: releasing invalid slot at port %d", $time, release_addr[i]); + end end end free_slots <= free_slots_n; diff --git a/hw/rtl/libs/VX_generic_register.v b/hw/rtl/libs/VX_generic_register.v index 637f4f43..169a7ad5 100644 --- a/hw/rtl/libs/VX_generic_register.v +++ b/hw/rtl/libs/VX_generic_register.v @@ -1,7 +1,8 @@ `include "VX_platform.vh" module VX_generic_register #( - parameter N = 1, + parameter N = 1, + parameter R = N, parameter PASSTHRU = 0 ) ( input wire clk, @@ -17,13 +18,24 @@ module VX_generic_register #( `UNUSED_VAR (stall) assign out = flush ? N'(0) : in; end else begin - reg [(N-1):0] value; + reg [N-1:0] value; - always @(posedge clk) begin - if (reset || flush) begin - value <= N'(0); - end else if (~stall) begin - value <= in; + if (R != 0) begin + always @(posedge clk) begin + if (~stall) begin + value <= in; + end + if (reset || flush) begin + value[N-1:N-R] <= R'(0); + end + end + end else begin + `UNUSED_VAR (reset) + `UNUSED_VAR (flush) + always @(posedge clk) begin + if (~stall) begin + value <= in; + end end end