From 97739e9dcffcd2db2f3ff1aef0a4f66993b9fb6e Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 30 Nov 2020 14:02:47 -0800 Subject: [PATCH] RAM blocks inference fixes --- hw/rtl/VX_fpu_unit.v | 2 +- hw/rtl/VX_gpr_ram.v | 2 + hw/rtl/VX_ibuffer.v | 3 +- hw/rtl/VX_icache_stage.v | 30 +++--- hw/rtl/VX_ipdom_stack.v | 41 ++++---- hw/rtl/VX_lsu_unit.v | 2 +- hw/rtl/VX_mul_unit.v | 2 +- hw/rtl/cache/VX_bank.v | 6 +- hw/rtl/cache/VX_cache_core_rsp_merge.v | 40 ++++---- hw/rtl/cache/VX_data_store.v | 4 +- hw/rtl/cache/VX_snp_forwarder.v | 2 +- hw/rtl/fp_cores/VX_fp_addmul.v | 6 +- hw/rtl/fp_cores/VX_fp_div.v | 6 +- hw/rtl/fp_cores/VX_fp_ftoi.v | 6 +- hw/rtl/fp_cores/VX_fp_itof.v | 6 +- hw/rtl/fp_cores/VX_fp_madd.v | 6 +- hw/rtl/fp_cores/VX_fp_sqrt.v | 6 +- hw/rtl/fp_cores/VX_fpnew.v | 2 +- hw/rtl/libs/VX_cam_buffer.v | 55 +++++----- hw/rtl/libs/VX_divide.v | 4 +- hw/rtl/libs/VX_dp_ram.v | 134 +++++++++++++++---------- hw/rtl/libs/VX_generic_queue.v | 31 +++--- hw/rtl/libs/VX_matrix_arbiter.v | 4 +- hw/rtl/libs/VX_multiplier.v | 2 +- hw/rtl/libs/VX_rr_arbiter.v | 2 +- hw/simulate/testbench.cpp | 2 +- hw/syn/quartus/project.tcl | 1 + 27 files changed, 218 insertions(+), 189 deletions(-) diff --git a/hw/rtl/VX_fpu_unit.v b/hw/rtl/VX_fpu_unit.v index 5199baed..7dc95815 100644 --- a/hw/rtl/VX_fpu_unit.v +++ b/hw/rtl/VX_fpu_unit.v @@ -42,7 +42,7 @@ module VX_fpu_unit #( VX_cam_buffer #( .DATAW (`NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1), .SIZE (`FPUQ_SIZE) - ) fpu_cam ( + ) req_metadata_buf ( .clk (clk), .reset (reset), .acquire_slot (fpuq_push), diff --git a/hw/rtl/VX_gpr_ram.v b/hw/rtl/VX_gpr_ram.v index 473a8c74..ede625a9 100644 --- a/hw/rtl/VX_gpr_ram.v +++ b/hw/rtl/VX_gpr_ram.v @@ -1,6 +1,7 @@ `include "VX_define.vh" `TRACING_OFF + module VX_gpr_ram ( input wire clk, input wire [`NUM_THREADS-1:0] we, @@ -32,4 +33,5 @@ module VX_gpr_ram ( assign rs2_data = q2; endmodule + `TRACING_ON \ No newline at end of file diff --git a/hw/rtl/VX_ibuffer.v b/hw/rtl/VX_ibuffer.v index 4c4b2534..1859a95f 100644 --- a/hw/rtl/VX_ibuffer.v +++ b/hw/rtl/VX_ibuffer.v @@ -43,7 +43,8 @@ module VX_ibuffer #( VX_generic_queue #( .DATAW(DATAW), - .SIZE(SIZE) + .SIZE(SIZE), + .BUFFERED(1) ) queue ( .clk (clk), .reset (reset), diff --git a/hw/rtl/VX_icache_stage.v b/hw/rtl/VX_icache_stage.v index f65eb75d..c4857834 100644 --- a/hw/rtl/VX_icache_stage.v +++ b/hw/rtl/VX_icache_stage.v @@ -20,20 +20,26 @@ module VX_icache_stage #( ); `UNUSED_VAR (reset) - `NO_RW_RAM_CHECK reg [31:0] rsp_PC_buf [`NUM_WARPS-1:0]; - `NO_RW_RAM_CHECK reg [`NUM_THREADS-1:0] rsp_tmask_buf [`NUM_WARPS-1:0]; - wire icache_req_fire = icache_req_if.valid && icache_req_if.ready; wire [`NW_BITS-1:0] req_tag = ifetch_req_if.wid; - wire [`NW_BITS-1:0] rsp_tag = icache_rsp_if.tag[0][`NW_BITS-1:0]; + wire [`NW_BITS-1:0] rsp_tag = icache_rsp_if.tag[0][`NW_BITS-1:0]; - always @(posedge clk) begin - if (icache_req_fire) begin - rsp_PC_buf[req_tag] <= ifetch_req_if.PC; - rsp_tmask_buf[req_tag] <= ifetch_req_if.tmask; - end - end + VX_dp_ram #( + .DATAW(32 + `NUM_THREADS), + .SIZE(`NUM_WARPS), + .BUFFERED(0), + .RWCHECK(0) + ) req_metadata ( + .clk(clk), + .waddr(req_tag), + .raddr(rsp_tag), + .wren(icache_req_fire), + .byteen(1'b1), + .rden(1'b1), + .din({ifetch_req_if.PC, ifetch_req_if.tmask}), + .dout({ifetch_rsp_if.PC, ifetch_rsp_if.tmask}) + ); // Icache Request assign icache_req_if.valid = ifetch_req_if.valid; @@ -53,8 +59,6 @@ module VX_icache_stage #( assign ifetch_rsp_if.valid = icache_rsp_if.valid; assign ifetch_rsp_if.wid = rsp_tag; - assign ifetch_rsp_if.tmask = rsp_tmask_buf[rsp_tag]; - assign ifetch_rsp_if.PC = rsp_PC_buf[rsp_tag]; assign ifetch_rsp_if.instr = icache_rsp_if.data[0]; // Can accept new response? @@ -66,7 +70,7 @@ module VX_icache_stage #( `SCOPE_ASSIGN (icache_req_tag, req_tag); `SCOPE_ASSIGN (icache_rsp_fire, icache_rsp_if.valid && icache_rsp_if.ready); - `SCOPE_ASSIGN (icache_rsp_data, icache_rsp_if.data); + `SCOPE_ASSIGN (icache_rsp_data, icache_rsp_if.data[0]); `SCOPE_ASSIGN (icache_rsp_tag, rsp_tag); `ifdef DBG_PRINT_CORE_ICACHE diff --git a/hw/rtl/VX_ipdom_stack.v b/hw/rtl/VX_ipdom_stack.v index 4e7d42f9..88bdb18d 100644 --- a/hw/rtl/VX_ipdom_stack.v +++ b/hw/rtl/VX_ipdom_stack.v @@ -16,14 +16,11 @@ module VX_ipdom_stack #( ); localparam STACK_SIZE = 2 ** DEPTH; - `NO_RW_RAM_CHECK reg [WIDTH-1:0] stack_1 [0:STACK_SIZE-1]; - `NO_RW_RAM_CHECK reg [WIDTH-1:0] stack_2 [0:STACK_SIZE-1]; - reg is_part [0:STACK_SIZE-1]; + reg is_part [STACK_SIZE-1:0]; reg [DEPTH-1:0] rd_ptr, wr_ptr; - reg [WIDTH - 1:0] d1, d2; - reg p; + wire [WIDTH - 1:0] d1, d2; always @(posedge clk) begin if (reset) begin @@ -38,22 +35,24 @@ module VX_ipdom_stack #( rd_ptr <= rd_ptr - DEPTH'(is_part[rd_ptr]); end end - end - - always @(posedge clk) begin - if (push) begin - stack_1[wr_ptr] <= q1; - end - end - assign d1 = stack_1[rd_ptr]; - - always @(posedge clk) begin - if (push) begin - stack_2[wr_ptr] <= q2; - end - end - assign d2 = stack_2[rd_ptr]; + end + VX_dp_ram #( + .DATAW(WIDTH * 2), + .SIZE(STACK_SIZE), + .BUFFERED(0), + .RWCHECK(0) + ) store ( + .clk(clk), + .waddr(wr_ptr), + .raddr(rd_ptr), + .wren(push), + .byteen(1'b1), + .rden(1'b1), + .din({q2, q1}), + .dout({d2, d1}) + ); + always @(posedge clk) begin if (push) begin is_part[wr_ptr] <= 0; @@ -61,7 +60,7 @@ module VX_ipdom_stack #( is_part[rd_ptr] <= 1; end end - assign p = is_part[rd_ptr]; + wire p = is_part[rd_ptr]; assign d = p ? d1 : d2; assign empty = ~(| wr_ptr); diff --git a/hw/rtl/VX_lsu_unit.v b/hw/rtl/VX_lsu_unit.v index 1194aa40..5347626b 100644 --- a/hw/rtl/VX_lsu_unit.v +++ b/hw/rtl/VX_lsu_unit.v @@ -112,7 +112,7 @@ module VX_lsu_unit #( VX_cam_buffer #( .DATAW (`NW_BITS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 2) + 2), .SIZE (`LSUQ_SIZE) - ) cam_buffer ( + ) req_metadata_buf ( .clk (clk), .reset (reset), .write_addr (req_tag), diff --git a/hw/rtl/VX_mul_unit.v b/hw/rtl/VX_mul_unit.v index 41841880..1592d4f8 100644 --- a/hw/rtl/VX_mul_unit.v +++ b/hw/rtl/VX_mul_unit.v @@ -34,7 +34,7 @@ module VX_mul_unit #( VX_cam_buffer #( .DATAW (`NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1), .SIZE (`MULQ_SIZE) - ) mul_cam ( + ) req_metadata_buf ( .clk (clk), .reset (reset), .acquire_slot (mulq_push), diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index a8ea5bab..e2b24b41 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -498,6 +498,7 @@ if (DRAM_ENABLE) begin end else begin `UNUSED_VAR (mshr_pending_hazard_unqual_st0) + `UNUSED_VAR (addr_st0) assign {tag_st1, mem_rw_st1, mem_byteen_st1, tid_st1} = inst_meta_st1; @@ -510,7 +511,7 @@ end else begin assign writedata_st1= writedata_st0; assign inst_meta_st1= inst_meta_st0; assign snp_inv_st1 = snp_inv_st0; - assign addr_st1 = addr_st0; + assign addr_st1 = reqq_addr_st0[`LINE_SELECT_ADDR_RNG]; assign dirty_st1 = 0; assign readtag_st1 = 0; assign miss_st1 = 0; @@ -782,7 +783,8 @@ end VX_generic_queue #( .DATAW(`REQS_BITS + CORE_TAG_WIDTH + `WORD_WIDTH), - .SIZE(CWBQ_SIZE) + .SIZE(CWBQ_SIZE), + .BUFFERED(1) ) cwb_queue ( .clk (clk), .reset (reset), diff --git a/hw/rtl/cache/VX_cache_core_rsp_merge.v b/hw/rtl/cache/VX_cache_core_rsp_merge.v index a00d1a96..d8e3070e 100644 --- a/hw/rtl/cache/VX_cache_core_rsp_merge.v +++ b/hw/rtl/cache/VX_cache_core_rsp_merge.v @@ -29,18 +29,6 @@ module VX_cache_core_rsp_merge #( input wire core_rsp_ready ); if (NUM_REQUESTS > 1) begin - wire [`BANK_BITS-1:0] sel_idx; - - VX_rr_arbiter #( - .N(NUM_BANKS) - ) sel_arb ( - .clk (clk), - .reset (reset), - .requests (per_bank_core_rsp_valid), - `UNUSED_PIN (grant_valid), - .grant_index (sel_idx), - `UNUSED_PIN (grant_onehot) - ); reg [NUM_REQUESTS-1:0] core_rsp_valid_unqual; reg [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] core_rsp_data_unqual; @@ -48,6 +36,19 @@ module VX_cache_core_rsp_merge #( reg [NUM_BANKS-1:0] core_rsp_bank_select; if (CORE_TAG_ID_BITS != 0) begin + wire [`BANK_BITS-1:0] sel_idx; + + VX_rr_arbiter #( + .N(NUM_BANKS) + ) sel_arb ( + .clk (clk), + .reset (reset), + .requests (per_bank_core_rsp_valid), + `UNUSED_PIN (grant_valid), + .grant_index (sel_idx), + `UNUSED_PIN (grant_onehot) + ); + always @(*) begin core_rsp_valid_unqual = 0; core_rsp_tag_unqual = per_bank_core_rsp_tag[sel_idx]; @@ -65,17 +66,10 @@ module VX_cache_core_rsp_merge #( end end else begin always @(*) begin - core_rsp_valid_unqual = 0; - core_rsp_valid_unqual[per_bank_core_rsp_tid[sel_idx]] = per_bank_core_rsp_valid[sel_idx]; - - core_rsp_tag_unqual = 'x; - core_rsp_tag_unqual[per_bank_core_rsp_tid[sel_idx]] = per_bank_core_rsp_tag[sel_idx]; - - core_rsp_data_unqual = 'x; - core_rsp_data_unqual[per_bank_core_rsp_tid[sel_idx]] = per_bank_core_rsp_data[sel_idx]; - - core_rsp_bank_select = 0; - core_rsp_bank_select[sel_idx] = 1; + core_rsp_valid_unqual = 0; + core_rsp_tag_unqual = 'x; + core_rsp_data_unqual = 'x; + core_rsp_bank_select = 0; for (integer i = 0; i < NUM_BANKS; i++) begin if (per_bank_core_rsp_valid[i] diff --git a/hw/rtl/cache/VX_data_store.v b/hw/rtl/cache/VX_data_store.v index 53253bbb..a3f4f4f3 100644 --- a/hw/rtl/cache/VX_data_store.v +++ b/hw/rtl/cache/VX_data_store.v @@ -43,9 +43,9 @@ module VX_data_store #( end VX_dp_ram #( - .DATAW(`BANK_LINE_WORDS * WORD_SIZE * 8), + .DATAW(BANK_LINE_SIZE * 8), .SIZE(`BANK_LINE_COUNT), - .BYTEENW(`BANK_LINE_WORDS * WORD_SIZE), + .BYTEENW(BANK_LINE_SIZE), .BUFFERED(0), .RWCHECK(1) ) data ( diff --git a/hw/rtl/cache/VX_snp_forwarder.v b/hw/rtl/cache/VX_snp_forwarder.v index 7b7e70c5..a7b0aa6c 100644 --- a/hw/rtl/cache/VX_snp_forwarder.v +++ b/hw/rtl/cache/VX_snp_forwarder.v @@ -64,7 +64,7 @@ module VX_snp_forwarder #( VX_cam_buffer #( .DATAW (SRC_ADDR_WIDTH + 1 + SNP_TAG_WIDTH), .SIZE (SNRQ_SIZE) - ) snp_fwd_cam ( + ) req_metadata_buf ( .clk (clk), .reset (reset), .write_addr (sfq_write_addr), diff --git a/hw/rtl/fp_cores/VX_fp_addmul.v b/hw/rtl/fp_cores/VX_fp_addmul.v index 9fff5949..82b4dda3 100644 --- a/hw/rtl/fp_cores/VX_fp_addmul.v +++ b/hw/rtl/fp_cores/VX_fp_addmul.v @@ -178,14 +178,14 @@ module VX_fp_addmul #( end VX_shift_register #( - .DATAW(TAGW + 1 + 1 + 1), + .DATAW(1 + TAGW + 1 + 1), .DEPTH(`LATENCY_FADDMUL) ) shift_reg ( .clk(clk), .reset(reset), .enable(enable), - .in({tag_in, valid_in, do_sub, do_mul}), - .out({tag_out, valid_out, do_sub_r, do_mul_r}) + .in({valid_in, tag_in, do_sub, do_mul}), + .out({valid_out, tag_out, do_sub_r, do_mul_r}) ); assign ready_in = enable; diff --git a/hw/rtl/fp_cores/VX_fp_div.v b/hw/rtl/fp_cores/VX_fp_div.v index 1cb02f6a..b6c6dbd2 100644 --- a/hw/rtl/fp_cores/VX_fp_div.v +++ b/hw/rtl/fp_cores/VX_fp_div.v @@ -50,14 +50,14 @@ module VX_fp_div #( end VX_shift_register #( - .DATAW(TAGW + 1), + .DATAW(1 + TAGW), .DEPTH(`LATENCY_FDIV) ) shift_reg ( .clk(clk), .reset(reset), .enable(enable), - .in ({tag_in, valid_in}), - .out({tag_out, valid_out}) + .in ({valid_in, tag_in}), + .out({valid_out, tag_out}) ); assign ready_in = enable; diff --git a/hw/rtl/fp_cores/VX_fp_ftoi.v b/hw/rtl/fp_cores/VX_fp_ftoi.v index 53e91a93..68d60b27 100644 --- a/hw/rtl/fp_cores/VX_fp_ftoi.v +++ b/hw/rtl/fp_cores/VX_fp_ftoi.v @@ -68,14 +68,14 @@ module VX_fp_ftoi #( end VX_shift_register #( - .DATAW(TAGW + 1 + 1), + .DATAW(1 + TAGW + 1), .DEPTH(`LATENCY_FTOI) ) shift_reg ( .clk(clk), .reset(reset), .enable(enable), - .in ({tag_in, valid_in, is_signed}), - .out({tag_out, valid_out, is_signed_r}) + .in ({valid_in, tag_in, is_signed}), + .out({valid_out, tag_out, is_signed_r}) ); assign ready_in = enable; diff --git a/hw/rtl/fp_cores/VX_fp_itof.v b/hw/rtl/fp_cores/VX_fp_itof.v index 0c8ed277..aaefbe1d 100644 --- a/hw/rtl/fp_cores/VX_fp_itof.v +++ b/hw/rtl/fp_cores/VX_fp_itof.v @@ -68,14 +68,14 @@ module VX_fp_itof #( end VX_shift_register #( - .DATAW(TAGW + 1 + 1), + .DATAW(1 + TAGW + 1), .DEPTH(`LATENCY_ITOF) ) shift_reg ( .clk(clk), .reset(reset), .enable(enable), - .in ({tag_in, valid_in, is_signed}), - .out({tag_out, valid_out, is_signed_r}) + .in ({valid_in, tag_in, is_signed}), + .out({valid_out, tag_out, is_signed_r}) ); assign ready_in = enable; diff --git a/hw/rtl/fp_cores/VX_fp_madd.v b/hw/rtl/fp_cores/VX_fp_madd.v index ee7d2e10..413ba2ef 100644 --- a/hw/rtl/fp_cores/VX_fp_madd.v +++ b/hw/rtl/fp_cores/VX_fp_madd.v @@ -138,14 +138,14 @@ module VX_fp_madd #( end VX_shift_register #( - .DATAW(TAGW + 1 + 1 + 1), + .DATAW(1 + TAGW + 1 + 1), .DEPTH(`LATENCY_FMADD) ) shift_reg ( .clk(clk), .reset(reset), .enable(enable), - .in({tag_in, valid_in, do_sub, do_neg}), - .out({tag_out, valid_out, do_sub_r, do_neg_r}) + .in({valid_in, tag_in, do_sub, do_neg}), + .out({valid_out, tag_out, do_sub_r, do_neg_r}) ); assign ready_in = enable; diff --git a/hw/rtl/fp_cores/VX_fp_sqrt.v b/hw/rtl/fp_cores/VX_fp_sqrt.v index 8938cb4f..6660d202 100644 --- a/hw/rtl/fp_cores/VX_fp_sqrt.v +++ b/hw/rtl/fp_cores/VX_fp_sqrt.v @@ -48,14 +48,14 @@ module VX_fp_sqrt #( end VX_shift_register #( - .DATAW(TAGW + 1), + .DATAW(1 + TAGW), .DEPTH(`LATENCY_FSQRT) ) shift_reg ( .clk(clk), .reset(reset), .enable(enable), - .in ({tag_in, valid_in}), - .out({tag_out, valid_out}) + .in ({valid_in, tag_in}), + .out({valid_out, tag_out}) ); assign ready_in = enable; diff --git a/hw/rtl/fp_cores/VX_fpnew.v b/hw/rtl/fp_cores/VX_fpnew.v index 62014883..e664a880 100644 --- a/hw/rtl/fp_cores/VX_fpnew.v +++ b/hw/rtl/fp_cores/VX_fpnew.v @@ -78,7 +78,7 @@ module VX_fpnew wire [FMTI_BITS-1:0] fpu_int_fmt = fpnew_pkg::INT32; wire [`NUM_THREADS-1:0][31:0] fpu_result; - fpnew_pkg::status_t [0:`NUM_THREADS-1] fpu_status; + fpnew_pkg::status_t [`NUM_THREADS-1:0] fpu_status; reg [FOP_BITS-1:0] fpu_op; reg [`FRM_BITS-1:0] fpu_rnd; diff --git a/hw/rtl/libs/VX_cam_buffer.v b/hw/rtl/libs/VX_cam_buffer.v index d74c1452..34192976 100644 --- a/hw/rtl/libs/VX_cam_buffer.v +++ b/hw/rtl/libs/VX_cam_buffer.v @@ -3,8 +3,6 @@ module VX_cam_buffer #( parameter DATAW = 1, parameter SIZE = 1, - parameter RPORTS = 1, - parameter CPORTS = 1, parameter ADDRW = `LOG2UP(SIZE) ) ( input wire clk, @@ -12,13 +10,12 @@ module VX_cam_buffer #( output wire [ADDRW-1:0] write_addr, input wire [DATAW-1:0] write_data, input wire acquire_slot, - input wire [RPORTS-1:0][ADDRW-1:0] read_addr, - output wire [RPORTS-1:0][DATAW-1:0] read_data, - input wire [CPORTS-1:0][ADDRW-1:0] release_addr, - input wire [CPORTS-1:0] release_slot, + input wire [ADDRW-1:0] read_addr, + output wire [DATAW-1:0] read_data, + input wire [ADDRW-1:0] release_addr, + input wire release_slot, output wire full ); - reg [DATAW-1:0] entries [SIZE-1:0]; reg [SIZE-1:0] free_slots, free_slots_n; reg [ADDRW-1:0] write_addr_r; reg full_r; @@ -36,13 +33,12 @@ module VX_cam_buffer #( always @(*) begin free_slots_n = free_slots; - for (integer i = 0; i < CPORTS; i++) begin - if (release_slot[i]) begin - free_slots_n[release_addr[i]] = 1; - end + if (release_slot) begin + free_slots_n[release_addr] = 1; end if (acquire_slot) begin - free_slots_n[write_addr_r] = 0; + assert(1 == free_slots[write_addr]) else $error("%t: acquiring used slot at port %d", $time, write_addr); + free_slots_n[write_addr_r] = 0; end end @@ -52,28 +48,33 @@ module VX_cam_buffer #( full_r <= 1'b0; write_addr_r <= ADDRW'(1'b0); end else begin - for (integer i = 0; i < CPORTS; i++) begin - if (release_slot[i]) begin - assert(0 == free_slots[release_addr[i]]) else begin - $display("%t: releasing invalid slot at port %d", $time, release_addr[i]); - end + if (release_slot) begin + assert(0 == free_slots[release_addr]) else begin + $display("%t: releasing invalid slot at port %d", $time, release_addr); end end free_slots <= free_slots_n; write_addr_r <= free_index; full_r <= ~free_valid; - end - - if (acquire_slot) begin - assert(1 == free_slots[write_addr]) else $error("%t: acquiring used slot at port %d", $time, write_addr); - entries[write_addr] <= write_data; - end - end - - for (genvar i = 0; i < RPORTS; i++) begin - assign read_data[i] = entries[read_addr[i]]; + end end + VX_dp_ram #( + .DATAW(DATAW), + .SIZE(SIZE), + .BUFFERED(0), + .RWCHECK(0) + ) req_metadata ( + .clk(clk), + .waddr(write_addr), + .raddr(read_addr), + .wren(acquire_slot), + .byteen(1'b1), + .rden(1'b1), + .din(write_data), + .dout(read_data) + ); + assign write_addr = write_addr_r; assign full = full_r; diff --git a/hw/rtl/libs/VX_divide.v b/hw/rtl/libs/VX_divide.v index 6d694204..de406bdb 100644 --- a/hw/rtl/libs/VX_divide.v +++ b/hw/rtl/libs/VX_divide.v @@ -73,8 +73,8 @@ module VX_divide #( assign quotient = quotient_unqual [WIDTHQ-1:0]; assign remainder = remainder_unqual [WIDTHR-1:0]; end else begin - reg [WIDTHN-1:0] quotient_pipe [0:LATENCY-1]; - reg [WIDTHD-1:0] remainder_pipe [0:LATENCY-1]; + reg [WIDTHN-1:0] quotient_pipe [LATENCY-1:0]; + reg [WIDTHD-1:0] remainder_pipe [LATENCY-1:0]; for (genvar i = 0; i < LATENCY; i++) begin always @(posedge clk) begin diff --git a/hw/rtl/libs/VX_dp_ram.v b/hw/rtl/libs/VX_dp_ram.v index 4e75578c..88156528 100644 --- a/hw/rtl/libs/VX_dp_ram.v +++ b/hw/rtl/libs/VX_dp_ram.v @@ -21,33 +21,40 @@ module VX_dp_ram #( output wire [DATAW-1:0] dout ); + `STATIC_ASSERT((1 == BYTEENW) || ((BYTEENW > 1) && 0 == (BYTEENW % 4)), ("invalid parameter")) + + localparam DATA32W = DATAW / 32; + localparam BYTEEN32W = BYTEENW / 4; + if (FASTRAM) begin - - if (BUFFERED) begin - - `USE_FAST_BRAM reg [DATAW-1:0] mem [SIZE-1:0]; + if (BUFFERED) begin reg [DATAW-1:0] dout_r; if (BYTEENW > 1) begin + `USE_FAST_BRAM reg [DATA32W-1:0][3:0][7:0] mem [SIZE-1:0]; + always @(posedge clk) begin if (wren) begin - for (integer i = 0; i < BYTEENW; i++) begin - if (byteen[i]) - mem[waddr][i * 8 +: 8] <= din[i * 8 +: 8]; + for (integer j = 0; j < BYTEEN32W; j++) begin + for (integer i = 0; i < 4; i++) begin + if (byteen[j * 4 + i]) + mem[waddr][j][i] <= din[j * 32 + i * 8 +: 8]; + end end end + if (rden) + dout_r <= mem[raddr]; end end else begin + `USE_FAST_BRAM reg [DATAW-1:0] mem [SIZE-1:0]; + always @(posedge clk) begin if (wren && byteen) mem[waddr] <= din; + if (rden) + dout_r <= mem[raddr]; end end - - always @(posedge clk) begin - if (rden) - dout_r <= mem[raddr]; - end assign dout = dout_r; @@ -55,48 +62,58 @@ module VX_dp_ram #( `UNUSED_VAR (rden) - if (RWCHECK) begin - - `USE_FAST_BRAM reg [DATAW-1:0] mem [SIZE-1:0]; + if (RWCHECK) begin if (BYTEENW > 1) begin + `USE_FAST_BRAM reg [DATA32W-1:0][3:0][7:0] mem [SIZE-1:0]; + always @(posedge clk) begin if (wren) begin - for (integer i = 0; i < BYTEENW; i++) begin - if (byteen[i]) - mem[waddr][i * 8 +: 8] <= din[i * 8 +: 8]; + for (integer j = 0; j < BYTEEN32W; j++) begin + for (integer i = 0; i < 4; i++) begin + if (byteen[j * 4 + i]) + mem[waddr][j][i] <= din[j * 32 + i * 8 +: 8]; + end end end end + assign dout = mem[raddr]; + end else begin + `USE_FAST_BRAM reg [DATAW-1:0] mem [SIZE-1:0]; + always @(posedge clk) begin if (wren && byteen) mem[waddr] <= din; end + assign dout = mem[raddr]; end - assign dout = mem[raddr]; - end else begin - `USE_FAST_BRAM `NO_RW_RAM_CHECK reg [DATAW-1:0] mem [SIZE-1:0]; - if (BYTEENW > 1) begin + `USE_FAST_BRAM `NO_RW_RAM_CHECK reg [DATA32W-1:0][3:0][7:0] mem [SIZE-1:0]; + always @(posedge clk) begin if (wren) begin - for (integer i = 0; i < BYTEENW; i++) begin - if (byteen[i]) - mem[waddr][i * 8 +: 8] <= din[i * 8 +: 8]; + for (integer j = 0; j < BYTEEN32W; j++) begin + for (integer i = 0; i < 4; i++) begin + if (byteen[j * 4 + i]) + mem[waddr][j][i] <= din[j * 32 + i * 8 +: 8]; + end end end end + assign dout = mem[raddr]; end else begin + `USE_FAST_BRAM `NO_RW_RAM_CHECK reg [DATAW-1:0] mem [SIZE-1:0]; + always @(posedge clk) begin if (wren && byteen) mem[waddr] <= din; end - end - assign dout = mem[raddr]; + assign dout = mem[raddr]; + end end end @@ -104,79 +121,88 @@ module VX_dp_ram #( if (BUFFERED) begin - reg [DATAW-1:0] mem [SIZE-1:0]; reg [DATAW-1:0] dout_r; if (BYTEENW > 1) begin + reg [DATA32W-1:0][3:0][7:0] mem [SIZE-1:0]; + always @(posedge clk) begin if (wren) begin - for (integer i = 0; i < BYTEENW; i++) begin - if (byteen[i]) - mem[waddr][i * 8 +: 8] <= din[i * 8 +: 8]; + for (integer j = 0; j < BYTEEN32W; j++) begin + for (integer i = 0; i < 4; i++) begin + if (byteen[j * 4 + i]) + mem[waddr][j][i] <= din[j * 32 + i * 8 +: 8]; + end end end + if (rden) + dout_r <= mem[raddr]; end end else begin + reg [DATAW-1:0] mem [SIZE-1:0]; + always @(posedge clk) begin if (wren && byteen) mem[waddr] <= din; + if (rden) + dout_r <= mem[raddr]; end end - - always @(posedge clk) begin - if (rden) - dout_r <= mem[raddr]; - end assign dout = dout_r; - end else begin `UNUSED_VAR (rden) if (RWCHECK) begin - reg [DATAW-1:0] mem [SIZE-1:0]; - if (BYTEENW > 1) begin + reg [DATA32W-1:0][3:0][7:0] mem [SIZE-1:0]; + always @(posedge clk) begin if (wren) begin - for (integer i = 0; i < BYTEENW; i++) begin - if (byteen[i]) - mem[waddr][i * 8 +: 8] <= din[i * 8 +: 8]; + for (integer j = 0; j < BYTEEN32W; j++) begin + for (integer i = 0; i < 4; i++) begin + if (byteen[j * 4 + i]) + mem[waddr][j][i] <= din[j * 32 + i * 8 +: 8]; + end end end end + assign dout = mem[raddr]; end else begin + reg [DATAW-1:0] mem [SIZE-1:0]; + always @(posedge clk) begin if (wren && byteen) mem[waddr] <= din; end + assign dout = mem[raddr]; end - - assign dout = mem[raddr]; - end else begin - - `NO_RW_RAM_CHECK reg [DATAW-1:0] mem [SIZE-1:0]; - if (BYTEENW > 1) begin + `NO_RW_RAM_CHECK reg [DATA32W-1:0][3:0][7:0] mem [SIZE-1:0]; + always @(posedge clk) begin if (wren) begin - for (integer i = 0; i < BYTEENW; i++) begin - if (byteen[i]) - mem[waddr][i * 8 +: 8] <= din[i * 8 +: 8]; + for (integer j = 0; j < BYTEEN32W; j++) begin + for (integer i = 0; i < 4; i++) begin + if (byteen[j * 4 + i]) + mem[waddr][j][i] <= din[j * 32 + i * 8 +: 8]; + end end end end + assign dout = mem[raddr]; end else begin + `NO_RW_RAM_CHECK reg [DATAW-1:0] mem [SIZE-1:0]; + always @(posedge clk) begin if (wren && byteen) mem[waddr] <= din; end - end - - assign dout = mem[raddr]; + assign dout = mem[raddr]; + end end end end diff --git a/hw/rtl/libs/VX_generic_queue.v b/hw/rtl/libs/VX_generic_queue.v index c24e6488..48190e74 100644 --- a/hw/rtl/libs/VX_generic_queue.v +++ b/hw/rtl/libs/VX_generic_queue.v @@ -6,7 +6,7 @@ module VX_generic_queue #( parameter BUFFERED = 0, parameter ADDRW = $clog2(SIZE), parameter SIZEW = $clog2(SIZE+1), - parameter FASTRAM = 1 + parameter FASTRAM = 0 ) ( input wire clk, input wire reset, @@ -117,7 +117,7 @@ module VX_generic_queue #( .raddr(rd_ptr_a), .wren(push), .byteen(1'b1), - .rden(pop), + .rden(1'b1), .din(data_in), .dout(data_out) ); @@ -125,11 +125,10 @@ module VX_generic_queue #( end else begin wire [DATAW-1:0] dout; - reg [DATAW-1:0] din_r; + reg [DATAW-1:0] dout_r; reg [ADDRW-1:0] wr_ptr_r; reg [ADDRW-1:0] rd_ptr_r; reg [ADDRW-1:0] rd_ptr_n_r; - reg bypass_r; always @(posedge clk) begin if (reset) begin @@ -151,19 +150,11 @@ module VX_generic_queue #( end end - always @(posedge clk) begin - if (push && (empty_r || ((used_r == ADDRW'(1)) && pop))) begin - bypass_r <= 1; - din_r <= data_in; - end else if (pop) - bypass_r <= 0; - end - VX_dp_ram #( .DATAW(DATAW), .SIZE(SIZE), - .BUFFERED(1), - .RWCHECK(0), + .BUFFERED(0), + .RWCHECK(1), .FASTRAM(FASTRAM) ) dp_ram ( .clk(clk), @@ -171,12 +162,20 @@ module VX_generic_queue #( .raddr(rd_ptr_n_r), .wren(push), .byteen(1'b1), - .rden(pop), + .rden(1'b1), .din(data_in), .dout(dout) ); - assign data_out = bypass_r ? din_r : dout; + always @(posedge clk) begin + if (push && (empty_r || ((used_r == ADDRW'(1)) && pop))) begin + dout_r <= data_in; + end else if (pop) begin + dout_r <= dout; + end + end + + assign data_out = dout_r; end assign empty = empty_r; diff --git a/hw/rtl/libs/VX_matrix_arbiter.v b/hw/rtl/libs/VX_matrix_arbiter.v index 7ccda7dc..d5eb2f87 100644 --- a/hw/rtl/libs/VX_matrix_arbiter.v +++ b/hw/rtl/libs/VX_matrix_arbiter.v @@ -22,8 +22,8 @@ module VX_matrix_arbiter #( end else begin - reg [N-1:1] state [0:N-1]; - wire [N-1:0] pri [0:N-1]; + reg [N-1:1] state [N-1:0]; + wire [N-1:0] pri [N-1:0]; for (genvar i = 0; i < N; i++) begin for (genvar j = 0; j < N; j++) begin diff --git a/hw/rtl/libs/VX_multiplier.v b/hw/rtl/libs/VX_multiplier.v index ed7c5946..27c8c8e7 100644 --- a/hw/rtl/libs/VX_multiplier.v +++ b/hw/rtl/libs/VX_multiplier.v @@ -47,7 +47,7 @@ module VX_multiplier #( if (LATENCY == 0) begin assign result = result_unqual; end else begin - reg [WIDTHP-1:0] result_pipe [0:LATENCY-1]; + reg [WIDTHP-1:0] result_pipe [LATENCY-1:0]; for (genvar i = 0; i < LATENCY; i++) begin always @(posedge clk) begin diff --git a/hw/rtl/libs/VX_rr_arbiter.v b/hw/rtl/libs/VX_rr_arbiter.v index c876590e..86d30f3a 100644 --- a/hw/rtl/libs/VX_rr_arbiter.v +++ b/hw/rtl/libs/VX_rr_arbiter.v @@ -22,7 +22,7 @@ module VX_rr_arbiter #( end else begin - reg [`CLOG2(N)-1:0] grant_table [0:N-1]; + reg [`CLOG2(N)-1:0] grant_table [N-1:0]; reg [`CLOG2(N)-1:0] state; reg [N-1:0] grant_onehot_r; diff --git a/hw/simulate/testbench.cpp b/hw/simulate/testbench.cpp index 9c071003..b4ea5feb 100644 --- a/hw/simulate/testbench.cpp +++ b/hw/simulate/testbench.cpp @@ -3,7 +3,7 @@ #include #include -#define ALL_TESTS +//#define ALL_TESTS int main(int argc, char **argv) { bool passed = true; diff --git a/hw/syn/quartus/project.tcl b/hw/syn/quartus/project.tcl index 0e85bf48..9bb216ba 100644 --- a/hw/syn/quartus/project.tcl +++ b/hw/syn/quartus/project.tcl @@ -40,6 +40,7 @@ set_global_assignment -name VERILOG_MACRO NDEBUG set_global_assignment -name MESSAGE_DISABLE 16818 set_global_assignment -name VERILOG_MACRO FPU_FAST +set_global_assignment -name AUTO_SHIFT_REGISTER_RECOGNITION AUTO set_global_assignment -name MIN_CORE_JUNCTION_TEMP 0 set_global_assignment -name MAX_CORE_JUNCTION_TEMP 100 set_global_assignment -name POWER_BOARD_THERMAL_MODEL "NONE (CONSERVATIVE)"