diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 0d17e6da..e9538a7e 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -246,12 +246,12 @@ // Size of MUL Request Queue `ifndef MULQ_SIZE -`define MULQ_SIZE 4 +`define MULQ_SIZE 8 `endif // Size of FPU Request Queue `ifndef FPUQ_SIZE -`define FPUQ_SIZE 4 +`define FPUQ_SIZE 8 `endif // Icache Configurable Knobs ////////////////////////////////////////////////// diff --git a/hw/rtl/VX_fpu_unit.v b/hw/rtl/VX_fpu_unit.v index 712f0da2..229034f5 100644 --- a/hw/rtl/VX_fpu_unit.v +++ b/hw/rtl/VX_fpu_unit.v @@ -43,7 +43,7 @@ module VX_fpu_unit #( .DATAW (`NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1), .SIZE (`FPUQ_SIZE), .FASTRAM (1) - ) req_metadata_buf ( + ) req_metadata ( .clk (clk), .reset (reset), .acquire_slot (fpuq_push), diff --git a/hw/rtl/VX_lsu_unit.v b/hw/rtl/VX_lsu_unit.v index f41872cd..2d16f348 100644 --- a/hw/rtl/VX_lsu_unit.v +++ b/hw/rtl/VX_lsu_unit.v @@ -119,7 +119,7 @@ module VX_lsu_unit #( .DATAW (`NW_BITS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 2) + 2), .SIZE (`LSUQ_SIZE), .FASTRAM (1) - ) req_metadata_buf ( + ) req_metadata ( .clk (clk), .reset (reset), .write_addr (mbuf_waddr), diff --git a/hw/rtl/VX_mul_unit.v b/hw/rtl/VX_mul_unit.v index a1651aeb..710aa1f1 100644 --- a/hw/rtl/VX_mul_unit.v +++ b/hw/rtl/VX_mul_unit.v @@ -19,47 +19,43 @@ module VX_mul_unit #( wire [`NUM_THREADS-1:0][31:0] alu_in1 = mul_req_if.rs1_data; wire [`NUM_THREADS-1:0][31:0] alu_in2 = mul_req_if.rs2_data; - wire [`NW_BITS-1:0] rsp_wid; - wire [`NUM_THREADS-1:0] rsp_tmask; - wire [31:0] rsp_PC; - wire [`NR_BITS-1:0] rsp_rd; - wire rsp_wb; - wire [MULQ_BITS-1:0] tag_in, tag_out; - wire valid_out, ready_out; - wire mulq_full; - - wire mulq_push = mul_req_if.valid && mul_req_if.ready; - wire mulq_pop = valid_out && ready_out; - - VX_index_buffer #( - .DATAW (`NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1), - .SIZE (`MULQ_SIZE), - .FASTRAM (1) - ) req_metadata_buf ( - .clk (clk), - .reset (reset), - .acquire_slot 
(mulq_push), - .write_addr (tag_in), - .read_addr (tag_out), - .release_addr (tag_out), - .write_data ({mul_req_if.wid, mul_req_if.tmask, mul_req_if.PC, mul_req_if.rd, mul_req_if.wb}), - .read_data ({rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb}), - .release_slot (mulq_pop), - .full (mulq_full) - ); - - wire valid_in = mul_req_if.valid && ~mulq_full; - + wire ready_out; + /////////////////////////////////////////////////////////////////////////// wire [`NUM_THREADS-1:0][31:0] mul_result; - wire [MULQ_BITS-1:0] mul_tag; - wire is_mul_in = (alu_op == `MUL_MUL); - wire is_mul_out; + wire [`NW_BITS-1:0] mul_wid_out; + wire [`NUM_THREADS-1:0] mul_tmask_out; + wire [31:0] mul_PC_out; + wire [`NR_BITS-1:0] mul_rd_out; + wire mul_wb_out; wire mul_valid_out; - wire mul_valid_in = valid_in && !is_div_op; + wire mulq_full; /* declared ahead of its first use in mul_valid_in below */ + wire mul_valid_in = mul_req_if.valid && !is_div_op && ~mulq_full; wire mul_ready_in = ready_out || ~mul_valid_out; + + wire mulq_push = mul_valid_in && mul_ready_in; + wire mulq_pop = mul_valid_out && ready_out; + + wire is_mulh_in = (alu_op != `MUL_MUL); + wire is_mulh_out; + + VX_generic_queue #( + .DATAW (`NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + 1), + .SIZE (`MULQ_SIZE), + .FASTRAM (1) + ) mul_metadata ( + .clk (clk), + .reset (reset), + .push (mulq_push), + .pop (mulq_pop), + .data_in ({mul_req_if.wid, mul_req_if.tmask, mul_req_if.PC, mul_req_if.rd, mul_req_if.wb, is_mulh_in}), + .data_out ({mul_wid_out, mul_tmask_out, mul_PC_out, mul_rd_out, mul_wb_out, is_mulh_out}), + .full (mulq_full), + `UNUSED_PIN (empty), + `UNUSED_PIN (size) + ); for (genvar i = 0; i < `NUM_THREADS; i++) begin @@ -83,32 +79,36 @@ module VX_mul_unit #( .result(mul_result_tmp) ); - assign mul_result[i] = is_mul_out ? mul_result_tmp[31:0] : mul_result_tmp[63:32]; + assign mul_result[i] = is_mulh_out ? 
mul_result_tmp[63:32] : mul_result_tmp[31:0]; end VX_shift_register #( - .DATAW(1 + MULQ_BITS + 1), + .DATAW(1), .DEPTH(`LATENCY_IMUL) ) mul_shift_reg ( .clk(clk), .reset(reset), .enable(mul_ready_in), - .in({mul_valid_in, tag_in, is_mul_in}), - .out({mul_valid_out, mul_tag, is_mul_out}) + .data_in(mul_valid_in), + .data_out(mul_valid_out) ); /////////////////////////////////////////////////////////////////////////// - wire [`NUM_THREADS-1:0][31:0] div_result_tmp, rem_result_tmp; + wire [`NUM_THREADS-1:0][31:0] div_result_tmp, rem_result_tmp; + wire [`NW_BITS-1:0] div_wid_out; + wire [`NUM_THREADS-1:0] div_tmask_out; + wire [31:0] div_PC_out; + wire [`NR_BITS-1:0] div_rd_out; + wire div_wb_out; - wire is_rem_op = (alu_op == `MUL_REM) || (alu_op == `MUL_REMU); + wire is_rem_op_in = (alu_op == `MUL_REM) || (alu_op == `MUL_REMU); wire is_signed_div = (alu_op == `MUL_DIV) || (alu_op == `MUL_REM); - wire div_valid_in = valid_in && is_div_op; + wire div_valid_in = mul_req_if.valid && is_div_op; wire div_ready_out = ready_out && ~mul_valid_out; // arbitration prioritizes MUL wire div_ready_in; wire div_valid_out; wire is_rem_op_out; - wire [MULQ_BITS-1:0] div_tag; VX_serial_div #( .WIDTHN(32), @@ -116,21 +116,21 @@ module VX_mul_unit #( .WIDTHQ(32), .WIDTHR(32), .LANES(`NUM_THREADS), - .TAGW(MULQ_BITS + 1) + .TAGW(`NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + 1) ) divide ( .clk(clk), - .reset(reset), - .ready_in(div_ready_in), + .reset(reset), .valid_in(div_valid_in), + .ready_in(div_ready_in), .signed_mode(is_signed_div), - .tag_in({tag_in, is_rem_op}), + .tag_in({mul_req_if.wid, mul_req_if.tmask, mul_req_if.PC, mul_req_if.rd, mul_req_if.wb, is_rem_op_in}), .numer(alu_in1), .denom(alu_in2), .quotient(div_result_tmp), .remainder(rem_result_tmp), .ready_out(div_ready_out), .valid_out(div_valid_out), - .tag_out({div_tag, is_rem_op_out}) + .tag_out({div_wid_out, div_tmask_out, div_PC_out, div_rd_out, div_wb_out, is_rem_op_out}) ); wire [`NUM_THREADS-1:0][31:0] div_result = 
is_rem_op_out ? rem_result_tmp : div_result_tmp; @@ -140,9 +140,13 @@ module VX_mul_unit #( wire stall_out = ~mul_commit_if.ready && mul_commit_if.valid; assign ready_out = ~stall_out; - assign valid_out = mul_valid_out || div_valid_out; - assign tag_out = mul_valid_out ? mul_tag : div_tag; - wire [`NUM_THREADS-1:0][31:0] result = mul_valid_out ? mul_result : div_result; + wire rsp_valid = mul_valid_out || div_valid_out; + wire [`NW_BITS-1:0] rsp_wid = mul_valid_out ? mul_wid_out : div_wid_out; + wire [`NUM_THREADS-1:0] rsp_tmask = mul_valid_out ? mul_tmask_out : div_tmask_out; + wire [31:0] rsp_PC = mul_valid_out ? mul_PC_out : div_PC_out; + wire [`NR_BITS-1:0] rsp_rd = mul_valid_out ? mul_rd_out : div_rd_out; + wire rsp_wb = mul_valid_out ? mul_wb_out : div_wb_out; + wire [`NUM_THREADS-1:0][31:0] rsp_data = mul_valid_out ? mul_result : div_result; VX_generic_register #( .N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)), @@ -152,11 +156,11 @@ module VX_mul_unit #( .reset (reset), .stall (stall_out), .flush (1'b0), - .data_in ({valid_out, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, result}), + .data_in ({rsp_valid, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, rsp_data}), .data_out ({mul_commit_if.valid, mul_commit_if.wid, mul_commit_if.tmask, mul_commit_if.PC, mul_commit_if.rd, mul_commit_if.wb, mul_commit_if.data}) ); // can accept new request? - assign mul_req_if.ready = (is_div_op ? div_ready_in : mul_ready_in) && ~mulq_full; + assign mul_req_if.ready = is_div_op ? 
div_ready_in : (mul_ready_in && ~mulq_full); endmodule \ No newline at end of file diff --git a/hw/rtl/afu/vortex_afu.sv b/hw/rtl/afu/vortex_afu.sv index f1e63414..4896367a 100644 --- a/hw/rtl/afu/vortex_afu.sv +++ b/hw/rtl/afu/vortex_afu.sv @@ -718,8 +718,8 @@ VX_generic_queue #( .clk (clk), .reset (reset), .push (cci_rdq_push), - .data_in (cci_rdq_din), .pop (cci_rdq_pop), + .data_in (cci_rdq_din), .data_out (cci_rdq_dout), .empty (cci_rdq_empty), `UNUSED_PIN (full), diff --git a/hw/rtl/fp_cores/VX_fp_addmul.v b/hw/rtl/fp_cores/VX_fp_addmul.v index 82b4dda3..67f22392 100644 --- a/hw/rtl/fp_cores/VX_fp_addmul.v +++ b/hw/rtl/fp_cores/VX_fp_addmul.v @@ -184,8 +184,8 @@ module VX_fp_addmul #( .clk(clk), .reset(reset), .enable(enable), - .in({valid_in, tag_in, do_sub, do_mul}), - .out({valid_out, tag_out, do_sub_r, do_mul_r}) + .data_in({valid_in, tag_in, do_sub, do_mul}), + .data_out({valid_out, tag_out, do_sub_r, do_mul_r}) ); assign ready_in = enable; diff --git a/hw/rtl/fp_cores/VX_fp_div.v b/hw/rtl/fp_cores/VX_fp_div.v index b6c6dbd2..5eabdb2a 100644 --- a/hw/rtl/fp_cores/VX_fp_div.v +++ b/hw/rtl/fp_cores/VX_fp_div.v @@ -56,8 +56,8 @@ module VX_fp_div #( .clk(clk), .reset(reset), .enable(enable), - .in ({valid_in, tag_in}), - .out({valid_out, tag_out}) + .data_in ({valid_in, tag_in}), + .data_out({valid_out, tag_out}) ); assign ready_in = enable; diff --git a/hw/rtl/fp_cores/VX_fp_ftoi.v b/hw/rtl/fp_cores/VX_fp_ftoi.v index 68d60b27..5c1b7ff2 100644 --- a/hw/rtl/fp_cores/VX_fp_ftoi.v +++ b/hw/rtl/fp_cores/VX_fp_ftoi.v @@ -74,8 +74,8 @@ module VX_fp_ftoi #( .clk(clk), .reset(reset), .enable(enable), - .in ({valid_in, tag_in, is_signed}), - .out({valid_out, tag_out, is_signed_r}) + .data_in ({valid_in, tag_in, is_signed}), + .data_out({valid_out, tag_out, is_signed_r}) ); assign ready_in = enable; diff --git a/hw/rtl/fp_cores/VX_fp_itof.v b/hw/rtl/fp_cores/VX_fp_itof.v index aaefbe1d..b2318372 100644 --- a/hw/rtl/fp_cores/VX_fp_itof.v +++ 
b/hw/rtl/fp_cores/VX_fp_itof.v @@ -74,8 +74,8 @@ module VX_fp_itof #( .clk(clk), .reset(reset), .enable(enable), - .in ({valid_in, tag_in, is_signed}), - .out({valid_out, tag_out, is_signed_r}) + .data_in ({valid_in, tag_in, is_signed}), + .data_out({valid_out, tag_out, is_signed_r}) ); assign ready_in = enable; diff --git a/hw/rtl/fp_cores/VX_fp_madd.v b/hw/rtl/fp_cores/VX_fp_madd.v index 413ba2ef..7166ddad 100644 --- a/hw/rtl/fp_cores/VX_fp_madd.v +++ b/hw/rtl/fp_cores/VX_fp_madd.v @@ -144,8 +144,8 @@ module VX_fp_madd #( .clk(clk), .reset(reset), .enable(enable), - .in({valid_in, tag_in, do_sub, do_neg}), - .out({valid_out, tag_out, do_sub_r, do_neg_r}) + .data_in({valid_in, tag_in, do_sub, do_neg}), + .data_out({valid_out, tag_out, do_sub_r, do_neg_r}) ); assign ready_in = enable; diff --git a/hw/rtl/fp_cores/VX_fp_sqrt.v b/hw/rtl/fp_cores/VX_fp_sqrt.v index 6660d202..5a897ccc 100644 --- a/hw/rtl/fp_cores/VX_fp_sqrt.v +++ b/hw/rtl/fp_cores/VX_fp_sqrt.v @@ -54,8 +54,8 @@ module VX_fp_sqrt #( .clk(clk), .reset(reset), .enable(enable), - .in ({valid_in, tag_in}), - .out({valid_out, tag_out}) + .data_in ({valid_in, tag_in}), + .data_out({valid_out, tag_out}) ); assign ready_in = enable; diff --git a/hw/rtl/libs/VX_divide.v b/hw/rtl/libs/VX_divider.v similarity index 99% rename from hw/rtl/libs/VX_divide.v rename to hw/rtl/libs/VX_divider.v index 19531c80..5cb38cfa 100644 --- a/hw/rtl/libs/VX_divide.v +++ b/hw/rtl/libs/VX_divider.v @@ -1,6 +1,6 @@ `include "VX_platform.vh" -module VX_divide #( +module VX_divider #( parameter WIDTHN = 1, parameter WIDTHD = 1, parameter WIDTHQ = 1, diff --git a/hw/rtl/libs/VX_shift_register.v b/hw/rtl/libs/VX_shift_register.v index a82a4607..e7721368 100644 --- a/hw/rtl/libs/VX_shift_register.v +++ b/hw/rtl/libs/VX_shift_register.v @@ -1,14 +1,105 @@ `include "VX_platform.vh" module VX_shift_register #( + parameter DATAW = 1, + parameter RESETW = DATAW, + parameter DEPTH = 1 +) ( + input wire clk, + input wire reset, + input wire 
enable, + input wire [DATAW-1:0] data_in, + output wire [DATAW-1:0] data_out +); + if (RESETW != 0) begin + if (RESETW == DATAW) begin + + VX_shift_register_wr #( + .DATAW (DATAW), + .DEPTH (DEPTH) + ) sr ( + .clk (clk), + .reset (reset), + .enable (enable), + .data_in (data_in), + .data_out (data_out) + ); + + end else begin + + VX_shift_register_wr #( + .DATAW (RESETW), /* sized to the [DATAW-1:DATAW-RESETW] slice wired to this instance */ + .DEPTH (DEPTH) + ) sr_wr ( + .clk (clk), + .reset (reset), + .enable (enable), + .data_in (data_in[DATAW-1:DATAW-RESETW]), + .data_out (data_out[DATAW-1:DATAW-RESETW]) + ); + + VX_shift_register_nr #( + .DATAW (DATAW-RESETW), /* sized to the [DATAW-RESETW-1:0] slice wired to this instance */ + .DEPTH (DEPTH) + ) sr_nr ( + .clk (clk), + .enable (enable), + .data_in (data_in[DATAW-RESETW-1:0]), + .data_out (data_out[DATAW-RESETW-1:0]) + ); + + end + + end else begin + + `UNUSED_VAR (reset) + + VX_shift_register_nr #( + .DATAW (DATAW), + .DEPTH (DEPTH) + ) sr ( + .clk (clk), + .enable (enable), + .data_in (data_in), + .data_out (data_out) + ); + + end + +endmodule + +module VX_shift_register_nr #( + parameter DATAW = 1, + parameter DEPTH = 1 +) ( + input wire clk, + input wire enable, + input wire [DATAW-1:0] data_in, + output wire [DATAW-1:0] data_out +); + reg [DATAW-1:0] entries [DEPTH-1:0]; + + always @(posedge clk) begin + if (enable) begin + for (integer i = DEPTH-1; i > 0; --i) + entries[i] <= entries[i-1]; + entries[0] <= data_in; + end + end + + assign data_out = entries [DEPTH-1]; + +endmodule + +module VX_shift_register_wr #( parameter DATAW = 1, parameter DEPTH = 1 ) ( input wire clk, input wire reset, input wire enable, - input wire [DATAW-1:0] in, - output wire [DATAW-1:0] out + input wire [DATAW-1:0] data_in, + output wire [DATAW-1:0] data_out ); reg [DEPTH-1:0][DATAW-1:0] entries; @@ -19,7 +110,7 @@ module VX_shift_register #( entries <= (DEPTH * DATAW)'(0); end else begin if (enable) begin - entries <= in; + entries <= data_in; end end end @@ -31,12 +122,12 @@ module VX_shift_register #( entries <= (DEPTH * DATAW)'(0); end else begin if (enable) begin - 
entries <= {entries[DEPTH-2:0], in}; + entries <= {entries[DEPTH-2:0], data_in}; end end end end - assign out = entries [DEPTH-1]; + assign data_out = entries [DEPTH-1]; endmodule \ No newline at end of file