diff --git a/hw/modelsim/vortex_tb.v b/hw/modelsim/vortex_tb.v index 4521244a..03bd4b1e 100644 --- a/hw/modelsim/vortex_tb.v +++ b/hw/modelsim/vortex_tb.v @@ -102,28 +102,27 @@ reg[31:0] io_data; .icache_request_pc_address (icache_request_pc_address), .io_valid (io_valid), .io_data (io_data), - .o_m_read_addr_d (o_m_read_addr_d), - .o_m_evict_addr_d (o_m_evict_addr_d), - .o_m_valid_d (o_m_valid_d), - .o_m_writedata_d (o_m_writedata_d), - .o_m_read_or_write_d (o_m_read_or_write_d), - .i_m_readdata_d (i_m_readdata_d), - .i_m_ready_d (i_m_ready_d), - .o_m_read_addr_i (o_m_read_addr_i), - .o_m_evict_addr_i (o_m_evict_addr_i), - .o_m_valid_i (o_m_valid_i), - .o_m_writedata_i (o_m_writedata_i), - .o_m_read_or_write_i (o_m_read_or_write_i), - .i_m_readdata_i (i_m_readdata_i), - .i_m_ready_i (i_m_ready_i), - .ebreak_o (out_ebreak) + .m_read_addr_d (o_m_read_addr_d), + .m_evict_addr_d (o_m_evict_addr_d), + .m_valid_d (o_m_valid_d), + .m_writedata_d (o_m_writedata_d), + .m_read_or_write_d (o_m_read_or_write_d), + .m_readdata_d (i_m_readdata_d), + .m_ready_d (i_m_ready_d), + .m_read_addr (o_m_read_addr_i), + .m_evict_addr (o_m_evict_addr_i), + .m_valid (o_m_valid_i), + .writedata (o_m_writedata_i), + .m_read_or_write (o_m_read_or_write_i), + .m_readdata (i_m_readdata_i), + .m_ready (i_m_ready_i), + .ebreak (out_ebreak) ); always @(negedge clk) begin ibus_driver(clk, o_m_read_addr_i, o_m_evict_addr_i, o_m_valid_i, o_m_writedata_i, o_m_read_or_write_i, `ICACHE_BANKS, `ICACHE_NUM_WORDS_PER_BLOCK, i_m_readdata_i, i_m_ready_i); dbus_driver(clk, o_m_read_addr_d, o_m_evict_addr_d, o_m_valid_d, o_m_writedata_d, o_m_read_or_write_d, `DCACHE_BANKS, `DCACHE_NUM_WORDS_PER_BLOCK, i_m_readdata_d, i_m_ready_d); - io_handler (clk, io_valid, io_data); - + io_handler (clk, io_valid, io_data); end always @(posedge clk) begin diff --git a/hw/rtl/VX_alu.v b/hw/rtl/VX_alu.v index 805856fe..9f1c95e7 100644 --- a/hw/rtl/VX_alu.v +++ b/hw/rtl/VX_alu.v @@ -3,15 +3,15 @@ module VX_alu ( input wire clk, 
input wire reset, - input wire[31:0] a_i, - input wire[31:0] b_i, - input wire rs2_src_i, - input wire[31:0] itype_immed_i, - input wire[19:0] upper_immed_i, - input wire[4:0] alu_op_i, - input wire[31:0] curr_PC_i, - output reg[31:0] alu_result_o, - output reg alu_stall_o + input wire[31:0] src_a, + input wire[31:0] src_b, + input wire src_rs2, + input wire[31:0] itype_immed, + input wire[19:0] upper_immed, + input wire[4:0] alu_op, + input wire[31:0] curr_PC, + output reg[31:0] alu_result, + output reg alu_stall ); localparam div_pipeline_len = 20; @@ -79,18 +79,18 @@ module VX_alu ( // MUL, MULH (signed*signed), MULHSU (signed*unsigned), MULHU (unsigned*unsigned) wire[63:0] alu_in1_signed = {{32{ALU_in1[31]}}, ALU_in1}; wire[63:0] alu_in2_signed = {{32{ALU_in2[31]}}, ALU_in2}; - assign mul_data_a = (alu_op_i == `MULHU) ? {32'b0, ALU_in1} : alu_in1_signed; - assign mul_data_b = (alu_op_i == `MULHU || alu_op_i == `MULHSU) ? {32'b0, ALU_in2} : alu_in2_signed; + assign mul_data_a = (alu_op == `MULHU) ? {32'b0, ALU_in1} : alu_in1_signed; + assign mul_data_b = (alu_op == `MULHU || alu_op == `MULHSU) ? {32'b0, ALU_in2} : alu_in2_signed; reg [15:0] curr_inst_delay; reg [15:0] inst_delay; reg inst_was_stalling; wire inst_delay_stall = inst_was_stalling ? inst_delay != 0 : curr_inst_delay != 0; - assign alu_stall_o = inst_delay_stall; + assign alu_stall = inst_delay_stall; always @(*) begin - case(alu_op_i) + case(alu_op) `DIV, `DIVU, `REM, @@ -100,7 +100,7 @@ module VX_alu ( `MULHSU, `MULHU: curr_inst_delay = mul_pipeline_len; default: curr_inst_delay = 0; - endcase // alu_op_i + endcase // alu_op end always @(posedge clk) begin @@ -127,80 +127,80 @@ module VX_alu ( wire which_in2; - wire[31:0] upper_immed; + wire[31:0] upper_immed_s; - assign which_in2 = rs2_src_i == `RS2_IMMED; + assign which_in2 = src_rs2 == `RS2_IMMED; - assign ALU_in1 = a_i; - assign ALU_in2 = which_in2 ? itype_immed_i : b_i; + assign ALU_in1 = src_a; + assign ALU_in2 = which_in2 ? 
itype_immed : src_b; - assign upper_immed = {upper_immed_i, {12{1'b0}}}; + assign upper_immed_s = {upper_immed, {12{1'b0}}}; // 20-bit port value in bits [31:12], low 12 bits zero; local is named _s so it does not redeclare the upper_immed port always @(*) begin - case(alu_op_i) - `ADD: alu_result_o = $signed(ALU_in1) + $signed(ALU_in2); - `SUB: alu_result_o = $signed(ALU_in1) - $signed(ALU_in2); - `SLLA: alu_result_o = ALU_in1 << ALU_in2[4:0]; - `SLT: alu_result_o = ($signed(ALU_in1) < $signed(ALU_in2)) ? 32'h1 : 32'h0; - `SLTU: alu_result_o = ALU_in1 < ALU_in2 ? 32'h1 : 32'h0; - `XOR: alu_result_o = ALU_in1 ^ ALU_in2; - `SRL: alu_result_o = ALU_in1 >> ALU_in2[4:0]; - `SRA: alu_result_o = $signed(ALU_in1) >>> ALU_in2[4:0]; - `OR: alu_result_o = ALU_in1 | ALU_in2; - `AND: alu_result_o = ALU_in2 & ALU_in1; - `SUBU: alu_result_o = (ALU_in1 >= ALU_in2) ? 32'h0 : 32'hffffffff; - `LUI_ALU: alu_result_o = upper_immed; - `AUIPC_ALU: alu_result_o = $signed(curr_PC_i) + $signed(upper_immed); + case(alu_op) + `ADD: alu_result = $signed(ALU_in1) + $signed(ALU_in2); + `SUB: alu_result = $signed(ALU_in1) - $signed(ALU_in2); + `SLLA: alu_result = ALU_in1 << ALU_in2[4:0]; + `SLT: alu_result = ($signed(ALU_in1) < $signed(ALU_in2)) ? 32'h1 : 32'h0; + `SLTU: alu_result = ALU_in1 < ALU_in2 ? 32'h1 : 32'h0; + `XOR: alu_result = ALU_in1 ^ ALU_in2; + `SRL: alu_result = ALU_in1 >> ALU_in2[4:0]; + `SRA: alu_result = $signed(ALU_in1) >>> ALU_in2[4:0]; + `OR: alu_result = ALU_in1 | ALU_in2; + `AND: alu_result = ALU_in2 & ALU_in1; + `SUBU: alu_result = (ALU_in1 >= ALU_in2) ? 32'h0 : 32'hffffffff; + `LUI_ALU: alu_result = upper_immed_s; + `AUIPC_ALU: alu_result = $signed(curr_PC) + $signed(upper_immed_s); // TODO profitable to roll these exceptional cases into inst_delay to avoid pipeline when possible? 
- `MUL: alu_result_o = mul_result[31:0]; - `MULH: alu_result_o = mul_result[63:32]; - `MULHSU: alu_result_o = mul_result[63:32]; - `MULHU: alu_result_o = mul_result[63:32]; - `DIV: alu_result_o = (ALU_in2 == 0) ? 32'hffffffff : signed_div_result; - `DIVU: alu_result_o = (ALU_in2 == 0) ? 
32'hffffffff : unsigned_div_result; - `REM: alu_result_o = (ALU_in2 == 0) ? ALU_in1 : signed_rem_result; - `REMU: alu_result_o = (ALU_in2 == 0) ? ALU_in1 : unsigned_rem_result; - default: alu_result_o = 32'h0; - endcase // alu_op_i + `MUL: alu_result = mul_result[31:0]; + `MULH: alu_result = mul_result[63:32]; + `MULHSU: alu_result = mul_result[63:32]; + `MULHU: alu_result = mul_result[63:32]; + `DIV: alu_result = (ALU_in2 == 0) ? 32'hffffffff : signed_div_result; + `DIVU: alu_result = (ALU_in2 == 0) ? 32'hffffffff : unsigned_div_result; + `REM: alu_result = (ALU_in2 == 0) ? ALU_in1 : signed_rem_result; + `REMU: alu_result = (ALU_in2 == 0) ? ALU_in1 : unsigned_rem_result; + default: alu_result = 32'h0; + endcase // alu_op end `else wire which_in2; - wire[31:0] upper_immed; + wire[31:0] upper_immed_s; - assign which_in2 = rs2_src_i == `RS2_IMMED; + assign which_in2 = src_rs2 == `RS2_IMMED; - assign ALU_in1 = a_i; + assign ALU_in1 = src_a; - assign ALU_in2 = which_in2 ? itype_immed_i : b_i; + assign ALU_in2 = which_in2 ? itype_immed : src_b; - assign upper_immed = {upper_immed_i, {12{1'b0}}}; + assign upper_immed_s = {upper_immed, {12{1'b0}}}; always @(*) begin - case(alu_op_i) - `ADD: alu_result_o = $signed(ALU_in1) + $signed(ALU_in2); - `SUB: alu_result_o = $signed(ALU_in1) - $signed(ALU_in2); - `SLLA: alu_result_o = ALU_in1 << ALU_in2[4:0]; - `SLT: alu_result_o = ($signed(ALU_in1) < $signed(ALU_in2)) ? 32'h1 : 32'h0; - `SLTU: alu_result_o = ALU_in1 < ALU_in2 ? 32'h1 : 32'h0; - `XOR: alu_result_o = ALU_in1 ^ ALU_in2; - `SRL: alu_result_o = ALU_in1 >> ALU_in2[4:0]; - `SRA: alu_result_o = $signed(ALU_in1) >>> ALU_in2[4:0]; - `OR: alu_result_o = ALU_in1 | ALU_in2; - `AND: alu_result_o = ALU_in2 & ALU_in1; - `SUBU: alu_result_o = (ALU_in1 >= ALU_in2) ? 
32'h0 : 32'hffffffff; - `LUI_ALU: alu_result_o = upper_immed; - `AUIPC_ALU: alu_result_o = $signed(curr_PC_i) + $signed(upper_immed); + case(alu_op) + `ADD: alu_result = $signed(ALU_in1) + $signed(ALU_in2); + `SUB: alu_result = $signed(ALU_in1) - $signed(ALU_in2); + `SLLA: alu_result = ALU_in1 << ALU_in2[4:0]; + `SLT: alu_result = ($signed(ALU_in1) < $signed(ALU_in2)) ? 32'h1 : 32'h0; + `SLTU: alu_result = ALU_in1 < ALU_in2 ? 32'h1 : 32'h0; + `XOR: alu_result = ALU_in1 ^ ALU_in2; + `SRL: alu_result = ALU_in1 >> ALU_in2[4:0]; + `SRA: alu_result = $signed(ALU_in1) >>> ALU_in2[4:0]; + `OR: alu_result = ALU_in1 | ALU_in2; + `AND: alu_result = ALU_in2 & ALU_in1; + `SUBU: alu_result = (ALU_in1 >= ALU_in2) ? 32'h0 : 32'hffffffff; + `LUI_ALU: alu_result = upper_immed_s; + `AUIPC_ALU: alu_result = $signed(curr_PC) + $signed(upper_immed_s); // TODO profitable to roll these exceptional cases into inst_delay to avoid pipeline when possible? - `MUL: alu_result_o = mul_result[31:0]; - `MULH: alu_result_o = mul_result[63:32]; - `MULHSU: alu_result_o = mul_result[63:32]; - `MULHU: alu_result_o = mul_result[63:32]; - `DIV: alu_result_o = (ALU_in2 == 0) ? 32'hffffffff : signed_div_result; - `DIVU: alu_result_o = (ALU_in2 == 0) ? 32'hffffffff : unsigned_div_result; - `REM: alu_result_o = (ALU_in2 == 0) ? ALU_in1 : signed_rem_result; - `REMU: alu_result_o = (ALU_in2 == 0) ? ALU_in1 : unsigned_rem_result; - default: alu_result_o = 32'h0; - endcase // alu_op_i + `MUL: alu_result = mul_result[31:0]; + `MULH: alu_result = mul_result[63:32]; + `MULHSU: alu_result = mul_result[63:32]; + `MULHU: alu_result = mul_result[63:32]; + `DIV: alu_result = (ALU_in2 == 0) ? 32'hffffffff : signed_div_result; + `DIVU: alu_result = (ALU_in2 == 0) ? 32'hffffffff : unsigned_div_result; + `REM: alu_result = (ALU_in2 == 0) ? ALU_in1 : signed_rem_result; + `REMU: alu_result = (ALU_in2 == 0) ? 
ALU_in1 : unsigned_rem_result; + default: alu_result = 32'h0; + endcase // alu_op end `endif diff --git a/hw/rtl/VX_back_end.v b/hw/rtl/VX_back_end.v index d0595265..316cbffa 100644 --- a/hw/rtl/VX_back_end.v +++ b/hw/rtl/VX_back_end.v @@ -10,8 +10,8 @@ module VX_back_end #( VX_gpu_dcache_rsp_if dcache_rsp_if, VX_gpu_dcache_req_if dcache_req_if, - output wire mem_delay_o, - output wire exec_delay_o, + output wire mem_delay, + output wire exec_delay, output wire gpr_stage_delay, VX_jal_rsp_if jal_rsp_if, VX_branch_rsp_if branch_rsp_if, @@ -65,8 +65,8 @@ VX_gpr_stage gpr_stage ( .csr_req_if (csr_req_if), .stall_gpr_csr (stall_gpr_csr), // End new - .memory_delay (mem_delay_o), - .exec_delay (exec_delay_o), + .memory_delay (mem_delay), + .exec_delay (exec_delay), .gpr_stage_delay (gpr_stage_delay) ); @@ -77,8 +77,8 @@ VX_lsu load_store_unit ( .mem_wb_if (mem_wb_if), .dcache_rsp_if (dcache_rsp_if), .dcache_req_if (dcache_req_if), - .delay_o (mem_delay_o), - .no_slot_mem_i (no_slot_mem) + .delay (mem_delay), + .no_slot_mem (no_slot_mem) ); VX_exec_unit exec_unit ( @@ -88,8 +88,8 @@ VX_exec_unit exec_unit ( .inst_exec_wb_if (inst_exec_wb_if), .jal_rsp_if (jal_rsp_if), .branch_rsp_if (branch_rsp_if), - .delay_o (exec_delay_o), - .no_slot_exec_i (no_slot_exec) + .delay (exec_delay), + .no_slot_exec (no_slot_exec) ); VX_gpgpu_inst gpgpu_inst ( @@ -117,9 +117,9 @@ VX_writeback wb ( .csr_wb_if (csr_wb_if), .writeback_if (writeback_temp_if), - .no_slot_mem_o (no_slot_mem), - .no_slot_exec_o (no_slot_exec), - .no_slot_csr_o (no_slot_csr) + .no_slot_mem (no_slot_mem), + .no_slot_exec (no_slot_exec), + .no_slot_csr (no_slot_csr) ); endmodule \ No newline at end of file diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 7d4318d5..7e44e2c0 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -342,7 +342,7 @@ // Number of Word requests per cycle {1, 2, 4, 8, ...} `ifndef L2NUM_REQUESTS -`define L2NUM_REQUESTS (2*`NUM_CORES_PER_CLUSTER) +`define L2NUM_REQUESTS 
(2*`NUM_CORES) `endif // Number of cycles to complete stage 1 (read from memory) diff --git a/hw/rtl/VX_csr_data.v b/hw/rtl/VX_csr_data.v index 0cd29da0..237cc390 100644 --- a/hw/rtl/VX_csr_data.v +++ b/hw/rtl/VX_csr_data.v @@ -4,19 +4,19 @@ module VX_csr_data ( input wire clk, // Clock input wire reset, - input wire[`CSR_ADDR_SIZE-1:0] read_csr_address_i, - input wire write_valid_i, - input wire[`CSR_WIDTH-1:0] write_csr_data_i, + input wire[`CSR_ADDR_SIZE-1:0] read_csr_address, + input wire write_valid, + input wire[`CSR_WIDTH-1:0] write_csr_data, `IGNORE_WARNINGS_BEGIN // We use a smaller storage for CSRs than the standard 4KB in RISC-V - input wire[`CSR_ADDR_SIZE-1:0] write_csr_address_i, + input wire[`CSR_ADDR_SIZE-1:0] write_csr_address, `IGNORE_WARNINGS_END - output wire[31:0] read_csr_data_o, + output wire[31:0] read_csr_data, // For instruction retire counting - input wire writeback_valid_i + input wire writeback_valid ); // wire[`NUM_THREADS-1:0][31:0] thread_ids; // wire[`NUM_THREADS-1:0][31:0] warp_ids; @@ -41,21 +41,21 @@ module VX_csr_data ( wire read_instret; wire read_instreth; - assign read_cycle = read_csr_address_i == `CSR_CYCL_L; - assign read_cycleh = read_csr_address_i == `CSR_CYCL_H; - assign read_instret = read_csr_address_i == `CSR_INST_L; - assign read_instreth = read_csr_address_i == `CSR_INST_H; + assign read_cycle = read_csr_address == `CSR_CYCL_L; + assign read_cycleh = read_csr_address == `CSR_CYCL_H; + assign read_instret = read_csr_address == `CSR_INST_L; + assign read_instreth = read_csr_address == `CSR_INST_H; wire [$clog2(`NUM_CSRS)-1:0] read_addr, write_addr; // cast address to physical CSR range - assign read_addr = $size(read_addr)'(read_csr_address_i); - assign write_addr = $size(write_addr)'(write_csr_address_i); + assign read_addr = $size(read_addr)'(read_csr_address); + assign write_addr = $size(write_addr)'(write_csr_address); - // wire thread_select = read_csr_address_i == 12'h20; - // wire warp_select = 
read_csr_address_i == 12'h21; + // wire thread_select = read_csr_address == 12'h20; + // wire warp_select = read_csr_address == 12'h21; - // assign read_csr_data_o = thread_select ? thread_ids : + // assign read_csr_data = thread_select ? thread_ids : // warp_select ? warp_ids : // 0; @@ -67,16 +67,16 @@ module VX_csr_data ( instret <= 0; end else begin cycle <= cycle + 1; - if (write_valid_i) begin - csr[write_addr] <= write_csr_data_i; + if (write_valid) begin + csr[write_addr] <= write_csr_data; end - if (writeback_valid_i) begin + if (writeback_valid) begin instret <= instret + 1; end end end - assign read_csr_data_o = read_cycle ? cycle[31:0] : + assign read_csr_data = read_cycle ? cycle[31:0] : read_cycleh ? cycle[63:32] : read_instret ? instret[31:0] : read_instreth ? instret[63:32] : diff --git a/hw/rtl/VX_csr_pipe.v b/hw/rtl/VX_csr_pipe.v index ae711d34..3921c6b0 100644 --- a/hw/rtl/VX_csr_pipe.v +++ b/hw/rtl/VX_csr_pipe.v @@ -33,12 +33,12 @@ module VX_csr_pipe #( VX_csr_data csr_data( .clk (clk), .reset (reset), - .read_csr_address_i (csr_req_if.csr_address), - .write_valid_i (is_csr_s2), - .write_csr_data_i (csr_updated_data_s2[`CSR_WIDTH-1:0]), - .write_csr_address_i(csr_address_s2), - .read_csr_data_o (csr_read_data_unqual), - .writeback_valid_i (writeback) + .read_csr_address (csr_req_if.csr_address), + .write_valid (is_csr_s2), + .write_csr_data (csr_updated_data_s2[`CSR_WIDTH-1:0]), + .write_csr_address (csr_address_s2), + .read_csr_data (csr_read_data_unqual), + .writeback_valid (writeback) ); reg [31:0] csr_updated_data; diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 9063aae4..75b6b7d0 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -8,9 +8,30 @@ // `define ASIC 1 // `define SYN_FUNC 1 +/////////////////////////////////////////////////////////////////////////////// + +`define DEBUG_BEGIN /* verilator lint_off UNUSED */ + +`define DEBUG_END /* verilator lint_on UNUSED */ + +`define IGNORE_WARNINGS_BEGIN /* verilator 
lint_off UNUSED */ \ + /* verilator lint_off PINCONNECTEMPTY */ \ + /* verilator lint_off DECLFILENAME */ + +`define IGNORE_WARNINGS_END /* verilator lint_on UNUSED */ \ + /* verilator lint_on PINCONNECTEMPTY */ \ + /* verilator lint_on DECLFILENAME */ + +`define STRINGIFY(x) `"x`" + +`define STATIC_ASSERT(cond, msg) \ + generate \ + if (!(cond)) $error(msg); \ + endgenerate + `define LOG2UP(x) ((x > 1) ? $clog2(x) : 1) -`define NUM_CORES_PER_CLUSTER (`NUM_CORES / `NUM_CLUSTERS) +/////////////////////////////////////////////////////////////////////////////// `define NW_BITS (`LOG2UP(`NUM_WARPS)) @@ -119,7 +140,7 @@ // IO BUS `define IO_BUS_ADDR 32'h00010000 -// ======================= Dcache Configurable Knobs ========================== +////////////////////////// Dcache Configurable Knobs ////////////////////////// // Function ID `define DFUNC_ID 0 @@ -133,7 +154,7 @@ // Word size in bits `define DWORD_SIZE_BITS (`DWORD_SIZE_BYTES * 8) -// ======================= Icache Configurable Knobs ========================== +////////////////////////// Icache Configurable Knobs ////////////////////////// // Function ID `define IFUNC_ID 1 @@ -144,7 +165,7 @@ // Bank Number of words in a line `define IBANK_LINE_WORDS (`IBANK_LINE_SIZE_BYTES / `IWORD_SIZE_BYTES) -// ======================= SM Configurable Knobs ============================== +////////////////////////// SM Configurable Knobs ////////////////////////////// // Function ID `define SFUNC_ID 2 @@ -155,7 +176,7 @@ // Bank Number of words in a line `define SBANK_LINE_WORDS (`SBANK_LINE_SIZE_BYTES / `SWORD_SIZE_BYTES) -// ======================= L2cache Configurable Knobs ========================= +////////////////////////// L2cache Configurable Knobs ///////////////////////// // Function ID `define L2FUNC_ID 3 @@ -166,7 +187,7 @@ // Bank Number of words in a line `define L2BANK_LINE_WORDS (`L2BANK_LINE_SIZE_BYTES / `L2WORD_SIZE_BYTES) -// ======================= L3cache Configurable Knobs ========================= 
+////////////////////////// L3cache Configurable Knobs ///////////////////////// // Function ID `define L3FUNC_ID 3 @@ -177,25 +198,5 @@ // Bank Number of words in a line `define L3BANK_LINE_WORDS (`L3BANK_LINE_SIZE_BYTES / `L3WORD_SIZE_BYTES) -//============================================================================= - -`define DEBUG_BEGIN /* verilator lint_off UNUSED */ -`define DEBUG_END /* verilator lint_on UNUSED */ - -`define IGNORE_WARNINGS_BEGIN /* verilator lint_off UNUSED */ \ - /* verilator lint_off PINCONNECTEMPTY */ \ - /* verilator lint_off DECLFILENAME */ - -`define IGNORE_WARNINGS_END /* verilator lint_on UNUSED */ \ - /* verilator lint_on PINCONNECTEMPTY */ \ - /* verilator lint_on DECLFILENAME */ - -`define STRINGIFY(x) `"x`" - -`define STATIC_ASSERT(cond, msg) \ - generate \ - if (!(cond)) $error(msg); \ - endgenerate - // VX_DEFINE `endif diff --git a/hw/rtl/VX_exec_unit.v b/hw/rtl/VX_exec_unit.v index e7d3bda9..8b91c135 100644 --- a/hw/rtl/VX_exec_unit.v +++ b/hw/rtl/VX_exec_unit.v @@ -10,12 +10,12 @@ module VX_exec_unit ( // Writeback VX_inst_exec_wb_if inst_exec_wb_if, // JAL Response - VX_jal_rsp_if jal_rsp_if, + VX_jal_rsp_if jal_rsp_if, // Branch Response - VX_branch_rsp_if branch_rsp_if, + VX_branch_rsp_if branch_rsp_if, - input wire no_slot_exec_i, - output wire delay_o + input wire no_slot_exec, + output wire delay ); wire[`NUM_THREADS-1:0][31:0] in_a_reg_data; @@ -50,15 +50,15 @@ module VX_exec_unit ( VX_alu alu( .clk (clk), .reset (reset), - .a_i (in_a_reg_data[index_out_reg]), - .b_i (in_b_reg_data[index_out_reg]), - .rs2_src_i (in_rs2_src), - .itype_immed_i (in_itype_immed), - .upper_immed_i (in_upper_immed), - .alu_op_i (in_alu_op), - .curr_PC_i (in_curr_PC), - .alu_result_o (alu_result[index_out_reg]), - .alu_stall_o (alu_stall[index_out_reg]) + .src_a (in_a_reg_data[index_out_reg]), + .src_b (in_b_reg_data[index_out_reg]), + .src_rs2 (in_rs2_src), + .itype_immed (in_itype_immed), + .upper_immed (in_upper_immed), + .alu_op 
(in_alu_op), + .curr_PC (in_curr_PC), + .alu_result (alu_result[index_out_reg]), + .alu_stall (alu_stall[index_out_reg]) ); end endgenerate @@ -66,7 +66,7 @@ module VX_exec_unit ( wire internal_stall; assign internal_stall = |alu_stall; - assign delay_o = no_slot_exec_i || internal_stall; + assign delay = no_slot_exec || internal_stall; `DEBUG_BEGIN wire [$clog2(`NUM_THREADS)-1:0] jal_branch_use_index; diff --git a/hw/rtl/VX_fetch.v b/hw/rtl/VX_fetch.v index 8754977f..e36ba5af 100644 --- a/hw/rtl/VX_fetch.v +++ b/hw/rtl/VX_fetch.v @@ -10,7 +10,7 @@ module VX_fetch ( input wire[`NW_BITS-1:0] icache_stage_wid, input wire[`NUM_THREADS-1:0] icache_stage_valids, - output wire ebreak_o, + output wire ebreak, VX_jal_rsp_if jal_rsp_if, VX_branch_rsp_if branch_rsp_if, VX_inst_meta_if fe_inst_meta_fi, @@ -86,7 +86,7 @@ module VX_fetch ( .thread_mask (thread_mask), .warp_num (warp_num), .warp_pc (warp_pc), - .ebreak_o (ebreak_o), + .ebreak (ebreak), .scheduled_warp (scheduled_warp) ); diff --git a/hw/rtl/VX_front_end.v b/hw/rtl/VX_front_end.v index 26988971..1de28550 100644 --- a/hw/rtl/VX_front_end.v +++ b/hw/rtl/VX_front_end.v @@ -52,7 +52,7 @@ module VX_front_end ( .warp_ctl_if (warp_ctl_if), .icache_stage_delay (icache_stage_delay), .branch_rsp_if (branch_rsp_if), - .ebreak_o (vortex_ebreak), // fetch_ebreak + .ebreak (vortex_ebreak), // fetch_ebreak .fe_inst_meta_fi (fe_inst_meta_fi) ); @@ -61,7 +61,7 @@ module VX_front_end ( VX_f_d_reg f_i_reg( .clk (clk), .reset (reset), - .freeze_i (freeze_fi_reg), + .freeze (freeze_fi_reg), .fe_inst_meta_fd(fe_inst_meta_fi), .fd_inst_meta_de(fe_inst_meta_fi2) ); @@ -82,7 +82,7 @@ module VX_front_end ( VX_i_d_reg i_d_reg( .clk (clk), .reset (reset), - .freeze_i (total_freeze), + .freeze (total_freeze), .fe_inst_meta_fd (fe_inst_meta_id), .fd_inst_meta_de (fd_inst_meta_de) ); @@ -100,8 +100,8 @@ module VX_front_end ( VX_d_e_reg d_e_reg( .clk (clk), .reset (reset), - .branch_stall_i (no_br_stall), - .freeze_i (total_freeze), + 
.branch_stall (no_br_stall), + .freeze (total_freeze), .frE_to_bckE_req_if (frE_to_bckE_req_if), .bckE_req_if (bckE_req_if) ); diff --git a/hw/rtl/VX_gpr.v b/hw/rtl/VX_gpr.v index c8921fcc..e6e5a4ea 100644 --- a/hw/rtl/VX_gpr.v +++ b/hw/rtl/VX_gpr.v @@ -3,17 +3,17 @@ module VX_gpr ( input wire clk, input wire reset, - input wire valid_write_request_i, + input wire valid_write_request, VX_gpr_read_if gpr_read_if, VX_wb_if writeback_if, - output reg[`NUM_THREADS-1:0][`NUM_GPRS-1:0] a_reg_data_o, - output reg[`NUM_THREADS-1:0][`NUM_GPRS-1:0] b_reg_data_o + output reg[`NUM_THREADS-1:0][`NUM_GPRS-1:0] a_reg_data, + output reg[`NUM_THREADS-1:0][`NUM_GPRS-1:0] b_reg_data ); wire write_enable; `ifndef ASIC - assign write_enable = valid_write_request_i && ((writeback_if.wb != 0)) && (writeback_if.rd != 0); + assign write_enable = valid_write_request && ((writeback_if.wb != 0)) && (writeback_if.rd != 0); byte_enabled_simple_dual_port_ram first_ram( .we (write_enable), @@ -24,11 +24,11 @@ module VX_gpr ( .raddr2(gpr_read_if.rs2), .be (writeback_if.wb_valid), .wdata (writeback_if.write_data), - .q1 (a_reg_data_o), - .q2 (b_reg_data_o) + .q1 (a_reg_data), + .q2 (b_reg_data) ); `else - assign write_enable = valid_write_request_i && ((writeback_if.wb != 0)); + assign write_enable = valid_write_request && ((writeback_if.wb != 0)); wire going_to_write = write_enable & (|writeback_if.wb_valid); wire[`NUM_THREADS-1:0][`NUM_GPRS-1:0] write_bit_mask; @@ -56,13 +56,13 @@ module VX_gpr ( begin for (curr_bit = 0; curr_bit < `NUM_GPRS; curr_bit=curr_bit+1) begin - assign a_reg_data_o[thread][curr_bit] = ((temp_a[thread][curr_bit] === 1'dx) || cena_1 )? 1'b0 : temp_a[thread][curr_bit]; - assign b_reg_data_o[thread][curr_bit] = ((temp_b[thread][curr_bit] === 1'dx) || cena_2) ? 1'b0 : temp_b[thread][curr_bit]; + assign a_reg_data[thread][curr_bit] = ((temp_a[thread][curr_bit] === 1'dx) || cena_1 )? 
1'b0 : temp_a[thread][curr_bit]; + assign b_reg_data[thread][curr_bit] = ((temp_b[thread][curr_bit] === 1'dx) || cena_2) ? 1'b0 : temp_b[thread][curr_bit]; end end `else - assign a_reg_data_o = temp_a; - assign b_reg_data_o = temp_b; + assign a_reg_data = temp_a; + assign b_reg_data = temp_b; `endif wire[`NUM_THREADS-1:0][`NUM_GPRS-1:0] to_write = (writeback_if.rd != 0) ? writeback_if.write_data : 0; diff --git a/hw/rtl/VX_gpr_stage.v b/hw/rtl/VX_gpr_stage.v index 33fea7fc..c38f3493 100644 --- a/hw/rtl/VX_gpr_stage.v +++ b/hw/rtl/VX_gpr_stage.v @@ -49,14 +49,14 @@ module VX_gpr_stage ( VX_gpr_data_if gpr_datf_if(); VX_gpr_wrapper grp_wrapper ( - .clk (clk), - .reset (reset), - .writeback_if(writeback_if), - .gpr_read_if (gpr_read_if), - .gpr_jal_if (gpr_jal_if), + .clk (clk), + .reset (reset), + .writeback_if (writeback_if), + .gpr_read_if (gpr_read_if), + .gpr_jal_if (gpr_jal_if), - .a_reg_data_o (gpr_datf_if.a_reg_data), - .b_reg_data_o (gpr_datf_if.b_reg_data) + .a_reg_data (gpr_datf_if.a_reg_data), + .b_reg_data (gpr_datf_if.b_reg_data) ); // assign bckE_req_if.is_csr = is_csr; diff --git a/hw/rtl/VX_gpr_wrapper.v b/hw/rtl/VX_gpr_wrapper.v index e5ceea56..bb17aee9 100644 --- a/hw/rtl/VX_gpr_wrapper.v +++ b/hw/rtl/VX_gpr_wrapper.v @@ -7,8 +7,8 @@ module VX_gpr_wrapper ( VX_wb_if writeback_if, VX_gpr_jal_if gpr_jal_if, - output wire[`NUM_THREADS-1:0][31:0] a_reg_data_o, - output wire[`NUM_THREADS-1:0][31:0] b_reg_data_o + output wire[`NUM_THREADS-1:0][31:0] a_reg_data, + output wire[`NUM_THREADS-1:0][31:0] b_reg_data ); wire[`NUM_WARPS-1:0][`NUM_THREADS-1:0][31:0] temp_a_reg_data; @@ -23,8 +23,8 @@ module VX_gpr_wrapper ( endgenerate `ifndef ASIC - assign a_reg_data_o = (gpr_jal_if.is_jal ? jal_data : (temp_a_reg_data[gpr_read_if.warp_num])); - assign b_reg_data_o = (temp_b_reg_data[gpr_read_if.warp_num]); + assign a_reg_data = (gpr_jal_if.is_jal ? 
jal_data : (temp_a_reg_data[gpr_read_if.warp_num])); + assign b_reg_data = (temp_b_reg_data[gpr_read_if.warp_num]); `else wire zer = 0; @@ -41,8 +41,8 @@ module VX_gpr_wrapper ( .out (old_warp_num) ); - assign a_reg_data_o = (gpr_jal_if.is_jal ? jal_data : (temp_a_reg_data[old_warp_num])); - assign b_reg_data_o = (temp_b_reg_data[old_warp_num]); + assign a_reg_data = (gpr_jal_if.is_jal ? jal_data : (temp_a_reg_data[old_warp_num])); + assign b_reg_data = (temp_b_reg_data[old_warp_num]); `endif @@ -54,11 +54,11 @@ module VX_gpr_wrapper ( VX_gpr gpr( .clk (clk), .reset (reset), - .valid_write_request_i (valid_write_request), + .valid_write_request (valid_write_request), .gpr_read_if (gpr_read_if), .writeback_if (writeback_if), - .a_reg_data_o (temp_a_reg_data[warp_index]), - .b_reg_data_o (temp_b_reg_data[warp_index]) + .a_reg_data (temp_a_reg_data[warp_index]), + .b_reg_data (temp_b_reg_data[warp_index]) ); end diff --git a/hw/rtl/VX_lsu.v b/hw/rtl/VX_lsu.v index 0425b700..1af6b804 100644 --- a/hw/rtl/VX_lsu.v +++ b/hw/rtl/VX_lsu.v @@ -3,7 +3,7 @@ module VX_lsu ( input wire clk, input wire reset, - input wire no_slot_mem_i, + input wire no_slot_mem, VX_lsu_req_if lsu_req_if, // Write back to GPR @@ -11,7 +11,7 @@ module VX_lsu ( VX_gpu_dcache_rsp_if dcache_rsp_if, VX_gpu_dcache_req_if dcache_req_if, - output wire delay_o + output wire delay ); // Generate Addresses wire[`NUM_THREADS-1:0][31:0] address; @@ -38,7 +38,7 @@ module VX_lsu ( ) lsu_buffer( .clk (clk), .reset(reset), - .stall(delay_o), + .stall(delay), .flush(zero), .in ({address , lsu_req_if.store_data, lsu_req_if.valid, lsu_req_if.mem_read, lsu_req_if.mem_write, lsu_req_if.rd, lsu_req_if.warp_num, lsu_req_if.wb, lsu_req_if.lsu_pc}), .out ({use_address, use_store_data , use_valid , use_mem_read , use_mem_write , use_rd , use_warp_num , use_wb , use_pc }) @@ -56,10 +56,10 @@ module VX_lsu ( assign dcache_req_if.core_req_pc = use_pc; // Core can't accept response - assign dcache_rsp_if.core_rsp_ready = 
~no_slot_mem_i; + assign dcache_rsp_if.core_rsp_ready = ~no_slot_mem; // Cache can't accept request - assign delay_o = ~dcache_req_if.core_req_ready; + assign delay = ~dcache_req_if.core_req_ready; // Core Response assign mem_wb_if.rd = dcache_rsp_if.core_rsp_read; diff --git a/hw/rtl/VX_warp.v b/hw/rtl/VX_warp.v index f04c4a8d..9a31e35b 100644 --- a/hw/rtl/VX_warp.v +++ b/hw/rtl/VX_warp.v @@ -6,17 +6,17 @@ module VX_warp ( input wire reset, input wire stall, input wire remove, - input wire[`NUM_THREADS-1:0] in_thread_mask, - input wire in_change_mask, - input wire in_jal, - input wire[31:0] in_jal_dest, - input wire in_branch_dir, - input wire[31:0] in_branch_dest, - input wire in_wspawn, - input wire[31:0] in_wspawn_pc, + input wire[`NUM_THREADS-1:0] thread_mask, + input wire change_mask, + input wire jal, + input wire[31:0] jal_dest, + input wire branch_dir, + input wire[31:0] branch_dest, + input wire wspawn, + input wire[31:0] wspawn_pc, - output wire[31:0] out_PC, - output wire[`NUM_THREADS-1:0] out_valid + output wire[31:0] PC, + output wire[`NUM_THREADS-1:0] valid ); reg[31:0] real_PC; @@ -41,40 +41,40 @@ module VX_warp ( always @(posedge clk) begin if (remove) begin valid <= valid_zero; - end else if (in_change_mask) begin - valid <= in_thread_mask; + end else if (change_mask) begin + valid <= thread_mask; end end genvar out_cur_th; generate - for (out_cur_th = 0; out_cur_th < `NUM_THREADS; out_cur_th = out_cur_th+1) begin : out_valid_assign - assign out_valid[out_cur_th] = in_change_mask ? in_thread_mask[out_cur_th] : stall ? 1'b0 : valid[out_cur_th]; + for (out_cur_th = 0; out_cur_th < `NUM_THREADS; out_cur_th = out_cur_th+1) begin : valid_assign + assign valid[out_cur_th] = change_mask ? thread_mask[out_cur_th] : stall ? 
1'b0 : valid[out_cur_th]; end endgenerate always @(*) begin - if (in_jal == 1'b1) begin - temp_PC = in_jal_dest; + if (jal == 1'b1) begin + temp_PC = jal_dest; // $display("LINKING TO %h", temp_PC); - end else if (in_branch_dir == 1'b1) begin - temp_PC = in_branch_dest; + end else if (branch_dir == 1'b1) begin + temp_PC = branch_dest; end else begin temp_PC = real_PC; end end assign use_PC = temp_PC; - assign out_PC = temp_PC; + assign PC = temp_PC; always @(posedge clk) begin if (reset) begin real_PC <= 0; - end else if (in_wspawn == 1'b1) begin - // $display("Inside warp ***** Spawn @ %H",in_wspawn_pc); - real_PC <= in_wspawn_pc; + end else if (wspawn == 1'b1) begin + // $display("Inside warp ***** Spawn @ %H",wspawn_pc); + real_PC <= wspawn_pc; end else if (!stall) begin real_PC <= use_PC + 32'h4; end else begin diff --git a/hw/rtl/VX_warp_sched.v b/hw/rtl/VX_warp_sched.v index 7d3485d9..e8bc6c82 100644 --- a/hw/rtl/VX_warp_sched.v +++ b/hw/rtl/VX_warp_sched.v @@ -55,7 +55,7 @@ module VX_warp_sched ( output wire[`NUM_THREADS-1:0] thread_mask, output wire[`NW_BITS-1:0] warp_num, output wire[31:0] warp_pc, - output wire ebreak_o, + output wire ebreak, output wire scheduled_warp, input wire[`NW_BITS-1:0] icache_stage_wid, @@ -331,10 +331,6 @@ module VX_warp_sched ( // .ones_found() // ); - - wire ebreak = (warp_active == 0); - assign ebreak_o = ebreak; - - /* verilator lint_on WIDTH */ + assign ebreak = (warp_active == 0); endmodule \ No newline at end of file diff --git a/hw/rtl/VX_writeback.v b/hw/rtl/VX_writeback.v index f678d39b..4e0e497d 100644 --- a/hw/rtl/VX_writeback.v +++ b/hw/rtl/VX_writeback.v @@ -12,9 +12,9 @@ module VX_writeback ( // Actual WB to GPR VX_wb_if writeback_if, - output wire no_slot_mem_o, - output wire no_slot_exec_o, - output wire no_slot_csr_o + output wire no_slot_mem, + output wire no_slot_exec, + output wire no_slot_csr ); VX_wb_if writeback_tempp_if(); @@ -23,17 +23,15 @@ module VX_writeback ( wire mem_wb = (mem_wb_if.wb != 0) && 
(|mem_wb_if.wb_valid); wire csr_wb = (csr_wb_if.wb != 0) && (|csr_wb_if.valid); - - assign no_slot_mem_o = mem_wb && (exec_wb || csr_wb); - assign no_slot_csr_o = csr_wb && (exec_wb); - assign no_slot_exec_o = 0; + assign no_slot_mem = mem_wb && (exec_wb || csr_wb); + assign no_slot_csr = csr_wb && (exec_wb); + assign no_slot_exec = 0; assign writeback_tempp_if.write_data = exec_wb ? inst_exec_wb_if.alu_result : csr_wb ? csr_wb_if.csr_result : mem_wb ? mem_wb_if.loaded_data : 0; - assign writeback_tempp_if.wb_valid = exec_wb ? inst_exec_wb_if.wb_valid : csr_wb ? csr_wb_if.valid : mem_wb ? mem_wb_if.wb_valid : @@ -52,9 +50,7 @@ module VX_writeback ( assign writeback_tempp_if.wb_warp_num = exec_wb ? inst_exec_wb_if.wb_warp_num : csr_wb ? csr_wb_if.warp_num : mem_wb ? mem_wb_if.wb_warp_num : - 0; - - + 0; assign writeback_tempp_if.wb_pc = exec_wb ? inst_exec_wb_if.exec_wb_pc : csr_wb ? 32'hdeadbeef : @@ -63,19 +59,21 @@ module VX_writeback ( wire zero = 0; - wire[`NUM_THREADS-1:0][31:0] use_wb_data; + wire [`NUM_THREADS-1:0][31:0] use_wb_data; - VX_generic_register #(.N(39 + `NW_BITS-1 + 1 + `NUM_THREADS*33)) wb_register( + VX_generic_register #( + .N(39 + `NW_BITS-1 + 1 + `NUM_THREADS*33) + ) wb_register ( .clk (clk), .reset(reset), .stall(zero), .flush(zero), .in ({writeback_tempp_if.write_data, writeback_tempp_if.wb_valid, writeback_tempp_if.rd, writeback_tempp_if.wb, writeback_tempp_if.wb_warp_num, writeback_tempp_if.wb_pc}), .out ({use_wb_data , writeback_if.wb_valid, writeback_if.rd, writeback_if.wb, writeback_if.wb_warp_num, writeback_if.wb_pc}) - ); + ); + reg [31:0] last_data_wb /* verilator public */; - reg[31:0] last_data_wb /* verilator public */ ; always @(posedge clk) begin if ((|writeback_if.wb_valid) && (writeback_if.wb != 0) && (writeback_if.rd == 28)) begin last_data_wb <= use_wb_data[0]; diff --git a/hw/rtl/Vortex.v b/hw/rtl/Vortex.v index 19ec6362..ca6f88d1 100644 --- a/hw/rtl/Vortex.v +++ b/hw/rtl/Vortex.v @@ -184,8 +184,8 @@ VX_back_end #( 
.dcache_rsp_if (dcache_rsp_if), .dcache_req_if (dcache_req_if), .writeback_if (writeback_if), - .mem_delay_o (memory_delay), - .exec_delay_o (exec_delay), + .mem_delay (memory_delay), + .exec_delay (exec_delay), .gpr_stage_delay (gpr_stage_delay) ); diff --git a/hw/rtl/Vortex_Socket.v b/hw/rtl/Vortex_Socket.v index 1b1bf902..1e9e66e7 100644 --- a/hw/rtl/Vortex_Socket.v +++ b/hw/rtl/Vortex_Socket.v @@ -42,7 +42,9 @@ module Vortex_Socket ( assign io_data [curr_c] = cluster_io_data [curr_c]; end - Vortex_Cluster #(.CLUSTER_ID(0)) Vortex_Cluster( + Vortex_Cluster #( + .CLUSTER_ID(0) + ) Vortex_Cluster ( .clk (clk), .reset (reset), .io_valid (cluster_io_valid), @@ -260,7 +262,6 @@ module Vortex_Socket ( .snp_fwd_addr (snp_fwd_addr), .snp_fwd_ready (& snp_fwd_ready) ); - end endmodule \ No newline at end of file diff --git a/hw/rtl/cache/VX_cache_core_req_bank_sel.v b/hw/rtl/cache/VX_cache_core_req_bank_sel.v index 850fe19f..ef908048 100644 --- a/hw/rtl/cache/VX_cache_core_req_bank_sel.v +++ b/hw/rtl/cache/VX_cache_core_req_bank_sel.v @@ -1,8 +1,7 @@ `include "VX_cache_config.vh" -module VX_cache_core_req_bank_sel - #( +module VX_cache_core_req_bank_sel #( // Size of cache in bytes parameter CACHE_SIZE_BYTES = 1024, // Size of line inside a bank in bytes @@ -18,8 +17,7 @@ module VX_cache_core_req_bank_sel // Function ID, {Dcache=0, Icache=1, Sharedmemory=2} parameter FUNC_ID = 0, -// Queues feeding into banks Knobs {1, 2, 4, 8, ...} - + // Queues feeding into banks Knobs {1, 2, 4, 8, ...} // Core Request Queue Size parameter REQQ_SIZE = 8, // Miss Reserv Queue Knob @@ -29,7 +27,7 @@ module VX_cache_core_req_bank_sel // Snoop Req Queue parameter SNRQ_SIZE = 8, -// Queues for writebacks Knobs {1, 2, 4, 8, ...} + // Queues for writebacks Knobs {1, 2, 4, 8, ...} // Core Writeback Queue Size parameter CWBQ_SIZE = 8, // Dram Writeback Queue Size @@ -42,12 +40,9 @@ module VX_cache_core_req_bank_sel // Fill Invalidator Size {Fill invalidator must be active} parameter 
FILL_INVALIDAOR_SIZE = 16, -// Dram knobs + // Dram knobs parameter SIMULATED_DRAM_LATENCY_CYCLES = 10 - - - ) - ( +) ( input wire [NUM_REQUESTS-1:0] core_req_valid, input wire [NUM_REQUESTS-1:0][31:0] core_req_addr, diff --git a/hw/rtl/cache/VX_cache_req_queue.v b/hw/rtl/cache/VX_cache_req_queue.v index c78b200f..2427868b 100644 --- a/hw/rtl/cache/VX_cache_req_queue.v +++ b/hw/rtl/cache/VX_cache_req_queue.v @@ -56,21 +56,21 @@ module VX_cache_req_queue #( input wire [31:0] bank_pc, // Dequeue Data - input wire reqq_pop, - output wire reqq_req_st0, + input wire reqq_pop, + output wire reqq_req_st0, output wire [`LOG2UP(NUM_REQUESTS)-1:0] reqq_req_tid_st0, - output wire [31:0] reqq_req_addr_st0, - output wire [`WORD_SIZE_RNG] reqq_req_writedata_st0, - output wire [4:0] reqq_req_rd_st0, - output wire [1:0] reqq_req_wb_st0, - output wire [`NW_BITS-1:0] reqq_req_warp_num_st0, - output wire [2:0] reqq_req_mem_read_st0, - output wire [2:0] reqq_req_mem_write_st0, - output wire [31:0] reqq_req_pc_st0, + output wire [31:0] reqq_req_addr_st0, + output wire [`WORD_SIZE_RNG] reqq_req_writedata_st0, + output wire [4:0] reqq_req_rd_st0, + output wire [1:0] reqq_req_wb_st0, + output wire [`NW_BITS-1:0] reqq_req_warp_num_st0, + output wire [2:0] reqq_req_mem_read_st0, + output wire [2:0] reqq_req_mem_write_st0, + output wire [31:0] reqq_req_pc_st0, // State Data - output wire reqq_empty, - output wire reqq_full + output wire reqq_empty, + output wire reqq_full ); wire [NUM_REQUESTS-1:0] out_per_valids; diff --git a/hw/rtl/cache/VX_dcache_llv_resp_bank_sel.v b/hw/rtl/cache/VX_dcache_llv_resp_bank_sel.v deleted file mode 100644 index 66977e4b..00000000 --- a/hw/rtl/cache/VX_dcache_llv_resp_bank_sel.v +++ /dev/null @@ -1,79 +0,0 @@ -`include "VX_cache_config.vh" - -module VX_dcache_llv_resp_bank_sel #( - // Size of cache in bytes - parameter CACHE_SIZE_BYTES = 1024, - // Size of line inside a bank in bytes - parameter BANK_LINE_SIZE_BYTES = 16, - // Number of banks {1, 2, 4, 8,...} 
- parameter NUM_BANKS = 8, - // Size of a word in bytes - parameter WORD_SIZE_BYTES = 4, - // Number of Word requests per cycle {1, 2, 4, 8, ...} - parameter NUM_REQUESTS = 2, - // Number of cycles to complete stage 1 (read from memory) - parameter STAGE_1_CYCLES = 2, - - // Queues feeding into banks Knobs {1, 2, 4, 8, ...} - // Core Request Queue Size - parameter REQQ_SIZE = 8, - // Miss Reserv Queue Knob - parameter MRVQ_SIZE = 8, - // Dram Fill Rsp Queue Size - parameter DFPQ_SIZE = 2, - // Snoop Req Queue - parameter SNRQ_SIZE = 8, - - // Queues for writebacks Knobs {1, 2, 4, 8, ...} - // Core Writeback Queue Size - parameter CWBQ_SIZE = 8, - // Dram Writeback Queue Size - parameter DWBQ_SIZE = 4, - // Dram Fill Req Queue Size - parameter DFQQ_SIZE = 8, - // Lower Level Cache Hit Queue Size - parameter LLVQ_SIZE = 16, - - // Fill Invalidator Size {Fill invalidator must be active} - parameter FILL_INVALIDAOR_SIZE = 16, - - // Dram knobs - parameter SIMULATED_DRAM_LATENCY_CYCLES = 10 -) ( - output reg [NUM_BANKS-1:0] per_bank_llvq_pop, - input wire[NUM_BANKS-1:0] per_bank_llvq_valid, - input wire[NUM_BANKS-1:0][31:0] per_bank_llvq_rsp_addr, - input wire[NUM_BANKS-1:0][`BANK_LINE_WORDS-1:0][31:0] per_bank_llvq_rsp_data, - input wire[NUM_BANKS-1:0][`LOG2UP(NUM_REQUESTS)-1:0] per_bank_llvq_rsp_tid, - - input wire llvq_pop, - output reg[NUM_REQUESTS-1:0] llvq_valid, - output reg[NUM_REQUESTS-1:0][31:0] llvq_rsp_addr, - output reg[NUM_REQUESTS-1:0][`BANK_LINE_WORDS-1:0][31:0] llvq_rsp_data -); - - wire [(`LOG2UP(NUM_BANKS))-1:0] main_bank_index; - wire found_bank; - - VX_generic_priority_encoder #( - .N(NUM_BANKS) - ) sel_bank( - .valids(per_bank_llvq_valid), - .index (main_bank_index), - .found (found_bank) - ); - - always @(*) begin - llvq_valid = 0; - llvq_rsp_addr = 0; - llvq_rsp_data = 0; - per_bank_llvq_pop = 0; - if (found_bank && llvq_pop) begin - llvq_valid [per_bank_llvq_rsp_tid[main_bank_index]] = 1'b1; - 
llvq_rsp_addr[per_bank_llvq_rsp_tid[main_bank_index]] = per_bank_llvq_rsp_addr[main_bank_index]; - llvq_rsp_data[per_bank_llvq_rsp_tid[main_bank_index]] = per_bank_llvq_rsp_data[main_bank_index]; - per_bank_llvq_pop[main_bank_index] = 1'b1; - end - end - -endmodule diff --git a/hw/rtl/cache/VX_fill_invalidator.v b/hw/rtl/cache/VX_fill_invalidator.v index 9b4b42d0..bde2a2c3 100644 --- a/hw/rtl/cache/VX_fill_invalidator.v +++ b/hw/rtl/cache/VX_fill_invalidator.v @@ -1,7 +1,6 @@ `include "VX_cache_config.vh" -module VX_fill_invalidator - #( +module VX_fill_invalidator #( // Size of cache in bytes parameter CACHE_SIZE_BYTES = 1024, // Size of line inside a bank in bytes @@ -15,8 +14,7 @@ module VX_fill_invalidator // Number of cycles to complete stage 1 (read from memory) parameter STAGE_1_CYCLES = 2, -// Queues feeding into banks Knobs {1, 2, 4, 8, ...} - + // Queues feeding into banks Knobs {1, 2, 4, 8, ...} // Core Request Queue Size parameter REQQ_SIZE = 8, // Miss Reserv Queue Knob @@ -26,7 +24,7 @@ module VX_fill_invalidator // Snoop Req Queue parameter SNRQ_SIZE = 8, -// Queues for writebacks Knobs {1, 2, 4, 8, ...} + // Queues for writebacks Knobs {1, 2, 4, 8, ...} // Core Writeback Queue Size parameter CWBQ_SIZE = 8, // Dram Writeback Queue Size @@ -39,12 +37,9 @@ module VX_fill_invalidator // Fill Invalidator Size {Fill invalidator must be active} parameter FILL_INVALIDAOR_SIZE = 16, -// Dram knobs + // Dram knobs parameter SIMULATED_DRAM_LATENCY_CYCLES = 10 - - - ) - ( +) ( input wire clk, input wire reset, @@ -53,22 +48,19 @@ module VX_fill_invalidator input wire[31:0] fill_addr, - output reg invalidate_fill - + output reg invalidate_fill ); - if (FILL_INVALIDAOR_SIZE == 0) begin assign invalidate_fill = 0; end else begin - reg[FILL_INVALIDAOR_SIZE-1:0] fills_active; - reg[FILL_INVALIDAOR_SIZE-1:0][31:0] fills_address; + reg [FILL_INVALIDAOR_SIZE-1:0] fills_active; + reg [FILL_INVALIDAOR_SIZE-1:0][31:0] fills_address; - - reg[FILL_INVALIDAOR_SIZE-1:0] 
matched_fill; + reg [FILL_INVALIDAOR_SIZE-1:0] matched_fill; wire matched; integer fi; always @(*) begin @@ -77,10 +69,8 @@ module VX_fill_invalidator end end - assign matched = (|(matched_fill)); - wire [(`LOG2UP(FILL_INVALIDAOR_SIZE))-1:0] enqueue_index; wire enqueue_found; @@ -110,7 +100,7 @@ module VX_fill_invalidator end end - // reg success_found; + // reg success_found; // reg[(`LOG2UP(FILL_INVALIDAOR_SIZE))-1:0] success_index; // integer curr_fill; diff --git a/hw/rtl/cache/VX_mrv_queue.v b/hw/rtl/cache/VX_mrv_queue.v deleted file mode 100644 index 36fedd7a..00000000 --- a/hw/rtl/cache/VX_mrv_queue.v +++ /dev/null @@ -1,122 +0,0 @@ - -module VX_mrv_queue - #( - parameter DATAW = 4, - parameter SIZE = 277 - ) - ( - input wire clk, - input wire reset, - input wire push, - input wire[DATAW-1:0] in_data, - - input wire pop, - output wire[DATAW-1:0] out_data, - output wire empty, - output wire full -); - - if (SIZE == 0) begin - assign empty = 1; - assign out_data = 0; - assign full = 0; - end else begin - - reg[DATAW-1:0] data[SIZE-1:0], curr_r, head_r; - reg[$clog2(SIZE+1)-1:0] size_r; - reg[$clog2(SIZE)-1:0] wr_ctr_r; - reg[$clog2(SIZE)-1:0] rd_ptr_r, rd_next_ptr_r; - reg empty_r, full_r, bypass_r; - wire reading, writing; - - assign reading = pop && !empty; - assign writing = push && !full; - - if (SIZE == 1) begin - always @(posedge clk) begin - if (reset) begin - size_r <= 0; - end else begin - if (writing && !reading) begin - size_r <= 1; - end else if (reading && !writing) begin - size_r <= 0; - end - - if (writing) begin - head_r <= in_data; - end - end - end - - assign out_data = head_r; - assign empty = (size_r == 0); - assign full = (size_r != 0) && !pop; - end else begin - always @(posedge clk) begin - if (reset) begin - wr_ctr_r <= 0; - end else begin - if (writing) - wr_ctr_r <= wr_ctr_r + 1; - end - end - - always @(posedge clk) begin - if (reset) begin - size_r <= 0; - empty_r <= 1; - full_r <= 0; - end else begin - if (writing && !reading) 
begin - size_r <= size_r + 1; - empty_r <= 0; - if (size_r == SIZE-1) - full_r <= 1; - end else if (reading && !writing) begin - size_r <= size_r - 1; - if (size_r == 1) - empty_r <= 1; - full_r <= 0; - end - end - end - - always @(posedge clk) begin - if (writing) begin - data[wr_ctr_r] <= in_data; - end - end - - always @(posedge clk) begin - if (reset) begin - rd_ptr_r <= 0; - rd_next_ptr_r <= 1; - bypass_r <= 0; - end else begin - if (reading) begin - if (SIZE == 2) begin - rd_ptr_r <= rd_next_ptr_r; - rd_next_ptr_r <= ~rd_next_ptr_r; - end else if (SIZE > 2) begin - rd_ptr_r <= rd_next_ptr_r; - rd_next_ptr_r <= rd_ptr_r + 2; - end - end - - bypass_r <= writing && (empty_r || (1 == size_r) && reading); - curr_r <= in_data; - head_r <= data[reading ? rd_next_ptr_r : rd_ptr_r]; - end - end - - assign out_data = bypass_r ? curr_r : head_r; - assign empty = empty_r; - assign full = full_r; - end - - end - - - -endmodule \ No newline at end of file diff --git a/hw/rtl/pipe_regs/VX_d_e_reg.v b/hw/rtl/pipe_regs/VX_d_e_reg.v index f4e6b2f8..4c3018cf 100644 --- a/hw/rtl/pipe_regs/VX_d_e_reg.v +++ b/hw/rtl/pipe_regs/VX_d_e_reg.v @@ -3,14 +3,14 @@ module VX_d_e_reg ( input wire clk, input wire reset, - input wire branch_stall_i, - input wire freeze_i, + input wire branch_stall, + input wire freeze, VX_frE_to_bckE_req_if frE_to_bckE_req_if, VX_frE_to_bckE_req_if bckE_req_if ); - wire stall = freeze_i; - wire flush = (branch_stall_i == `STALL); + wire stall = freeze; + wire flush = (branch_stall == `STALL); VX_generic_register #( .N(233 + `NW_BITS-1 + 1 + `NUM_THREADS) diff --git a/hw/rtl/pipe_regs/VX_f_d_reg.v b/hw/rtl/pipe_regs/VX_f_d_reg.v index bdcdde62..166254cb 100644 --- a/hw/rtl/pipe_regs/VX_f_d_reg.v +++ b/hw/rtl/pipe_regs/VX_f_d_reg.v @@ -3,7 +3,7 @@ module VX_f_d_reg ( input wire clk, input wire reset, - input wire freeze_i, + input wire freeze, VX_inst_meta_if fe_inst_meta_fd, VX_inst_meta_if fd_inst_meta_de @@ -11,7 +11,7 @@ module VX_f_d_reg ( ); wire flush = 
1'b0; - wire stall = freeze_i == 1'b1; + wire stall = freeze == 1'b1; VX_generic_register #( .N(64+`NW_BITS-1+1+`NUM_THREADS) diff --git a/hw/rtl/pipe_regs/VX_i_d_reg.v b/hw/rtl/pipe_regs/VX_i_d_reg.v index 475eebf9..ce0e650c 100644 --- a/hw/rtl/pipe_regs/VX_i_d_reg.v +++ b/hw/rtl/pipe_regs/VX_i_d_reg.v @@ -3,7 +3,7 @@ module VX_i_d_reg ( input wire clk, input wire reset, - input wire freeze_i, + input wire freeze, VX_inst_meta_if fe_inst_meta_fd, VX_inst_meta_if fd_inst_meta_de @@ -11,7 +11,7 @@ module VX_i_d_reg ( ); wire flush = 1'b0; - wire stall = freeze_i == 1'b1; + wire stall = freeze == 1'b1; VX_generic_register #( diff --git a/hw/unit_tests/generic_queue/testbench.v b/hw/unit_tests/generic_queue/testbench.v index 885edaeb..53d69d06 100644 --- a/hw/unit_tests/generic_queue/testbench.v +++ b/hw/unit_tests/generic_queue/testbench.v @@ -7,22 +7,26 @@ module testbench(); reg clk; reg reset; - reg[3:0] in_data; + reg[3:0] data_in; reg push; reg pop; - wire[3:0] out_data; + wire[3:0] data_out; wire full; wire empty; - VX_generic_queue #(.DATAW(4), .SIZE(4)) dut ( - .clk(clk), - .reset(reset), - .data_in(in_data), - .push(push), - .pop(pop), - .data_out(out_data), - .empty(empty), - .full(full)); + VX_generic_queue #( + .DATAW(4), + .SIZE(4) + ) dut ( + .clk(clk), + .reset(reset), + .data_in(data_in), + .push(push), + .pop(pop), + .data_out(data_out), + .empty(empty), + .full(full) + ); always begin #1 clk = !clk; @@ -30,27 +34,27 @@ module testbench(); initial begin $monitor ("%d: clk=%b rst=%b push=%b, pop=%b, din=%h, empty=%b, full=%b, dout=%h", - $time, clk, reset, push, pop, in_data, empty, full, out_data); + $time, clk, reset, push, pop, data_in, empty, full, data_out); #0 clk=0; reset=1; pop=0; push=0; - #2 reset=0; in_data=4'ha; pop=0; push=1; - #2 `check(full, 0); `check(out_data, 4'ha); `check(empty, 0); - #0 in_data=4'hb; - #2 `check(full, 0); `check(out_data, 4'ha); `check(empty, 0); - #0 in_data=4'hc; - #2 `check(full, 0); `check(out_data, 4'ha); 
`check(empty, 0); - #0 in_data=4'hd; - #2 `check(full, 1); `check(out_data, 4'ha); `check(empty, 0); + #2 reset=0; data_in=4'ha; pop=0; push=1; + #2 `check(full, 0); `check(data_out, 4'ha); `check(empty, 0); + #0 data_in=4'hb; + #2 `check(full, 0); `check(data_out, 4'ha); `check(empty, 0); + #0 data_in=4'hc; + #2 `check(full, 0); `check(data_out, 4'ha); `check(empty, 0); + #0 data_in=4'hd; + #2 `check(full, 1); `check(data_out, 4'ha); `check(empty, 0); #0 push=0; pop=1; - #2 `check(full, 0); `check(out_data, 4'hb); `check(empty, 0); - #2 `check(full, 0); `check(out_data, 4'hc); `check(empty, 0); - #2 `check(full, 0); `check(out_data, 4'hd); `check(empty, 0); - #2 `check(full, 0); `check(out_data, 4'ha); `check(empty, 1); - #0 in_data=4'he; push=1; pop=0; - #2 `check(full, 0); `check(out_data, 4'he); `check(empty, 0); - #0 in_data=4'hf; pop=1; - #2 `check(full, 0); `check(out_data, 4'hf); `check(empty, 0); + #2 `check(full, 0); `check(data_out, 4'hb); `check(empty, 0); + #2 `check(full, 0); `check(data_out, 4'hc); `check(empty, 0); + #2 `check(full, 0); `check(data_out, 4'hd); `check(empty, 0); + #2 `check(full, 0); `check(data_out, 4'ha); `check(empty, 1); + #0 data_in=4'he; push=1; pop=0; + #2 `check(full, 0); `check(data_out, 4'he); `check(empty, 0); + #0 data_in=4'hf; pop=1; + #2 `check(full, 0); `check(data_out, 4'hf); `check(empty, 0); #0 push=0; - #2 `check(full, 0); `check(out_data, 4'hc); `check(empty, 1); + #2 `check(full, 0); `check(data_out, 4'hc); `check(empty, 1); #1 $finish; end diff --git a/simX/cache_simX.v b/simX/cache_simX.v index ce3f746e..dd343fdb 100644 --- a/simX/cache_simX.v +++ b/simX/cache_simX.v @@ -1,4 +1,3 @@ - `include "VX_define.vh" module cache_simX ( @@ -6,36 +5,28 @@ module cache_simX ( input wire reset, // Icache - input wire[31:0] in_icache_pc_addr, - input wire in_icache_valid_pc_addr, - output wire out_icache_stall, - + input wire[31:0] cache_pc_addr, + input wire icache_valid_pc_addr, + output wire icache_stall, // Dcache - 
input wire[2:0] in_dcache_mem_read, - input wire[2:0] in_dcache_mem_write, - input wire in_dcache_in_valid[`NT_M1:0], - input wire[31:0] in_dcache_in_address[`NT_M1:0], - output wire out_dcache_stall - + input wire[2:0] dcache_mem_read, + input wire[2:0] dcache_mem_write, + input wire dcache_in_valid[`NT_M1:0], + input wire[31:0] dcache_in_addr[`NT_M1:0], + output wire dcache_stall ); - - - - - //////////////////// ICACHE /////////////////// + VX_icache_request_if VX_icache_req; - assign VX_icache_req.pc_address = in_icache_pc_addr; - assign VX_icache_req.cache_driver_in_mem_read_o = (in_icache_valid_pc_addr) ? `LW_MEM_READ : `NO_MEM_READ; + assign VX_icache_req.pc_address = cache_pc_addr; + assign VX_icache_req.cache_driver_in_mem_read_o = (icache_valid_pc_addr) ? `LW_MEM_READ : `NO_MEM_READ; assign VX_icache_req.cache_driver_in_mem_write_o = `NO_MEM_WRITE; - assign VX_icache_req.cache_driver_in_valid_o = in_icache_valid_pc_addr; + assign VX_icache_req.cache_driver_in_valid_o = icache_valid_pc_addr; assign VX_icache_req.cache_driver_in_data_o = 0; - VX_icache_rsp_if VX_icache_rsp; - assign out_icache_stall = VX_icache_rsp.delay; - + assign icache_stall = VX_icache_rsp.delay; VX_dram_req_rsp_if #( @@ -48,25 +39,22 @@ module cache_simX ( assign VX_dram_req_rsp_icache.i_m_ready = icache_i_m_ready; - - //////////////////// DCACHE /////////////////// VX_dcache_request_if VX_dcache_req; - assign VX_dcache_req.cache_driver_in_mem_read_o = in_dcache_mem_read; - assign VX_dcache_req.cache_driver_in_mem_write_o = in_dcache_mem_write; + assign VX_dcache_req.cache_driver_in_mem_read_o = dcache_mem_read; + assign VX_dcache_req.cache_driver_in_mem_write_o = dcache_mem_write; assign VX_dcache_req.cache_driver_in_data_o = 0; genvar curr_t; for (curr_t = 0; curr_t < `NT; curr_t=curr_t+1) begin - assign VX_dcache_req.cache_driver_in_address_o[curr_t] = in_dcache_in_address[curr_t]; - assign VX_dcache_req.cache_driver_in_valid_o[curr_t] = in_dcache_in_valid[curr_t]; + assign 
VX_dcache_req.cache_driver_in_address_o[curr_t] = dcache_in_addr[curr_t]; + assign VX_dcache_req.cache_driver_in_valid_o[curr_t] = dcache_in_valid[curr_t]; end VX_dcache_response_if VX_dcache_rsp; - assign out_dcache_stall = VX_dcache_rsp.delay; - + assign dcache_stall = VX_dcache_rsp.delay; VX_dram_req_rsp_if #( @@ -78,7 +66,6 @@ module cache_simX ( reg dcache_i_m_ready; assign VX_dram_req_rsp.i_m_ready = dcache_i_m_ready; - VX_dmem_ctrl dmem_controller ( .clk (clk), .reset (reset), @@ -118,7 +105,6 @@ module cache_simX ( end end - endmodule