diff --git a/ci/test_riscv_isa.sh b/ci/test_riscv_isa.sh index 4b47b427..9195cedd 100755 --- a/ci/test_riscv_isa.sh +++ b/ci/test_riscv_isa.sh @@ -3,4 +3,4 @@ # exit when any command fails set -e -make -C hw/simulate run +make -C benchmarks/riscv_tests/isa run diff --git a/driver/common/vx_utils.cpp b/driver/common/vx_utils.cpp index 61371d24..6debb282 100644 --- a/driver/common/vx_utils.cpp +++ b/driver/common/vx_utils.cpp @@ -147,8 +147,8 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) { for (unsigned core_id = 0; core_id < num_cores; ++core_id) { uint64_t instrs_per_core, cycles_per_core; - ret |= vx_csr_get_l(device, core_id, CSR_MINSTRET, CSR_MINSTRET_H, &instrs_per_core); - ret |= vx_csr_get_l(device, core_id, CSR_MCYCLE, CSR_MCYCLE_H, &cycles_per_core); + ret |= vx_csr_get_l(device, core_id, CSR_INSTRET, CSR_INSTRET_H, &instrs_per_core); + ret |= vx_csr_get_l(device, core_id, CSR_CYCLE, CSR_CYCLE_H, &cycles_per_core); float IPC = (float)(double(instrs_per_core) / double(cycles_per_core)); if (num_cores > 1) fprintf(stream, "PERF: core%d: instrs=%ld, cycles=%ld, IPC=%f\n", core_id, instrs_per_core, cycles_per_core, IPC); instrs += instrs_per_core; diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index a7a959e4..83583707 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -158,10 +158,10 @@ `define CSR_MEPC 12'h341 // Machine Counter/Timers -`define CSR_MCYCLE 12'hB00 -`define CSR_MCYCLE_H 12'hB80 -`define CSR_MINSTRET 12'hB02 -`define CSR_MINSTRET_H 12'hB82 +`define CSR_CYCLE 12'hC00 +`define CSR_CYCLE_H 12'hC80 +`define CSR_INSTRET 12'hC02 +`define CSR_INSTRET_H 12'hC82 // Machine Performance-monitoring counters // PERF: pipeline diff --git a/hw/rtl/VX_csr_data.v b/hw/rtl/VX_csr_data.v index ba17dc66..29c6680e 100644 --- a/hw/rtl/VX_csr_data.v +++ b/hw/rtl/VX_csr_data.v @@ -196,10 +196,10 @@ module VX_csr_data #( `CSR_PMPCFG0 : read_data_r = 32'(csr_pmpcfg[0]); `CSR_PMPADDR0 : read_data_r = 32'(csr_pmpaddr[0]); - `CSR_MCYCLE : read_data_r = csr_cycle[31:0]; - `CSR_MCYCLE_H : read_data_r = csr_cycle[63:32]; - `CSR_MINSTRET : read_data_r = csr_instret[31:0]; - `CSR_MINSTRET_H: read_data_r = csr_instret[63:32]; + `CSR_CYCLE : read_data_r = csr_cycle[31:0]; + `CSR_CYCLE_H : read_data_r = csr_cycle[63:32]; + `CSR_INSTRET : read_data_r = csr_instret[31:0]; + `CSR_INSTRET_H : read_data_r = csr_instret[63:32]; `CSR_MVENDORID : read_data_r = `VENDOR_ID; `CSR_MARCHID : read_data_r = `ARCHITECTURE_ID; diff --git a/hw/rtl/VX_decode.v b/hw/rtl/VX_decode.v index f0e86471..947e06b5 100644 --- a/hw/rtl/VX_decode.v +++ b/hw/rtl/VX_decode.v @@ -269,9 +269,7 @@ module VX_decode #( wire is_lsu = (is_ltype || is_stype || is_fl || is_fs); always @(*) begin - lsu_op = {is_stype, func3}; - if (is_fl) lsu_op = `LSU_LW; - if (is_fs) lsu_op = `LSU_SW; + lsu_op = (is_fl || is_fs) ? `LSU_SW : func3; end // GPU @@ -307,7 +305,8 @@ module VX_decode #( /////////////////////////////////////////////////////////////////////////// - assign decode_if.valid = ifetch_rsp_if.valid; + assign decode_if.valid = ifetch_rsp_if.valid + && (decode_if.ex_type != `EX_NOP); // skip noop assign decode_if.wid = ifetch_rsp_if.wid; assign decode_if.tmask = ifetch_rsp_if.tmask; diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index f033083b..f7e7462f 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -54,14 +54,6 @@ /////////////////////////////////////////////////////////////////////////////// -`define BYTEEN_SB 3'h0 -`define BYTEEN_SH 3'h1 -`define BYTEEN_SW 3'h2 -`define BYTEEN_UB 3'h4 -`define BYTEEN_UH 3'h5 -`define BYTEEN_BITS 3 -`define BYTEEN_TYPE(x) x[1:0] - `define FRM_RNE 3'b000 // round to nearest even `define FRM_RTZ 3'b001 // round to zero `define FRM_RDN 3'b010 // round to -inf @@ -130,19 +122,14 @@ `define ALU_BR_OP(x) x[`ALU_BR_BITS-1:0] `define IS_BR_MOD(x) x[0] -`define LSU_LB {1'b0, `BYTEEN_SB} -`define LSU_LH {1'b0, `BYTEEN_SH} -`define LSU_LW {1'b0, `BYTEEN_SW} -`define LSU_LBU {1'b0, `BYTEEN_UB} -`define LSU_LHU {1'b0, `BYTEEN_UH} -`define LSU_SB {1'b1, `BYTEEN_SB} -`define LSU_SH {1'b1, `BYTEEN_SH} -`define LSU_SW {1'b1, `BYTEEN_SW} -`define LSU_SBU {1'b1, `BYTEEN_UB} -`define LSU_SHU {1'b1, `BYTEEN_UH} -`define LSU_BITS 4 -`define LSU_RW(x) x[3] -`define LSU_BE(x) x[2:0] +`define LSU_SB 3'h0 +`define LSU_SH 3'h1 +`define LSU_SW 3'h2 +`define LSU_UB 3'h4 +`define LSU_UH 3'h5 +`define LSU_BITS 3 +`define LSU_WSIZE(x) x[1:0] +`define LSU_OP(x) x[`LSU_BITS-1:0] `define CSR_RW 2'h0 `define CSR_RS 2'h1 diff --git a/hw/rtl/VX_gpr_stage.v b/hw/rtl/VX_gpr_stage.v index ce4783e1..98c07d7c 100644 --- a/hw/rtl/VX_gpr_stage.v +++ b/hw/rtl/VX_gpr_stage.v @@ -14,6 +14,9 @@ module VX_gpr_stage #( VX_gpr_rsp_if gpr_rsp_if ); `UNUSED_VAR (reset) + + // ensure r0 never gets written, which can happen before the reset + wire write_enable = writeback_if.valid && (writeback_if.rd != 0); `ifdef EXT_F_ENABLE localparam RAM_DEPTH = `NUM_WARPS * `NUM_REGS; @@ -31,7 +34,7 @@ module VX_gpr_stage #( .DEPTH (RAM_DEPTH) ) gpr_ram_f ( .clk (clk), - .wren (writeback_if.valid && writeback_if.tmask[i]), + .wren (write_enable && writeback_if.tmask[i]), .waddr (waddr), .wdata (writeback_if.data[i]), .raddr1 (raddr1), @@ -62,7 +65,7 @@ module VX_gpr_stage #( .DEPTH (RAM_DEPTH) ) gpr_ram_i ( .clk (clk), - .wren (writeback_if.valid && writeback_if.tmask[i]), + .wren (write_enable && writeback_if.tmask[i]), .waddr (waddr), .wdata (writeback_if.data[i]), .raddr1 (raddr1), diff --git a/hw/rtl/VX_instr_demux.v b/hw/rtl/VX_instr_demux.v index affa797b..167f2f89 100644 --- a/hw/rtl/VX_instr_demux.v +++ b/hw/rtl/VX_instr_demux.v @@ -54,14 +54,14 @@ module VX_instr_demux ( wire lsu_req_ready; VX_skid_buffer #( - .DATAW (`NW_BITS + `NUM_THREADS + 32 + 1 + `BYTEEN_BITS + 32 + `NR_BITS + 1 + (2 * `NUM_THREADS * 32)) + .DATAW (`NW_BITS + `NUM_THREADS + 32 + `LSU_BITS + 32 + `NR_BITS + 1 + (2 * `NUM_THREADS * 32)) ) lsu_buffer ( .clk (clk), .reset (reset), .valid_in (lsu_req_valid), .ready_in (lsu_req_ready), - .data_in ({execute_if.wid, execute_if.tmask, execute_if.PC, `LSU_RW(execute_if.op_type), `LSU_BE(execute_if.op_type), execute_if.imm, execute_if.rd, execute_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}), - .data_out ({lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, lsu_req_if.rw, lsu_req_if.byteen, lsu_req_if.offset, lsu_req_if.rd, lsu_req_if.wb, lsu_req_if.base_addr, lsu_req_if.store_data}), + .data_in ({execute_if.wid, execute_if.tmask, execute_if.PC, `LSU_OP(execute_if.op_type), execute_if.imm, execute_if.rd, execute_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}), + .data_out ({lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, lsu_req_if.op_type, lsu_req_if.offset, lsu_req_if.rd, lsu_req_if.wb, lsu_req_if.base_addr, lsu_req_if.store_data}), .valid_out (lsu_req_if.valid), .ready_out (lsu_req_if.ready) ); diff --git a/hw/rtl/VX_issue.v b/hw/rtl/VX_issue.v index f415a999..1cd122a6 100644 --- a/hw/rtl/VX_issue.v +++ b/hw/rtl/VX_issue.v @@ -205,7 +205,7 @@ module VX_issue #( $display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=ALU, tmask=%b, rd=%0d, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, alu_req_if.wid, alu_req_if.PC, alu_req_if.tmask, alu_req_if.rd, alu_req_if.rs1_data, alu_req_if.rs2_data); end if (lsu_req_if.valid && lsu_req_if.ready) begin - $display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=LSU, tmask=%b, rd=%0d, rw=%b, byteen=%b, baddr=%0h, offset=%0h, data=%0h", $time, CORE_ID, lsu_req_if.wid, lsu_req_if.PC, lsu_req_if.tmask, lsu_req_if.rd, lsu_req_if.rw, lsu_req_if.byteen, lsu_req_if.base_addr, lsu_req_if.offset, lsu_req_if.store_data); + $display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=LSU, tmask=%b, rd=%0d, baddr=%0h, offset=%0h, data=%0h", $time, CORE_ID, lsu_req_if.wid, lsu_req_if.PC, lsu_req_if.tmask, lsu_req_if.rd, lsu_req_if.base_addr, lsu_req_if.offset, lsu_req_if.store_data); end if (csr_req_if.valid && csr_req_if.ready) begin $display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=CSR, tmask=%b, rd=%0d, addr=%0h, rs1_data=%0h", $time, CORE_ID, csr_req_if.wid, csr_req_if.PC, csr_req_if.tmask, csr_req_if.rd, csr_req_if.csr_addr, csr_req_if.rs1_data); diff --git a/hw/rtl/VX_lsu_unit.v b/hw/rtl/VX_lsu_unit.v index afb2ab49..8d918bcb 100644 --- a/hw/rtl/VX_lsu_unit.v +++ b/hw/rtl/VX_lsu_unit.v @@ -19,14 +19,12 @@ module VX_lsu_unit #( VX_commit_if ld_commit_if, VX_commit_if st_commit_if ); + wire req_valid; wire [`NUM_THREADS-1:0] req_tmask; - wire req_rw; - wire [`NUM_THREADS-1:0][29:0] req_addr; - wire [`NUM_THREADS-1:0][1:0] req_offset; - wire [`NUM_THREADS-1:0][3:0] req_byteen; - wire [`NUM_THREADS-1:0][31:0] req_data; - wire [1:0] req_sext; + wire [`NUM_THREADS-1:0][31:0] req_addr; + wire [`LSU_BITS-1:0] req_type; + wire [`NUM_THREADS-1:0][31:0] req_data; wire [`NR_BITS-1:0] req_rd; wire req_wb; wire [`NW_BITS-1:0] req_wid; @@ -38,46 +36,13 @@ module VX_lsu_unit #( assign full_address[i] = lsu_req_if.base_addr[i] + lsu_req_if.offset; end - reg [1:0] mem_req_sext; - always @(*) begin - case (lsu_req_if.byteen) - `BYTEEN_SB: mem_req_sext = 2'h1; - `BYTEEN_SH: mem_req_sext = 2'h2; - default: mem_req_sext = 2'h0; - endcase - end - - wire [`NUM_THREADS-1:0][29:0] mem_req_addr; - wire [`NUM_THREADS-1:0][1:0] mem_req_offset; - wire [`NUM_THREADS-1:0][3:0] mem_req_byteen; - wire [`NUM_THREADS-1:0][31:0] mem_req_data; - - reg [3:0] wmask; - always @(*) begin - case (`BYTEEN_TYPE(lsu_req_if.byteen)) - 0: wmask = 4'b0001; - 1: wmask = 4'b0011; - default: wmask = 4'b1111; - endcase - end - - for (genvar i = 0; i < `NUM_THREADS; i++) begin - assign mem_req_addr[i] = full_address[i][31:2]; - assign mem_req_offset[i] = full_address[i][1:0]; - assign mem_req_byteen[i] = wmask << full_address[i][1:0]; - assign mem_req_data[i] = lsu_req_if.store_data[i] << {full_address[i][1:0], 3'b0}; - end - - reg [`NUM_THREADS-2:0] addr_matches; - always @(*) begin - for (integer i = 1; i < `NUM_THREADS; i++) begin - addr_matches[i-1] = (mem_req_addr[0] == mem_req_addr[i]) || ~lsu_req_if.tmask[i]; - end - end - wire is_dup_load = (0 == lsu_req_if.rw) && (& addr_matches); + wire [`NUM_THREADS-2:0] addr_matches; + for (genvar i = 1; i < `NUM_THREADS; i++) begin + assign addr_matches[i-1] = (full_address[0][31:2] == full_address[i][31:2]) || ~lsu_req_if.tmask[i]; + end + wire is_dup_load = lsu_req_if.wb && (& addr_matches); `IGNORE_WARNINGS_BEGIN - wire [`NUM_THREADS-1:0][31:0] req_address; reg [`LSUQ_SIZE-1:0][`DCORE_TAG_WIDTH-1:0] pending_tags; `IGNORE_WARNINGS_END @@ -85,14 +50,14 @@ module VX_lsu_unit #( wire stall_in = ~ready_in & req_valid; VX_pipe_register #( - .DATAW (1 + 1 + `NW_BITS + `NUM_THREADS + 32 + 1 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 2 + (`NUM_THREADS * (30 + 2 + 4 + 32))), + .DATAW (1 + 1 + `NW_BITS + `NUM_THREADS + 32 + (`NUM_THREADS * 32) + `LSU_BITS + `NR_BITS + 1 + (`NUM_THREADS * 32)), .RESETW (1) ) req_pipe_reg ( .clk (clk), .reset (reset), .enable (!stall_in), - .data_in ({lsu_req_if.valid, is_dup_load, lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, lsu_req_if.rw, lsu_req_if.rd, lsu_req_if.wb, full_address, mem_req_sext, mem_req_addr, mem_req_offset, mem_req_byteen, mem_req_data}), - .data_out ({req_valid, req_is_dup, req_wid, req_tmask, req_pc, req_rw, req_rd, req_wb, req_address, req_sext, req_addr, req_offset, req_byteen, req_data}) + .data_in ({lsu_req_if.valid, is_dup_load, lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, full_address, lsu_req_if.op_type, lsu_req_if.rd, lsu_req_if.wb, lsu_req_if.store_data}), + .data_out ({req_valid, req_is_dup, req_wid, req_tmask, req_pc, req_addr, req_type, req_rd, req_wb, req_data}) ); // Can accept new request? @@ -102,8 +67,7 @@ module VX_lsu_unit #( wire [31:0] rsp_pc; wire [`NR_BITS-1:0] rsp_rd; wire rsp_wb; - wire [`NUM_THREADS-1:0][1:0] rsp_offset; - wire [1:0] rsp_sext; + wire [`LSU_BITS-1:0] rsp_type; wire rsp_is_dup; reg [`LSUQ_SIZE-1:0][`NUM_THREADS-1:0] rsp_rem_mask; @@ -115,9 +79,14 @@ module VX_lsu_unit #( wire [`DCORE_TAG_ID_BITS-1:0] mbuf_waddr, mbuf_raddr; wire mbuf_full; + wire [`NUM_THREADS-1:0][1:0] req_offset, rsp_offset; + for (genvar i = 0; i < `NUM_THREADS; i++) begin + assign req_offset[i] = req_addr[i][1:0]; + end + wire mbuf_push = (| (dcache_req_if.valid & dcache_req_if.ready)) && (0 == req_sent_mask) // first submission only - && (0 == req_rw); // loads only + && req_wb; // loads only wire mbuf_pop_part = (| dcache_rsp_if.valid) && dcache_rsp_if.ready; @@ -126,7 +95,7 @@ module VX_lsu_unit #( assign mbuf_raddr = dcache_rsp_if.tag[0][`DCORE_TAG_ID_BITS-1:0]; VX_index_buffer #( - .DATAW (`NW_BITS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 2) + 2 + 1), + .DATAW (`NW_BITS + 32 + `NR_BITS + 1 + `LSU_BITS + (`NUM_THREADS * 2) + 1), .SIZE (`LSUQ_SIZE), .FASTRAM (1) ) req_metadata ( @@ -135,8 +104,8 @@ module VX_lsu_unit #( .write_addr (mbuf_waddr), .acquire_slot (mbuf_push), .read_addr (mbuf_raddr), - .write_data ({req_wid, req_pc, req_rd, req_wb, req_offset, req_sext, req_is_dup}), - .read_data ({rsp_wid, rsp_pc, rsp_rd, rsp_wb, rsp_offset, rsp_sext, rsp_is_dup}), + .write_data ({req_wid, req_pc, req_rd, req_wb, req_type, req_offset, req_is_dup}), + .read_data ({rsp_wid, rsp_pc, rsp_rd, rsp_wb, rsp_type, rsp_offset, rsp_is_dup}), .release_addr (mbuf_raddr), .release_slot (mbuf_pop), .full (mbuf_full) @@ -175,16 +144,45 @@ module VX_lsu_unit #( end end - wire req_ready_dep = (!req_rw && !mbuf_full) || (req_rw && st_commit_if.ready); + wire req_ready_dep = (req_wb && ~mbuf_full) || (~req_wb && st_commit_if.ready); wire [`NUM_THREADS-1:0] dup_mask = {{(`NUM_THREADS-1){~req_is_dup}}, 1'b1}; - // Core Request + // DCache Request + + reg [`NUM_THREADS-1:0][29:0] mem_req_addr; + reg [`NUM_THREADS-1:0][3:0] mem_req_byteen; + reg [`NUM_THREADS-1:0][31:0] mem_req_data; + + always @(*) begin + for (integer i = 0; i < `NUM_THREADS; i++) begin + mem_req_byteen[i] = {4{req_wb}}; + case (`LSU_WSIZE(req_type)) + 0: mem_req_byteen[i][req_offset[i]] = 1; + 1: begin + mem_req_byteen[i][req_offset[i]] = 1; + mem_req_byteen[i][{req_addr[i][1], 1'b1}] = 1; + end + default : mem_req_byteen[i] = {4{1'b1}}; + endcase + + mem_req_data[i] = 'x; + case (req_offset[i]) + 1: mem_req_data[i][31:8] = req_data[i][23:0]; + 2: mem_req_data[i][31:16] = req_data[i][15:0]; + 3: mem_req_data[i][31:24] = req_data[i][7:0]; + default: mem_req_data[i] = req_data[i]; + endcase + + mem_req_addr[i] = req_addr[i][31:2]; + end + end + assign dcache_req_if.valid = {`NUM_THREADS{req_valid && req_ready_dep}} & req_tmask & dup_mask & ~req_sent_mask; - assign dcache_req_if.rw = {`NUM_THREADS{req_rw}}; - assign dcache_req_if.byteen = req_byteen; - assign dcache_req_if.addr = req_addr; - assign dcache_req_if.data = req_data; + assign dcache_req_if.rw = {`NUM_THREADS{~req_wb}}; + assign dcache_req_if.addr = mem_req_addr; + assign dcache_req_if.byteen = mem_req_byteen; + assign dcache_req_if.data = mem_req_data; `ifdef DBG_CACHE_REQ_INFO assign dcache_req_if.tag = {`NUM_THREADS{{req_pc, req_wid, req_tag}}}; @@ -194,28 +192,9 @@ module VX_lsu_unit #( assign ready_in = req_ready_dep && req_sent_all; - // load response formatting - - reg [`NUM_THREADS-1:0][31:0] rsp_data; - wire [`NUM_THREADS-1:0] rsp_tmask; - - for (genvar i = 0; i < `NUM_THREADS; i++) begin - wire [31:0] src_data = (i == 0 || rsp_is_dup) ? dcache_rsp_if.data[0] : dcache_rsp_if.data[i]; - wire [31:0] rsp_data_shifted = src_data >> {rsp_offset[i], 3'b0}; - always @(*) begin - case (rsp_sext) - 1: rsp_data[i] = {{24{rsp_data_shifted[7]}}, rsp_data_shifted[7:0]}; - 2: rsp_data[i] = {{16{rsp_data_shifted[15]}}, rsp_data_shifted[15:0]}; - default: rsp_data[i] = rsp_data_shifted; - endcase - end - end - - wire [`NUM_THREADS-1:0] rsp_tmask = rsp_is_dup ? rsp_rem_mask[mbuf_raddr] : dcache_rsp_if.valid; - // send store commit - wire is_store_rsp = req_valid && req_rw && req_sent_all; + wire is_store_rsp = req_valid && ~req_wb && req_sent_all; assign st_commit_if.valid = is_store_rsp; assign st_commit_if.wid = req_wid; @@ -226,6 +205,34 @@ module VX_lsu_unit #( assign st_commit_if.eop = 1'b1; assign st_commit_if.data = 0; + // load response formatting + + reg [`NUM_THREADS-1:0][31:0] rsp_data; + wire [`NUM_THREADS-1:0] rsp_tmask; + + for (genvar i = 0; i < `NUM_THREADS; i++) begin + wire [31:0] src_data = (i == 0 || rsp_is_dup) ? dcache_rsp_if.data[0] : dcache_rsp_if.data[i]; + + reg [31:0] rsp_data_shifted; + always @(*) begin + rsp_data_shifted[31:16] = src_data[31:16]; + rsp_data_shifted[15:0] = rsp_offset[i][1] ? src_data[31:16] : src_data[15:0]; + rsp_data_shifted[7:0] = rsp_offset[i][0] ? rsp_data_shifted[15:8] : rsp_data_shifted[7:0]; + end + + always @(*) begin + case (rsp_type) + `LSU_SB: rsp_data[i] = 32'(signed'(rsp_data_shifted[7:0])); + `LSU_SH: rsp_data[i] = 32'(signed'(rsp_data_shifted[15:0])); + `LSU_UB: rsp_data[i] = 32'(unsigned'(rsp_data_shifted[7:0])); + `LSU_UH: rsp_data[i] = 32'(unsigned'(rsp_data_shifted[15:0])); + default: rsp_data[i] = rsp_data_shifted; + endcase + end + end + + wire [`NUM_THREADS-1:0] rsp_tmask = rsp_is_dup ? rsp_rem_mask[mbuf_raddr] : dcache_rsp_if.valid; + // send load commit wire is_load_rsp = (| dcache_rsp_if.valid); @@ -250,8 +257,8 @@ module VX_lsu_unit #( `SCOPE_ASSIGN (dcache_req_fire, dcache_req_if.valid & dcache_req_if.ready); `SCOPE_ASSIGN (dcache_req_wid, req_wid); `SCOPE_ASSIGN (dcache_req_pc, req_pc); - `SCOPE_ASSIGN (dcache_req_addr, req_address); - `SCOPE_ASSIGN (dcache_req_rw, req_rw); + `SCOPE_ASSIGN (dcache_req_addr, req_addr); + `SCOPE_ASSIGN (dcache_req_rw, ~req_wb); `SCOPE_ASSIGN (dcache_req_byteen,dcache_req_if.byteen); `SCOPE_ASSIGN (dcache_req_data, dcache_req_if.data); `SCOPE_ASSIGN (dcache_req_tag, req_tag); @@ -264,10 +271,10 @@ module VX_lsu_unit #( if ((| (dcache_req_if.valid & dcache_req_if.ready))) begin if (dcache_req_if.rw[0]) $display("%t: D$%0d Wr Req: wid=%0d, PC=%0h, tmask=%b, addr=%0h, tag=%0h, byteen=%0h, data=%0h", - $time, CORE_ID, req_wid, req_pc, (dcache_req_if.valid & dcache_req_if.ready), req_address, dcache_req_if.tag, dcache_req_if.byteen, dcache_req_if.data); + $time, CORE_ID, req_wid, req_pc, (dcache_req_if.valid & dcache_req_if.ready), req_addr, dcache_req_if.tag, dcache_req_if.byteen, dcache_req_if.data); else $display("%t: D$%0d Rd Req: wid=%0d, PC=%0h, tmask=%b, addr=%0h, tag=%0h, byteen=%0h, rd=%0d, is_dup=%b", - $time, CORE_ID, req_wid, req_pc, (dcache_req_if.valid & dcache_req_if.ready), req_address, dcache_req_if.tag, dcache_req_if.byteen, req_rd, req_is_dup); + $time, CORE_ID, req_wid, req_pc, (dcache_req_if.valid & dcache_req_if.ready), req_addr, dcache_req_if.tag, dcache_req_if.byteen, req_rd, req_is_dup); end if ((| dcache_rsp_if.valid) && dcache_rsp_if.ready) begin $display("%t: D$%0d Rsp: valid=%b, wid=%0d, PC=%0h, tag=%0h, rd=%0d, data=%0h, is_dup=%b", diff --git a/hw/rtl/VX_print_instr.vh b/hw/rtl/VX_print_instr.vh index b76c6368..23674a1f 100644 --- a/hw/rtl/VX_print_instr.vh +++ b/hw/rtl/VX_print_instr.vh @@ -61,17 +61,12 @@ task print_ex_op ( end `EX_LSU: begin case (`LSU_BITS'(op_type)) - `LSU_LB: $write("LB"); - `LSU_LH: $write("LH"); - `LSU_LW: $write("LW"); - `LSU_LBU: $write("LBU"); - `LSU_LHU: $write("LHU"); - `LSU_SB: $write("SB"); - `LSU_SH: $write("SH"); - `LSU_SW: $write("SW"); - `LSU_SBU: $write("SBU"); - `LSU_SHU: $write("SHU"); - default: $write("?"); + `LSU_SB: $write("SB"); + `LSU_SH: $write("SH"); + `LSU_SW: $write("SW"); + `LSU_UB: $write("UB"); + `LSU_UH: $write("UH"); + default: $write("?"); endcase end `EX_CSR: begin diff --git a/hw/rtl/interfaces/VX_lsu_req_if.v b/hw/rtl/interfaces/VX_lsu_req_if.v index 1bfb5a36..87b9a72b 100644 --- a/hw/rtl/interfaces/VX_lsu_req_if.v +++ b/hw/rtl/interfaces/VX_lsu_req_if.v @@ -9,8 +9,7 @@ interface VX_lsu_req_if (); wire [`NW_BITS-1:0] wid; wire [`NUM_THREADS-1:0] tmask; wire [31:0] PC; - wire rw; - wire [`BYTEEN_BITS-1:0] byteen; + wire [`LSU_BITS-1:0] op_type; wire [`NUM_THREADS-1:0][31:0] store_data; wire [`NUM_THREADS-1:0][31:0] base_addr; wire [31:0] offset; diff --git a/runtime/src/vx_intrinsics.S b/runtime/src/vx_intrinsics.S index 15694bfb..28b6a571 100644 --- a/runtime/src/vx_intrinsics.S +++ b/runtime/src/vx_intrinsics.S @@ -89,11 +89,11 @@ vx_num_cores: .type vx_num_cycles, @function .global vx_num_cycles vx_num_cycles: - csrr a0, CSR_MCYCLE + csrr a0, CSR_CYCLE ret .type vx_num_instrs, @function .global vx_num_instrs vx_num_instrs: - csrr a0, CSR_MINSTRET + csrr a0, CSR_INSTRET ret \ No newline at end of file