From c9755a0c48bce7435200ad506205ae3070b832b2 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Fri, 31 Jul 2020 09:29:44 -0400 Subject: [PATCH] lkg build with pipeline + FPU fixes --- Makefile | 8 +- README.md | 4 +- driver/opae/vortex.cpp | 8 +- driver/tests/demo/kernel.bin | Bin 6852 -> 6812 bytes driver/tests/demo/kernel.dump | 4 +- driver/tests/demo/kernel.elf | Bin 9180 -> 9180 bytes hw/Makefile | 5 +- hw/rtl/VX_alu_unit.v | 36 +++--- hw/rtl/VX_commit.v | 26 +++-- hw/rtl/VX_config.vh | 72 ++++++------ hw/rtl/VX_csr_data.v | 164 +++++++++++++++------------ hw/rtl/VX_csr_unit.v | 66 ++++++----- hw/rtl/VX_decode.v | 8 +- hw/rtl/VX_define.vh | 7 +- hw/rtl/VX_fpu_unit.v | 159 ++++++++++++++------------ hw/rtl/VX_icache_stage.v | 2 +- hw/rtl/VX_issue.v | 2 +- hw/rtl/VX_issue_demux.v | 2 +- hw/rtl/VX_scheduler.v | 2 +- hw/rtl/VX_writeback.v | 7 +- hw/rtl/cache/VX_bank.v | 24 +++- hw/rtl/cache/VX_tag_data_access.v | 40 ++++++- hw/rtl/interfaces/VX_cmt_to_csr_if.v | 10 +- hw/rtl/interfaces/VX_csr_io_req_if.v | 10 +- hw/rtl/interfaces/VX_csr_req_if.v | 2 +- hw/rtl/interfaces/VX_fpu_to_cmt_if.v | 6 +- hw/simulate/Makefile | 2 +- hw/simulate/simulator.cpp | 10 +- hw/simulate/simulator.h | 4 +- runtime/src/vx_intrinsics.S | 4 +- runtime/tests/simple/vx_simple.dump | 4 +- runtime/tests/simple/vx_simple.elf | Bin 12500 -> 12500 bytes runtime/tests/simple/vx_simple.hex | 4 +- 33 files changed, 408 insertions(+), 294 deletions(-) mode change 100644 => 100755 driver/tests/demo/kernel.bin diff --git a/Makefile b/Makefile index c9fc76e2..8b090422 100644 --- a/Makefile +++ b/Makefile @@ -3,4 +3,10 @@ all: $(MAKE) -C hw $(MAKE) -C driver $(MAKE) -C runtime - $(MAKE) -C simX \ No newline at end of file + $(MAKE) -C simX + +clean: + $(MAKE) -C hw clean + $(MAKE) -C driver clean + $(MAKE) -C runtime clean + $(MAKE) -C simX clean \ No newline at end of file diff --git a/README.md b/README.md index e4f27ad7..efe3b48e 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ Install development tools Install gnu-riscv-tools - $ export RISC_GNU_TOOLS_PATH=/opt/riscv-gnu-toolchain + $ export RISCV_TOOLCHAIN_PATH=/opt/riscv-gnu-toolchain $ sudo apt-get -y install \ binutils build-essential libtool texinfo \ @@ -51,7 +51,7 @@ Install gnu-riscv-tools $ git submodule update --init --recursive $ mkdir build $ cd build - $ ../configure --prefix=$RISC_GNU_TOOLS_PATH --with-arch=rv32im --with-abi=ilp32 + $ ../configure --prefix=$RISCV_TOOLCHAIN_PATH --with-arch=rv32im --with-abi=ilp32 $ make -j`nproc` $ make -j`nproc` build-qemu diff --git a/driver/opae/vortex.cpp b/driver/opae/vortex.cpp index 07e0385c..02955678 100755 --- a/driver/opae/vortex.cpp +++ b/driver/opae/vortex.cpp @@ -173,7 +173,7 @@ extern int vx_dev_open(vx_device_h* hdevice) { { // Load device CAPS int ret = 0; - ret |= vx_csr_get(device, 0, CSR_IMPL_ID, &device->implementation_id); + ret |= vx_csr_get(device, 0, CSR_MIMPID, &device->implementation_id); ret |= vx_csr_get(device, 0, CSR_NC, &device->num_cores); ret |= vx_csr_get(device, 0, CSR_NW, &device->num_warps); ret |= vx_csr_get(device, 0, CSR_NT, &device->num_threads); @@ -217,14 +217,14 @@ extern int vx_dev_close(vx_device_h hdevice) { unsigned value; int ret = 0; - ret |= vx_csr_get(hdevice, 0, CSR_INSTR_H, &value); + ret |= vx_csr_get(hdevice, 0, CSR_INSTRET_H, &value); instrs = value; - ret |= vx_csr_get(hdevice, 0, CSR_INSTR_L, &value); + ret |= vx_csr_get(hdevice, 0, CSR_INSTRET, &value); instrs = (instrs << 32) | value; ret |= vx_csr_get(hdevice, 0, CSR_CYCLE_H, &value); cycles = value; - ret |= vx_csr_get(hdevice, 0, CSR_CYCLE_L, &value); + ret |= vx_csr_get(hdevice, 0, CSR_CYCLE, &value); cycles = (cycles << 32) | value; float IPC = (float)(double(instrs) / double(cycles)); diff --git a/driver/tests/demo/kernel.bin b/driver/tests/demo/kernel.bin old mode 100644 new mode 100755 index 06939d4c46190f07fb214341beca87cb3a6ec952..280e5cbc251c068b436a3e1c9c0f009cf8732a26 GIT binary patch delta 310 zcmX?NI>$7AIx7Ri|`gjod`vKd$zgj)|D5^iHWY|OUIVKPwN zdxi!paUdpJuzNctU+or zI|D;<2Rnl~D?zddD zwWJAicBUC~{&WDlNNFO2aHOU~G6N@*FiT6?<9-h77nf)G!P cBtydyAl8v$XqY^aZN_E?@djp=0x^aL0O=o9egFUf delta 347 zcmZ9GF-yZx6h^-!@1+C;+diArQbn3p%%F&+bP^)8in}1XROl2c7WEH2X{)Z`iDqze z@DEhc!J$>Vxpo$G6^bZ=i~7>h;c|w1F87=l9t)K-3W!#j?dP`CTJ#zNxN%0KsDXmD zy7%l*dC2RRu8`gUShOS=Ezb_XXQaCzi8|mgMftU-W3aOa(&H8*G^Ag#%}z>0fa-As zMG7vD=j-TX{2-mS=VE9$Wi+xaXihJDFe)esIpII;>D~TE&V3gpWk;2`QZAI9SXY;( zm@ebO@UH}_O&xjKgXkx~OcSrC^hcbi%chdG{?c; -80000140: f14025f3 csrr a1,mhartid +80000140: 022025f3 csrr a1,0x22 80000144: 00a59593 slli a1,a1,0xa 80000148: 02002673 csrr a2,0x20 8000014c: 00261613 slli a2,a2,0x2 @@ -145,7 +145,7 @@ Disassembly of section .text: 800001ac: 00008067 ret 800001b0 : -800001b0: f1402573 csrr a0,mhartid +800001b0: 02202573 csrr a0,0x22 800001b4: 00008067 ret 800001b8 : diff --git a/driver/tests/demo/kernel.elf b/driver/tests/demo/kernel.elf index d91b509c4adc2825891d3aa1330b5591e8222962..f7681ea2d27914720ddbc09239e7164f31cf8a04 100755 GIT binary patch delta 22 ccmccPe#d=-lOU4<(_|OHJRr4si=c)O09i-|ng9R* delta 22 ccmccPe#d=-lOU7B$H^{&c|dCO7C{Xm0BxlRxBvhE diff --git a/hw/Makefile b/hw/Makefile index 4c5575a4..d615dfc0 100644 --- a/hw/Makefile +++ b/hw/Makefile @@ -1,4 +1,7 @@ .PHONY: build_config build_config: - ./scripts/gen_config.py --outv ./rtl/VX_user_config.vh --outc ./VX_config.h \ No newline at end of file + ./scripts/gen_config.py --outv ./rtl/VX_user_config.vh --outc ./VX_config.h + +clean: + rm ./rtl/VX_user_config.vh ./VX_config.h \ No newline at end of file diff --git a/hw/rtl/VX_alu_unit.v b/hw/rtl/VX_alu_unit.v index 7a5dd6a3..3fe6c5b0 100644 --- a/hw/rtl/VX_alu_unit.v +++ b/hw/rtl/VX_alu_unit.v @@ -48,7 +48,7 @@ module VX_alu_unit #( end end - wire [`NT_BITS-1:0] br_result_index, br_result_index_o; + wire [`NT_BITS-1:0] br_result_index; VX_priority_encoder #( .N(`NUM_THREADS) @@ -58,8 +58,14 @@ module VX_alu_unit #( `UNUSED_PIN (valid_out) ); - wire [`BR_BITS-1:0] br_op = `IS_BR_OP(alu_req_if.alu_op) ? `BR_OP(alu_req_if.alu_op) : 0; - wire [`BR_BITS-1:0] br_op_o; + wire [32:0] br_result = sub_result[br_result_index]; + wire br_sign = br_result[32]; + wire br_nzero = (| br_result[31:0]); + wire br_sign_s1; + wire br_nzero_s1; + + wire [`BR_BITS-1:0] br_op = `IS_BR_OP(alu_req_if.alu_op) ? `BR_OP(alu_req_if.alu_op) : `BR_NO; + wire [`BR_BITS-1:0] br_op_s1; wire [31:0] br_addr = (br_op == `BR_JALR) ? alu_req_if.rs1_data[br_result_index] : alu_req_if.curr_PC; wire [31:0] br_dest = $signed(br_addr) + $signed(alu_req_if.offset); @@ -70,34 +76,30 @@ module VX_alu_unit #( wire stall = ~alu_commit_if.ready && alu_commit_if.valid; VX_generic_register #( - .N(1 + `NW_BITS + `ISTAG_BITS + (`NUM_THREADS * 32) + `BR_BITS + 32 + `NT_BITS) + .N(1 + `NW_BITS + `ISTAG_BITS + (`NUM_THREADS * 32) + `BR_BITS + 32 + 1 + 1) ) alu_reg ( .clk (clk), .reset (reset), .stall (stall), .flush (0), - .in ({alu_req_if.valid, alu_req_if.warp_num, alu_req_if.issue_tag, alu_jal_result, br_op, br_dest, br_result_index}), - .out ({alu_commit_if.valid, branch_ctl_if.warp_num, alu_commit_if.issue_tag, alu_commit_if.data, br_op_o, branch_ctl_if.dest, br_result_index_o}) - ); - - wire [31:0] br_result = alu_commit_if.data[br_result_index_o]; - wire br_sign = br_result[31]; - wire br_nzero = (| br_result[31:0]); + .in ({alu_req_if.valid, alu_req_if.warp_num, alu_req_if.issue_tag, alu_jal_result, br_op, br_dest, br_sign, br_nzero}), + .out ({alu_commit_if.valid, branch_ctl_if.warp_num, alu_commit_if.issue_tag, alu_commit_if.data, br_op_s1, branch_ctl_if.dest, br_sign_s1, br_nzero_s1}) + ); reg br_taken; always @(*) begin - case (br_op_o) - `BR_NE: br_taken = br_nzero; - `BR_EQ: br_taken = ~br_nzero; + case (br_op_s1) + `BR_NE: br_taken = br_nzero_s1; + `BR_EQ: br_taken = ~br_nzero_s1; `BR_LT, - `BR_LTU: br_taken = br_sign; + `BR_LTU: br_taken = br_sign_s1; `BR_GE, - `BR_GEU: br_taken = ~br_sign; + `BR_GEU: br_taken = ~br_sign_s1; default: br_taken = 1'b1; endcase end - assign branch_ctl_if.valid = alu_req_if.valid && (br_op_o != 0); + assign branch_ctl_if.valid = alu_commit_if.valid && (br_op_s1 != `BR_NO); assign branch_ctl_if.taken = br_taken; assign alu_req_if.ready = ~stall; diff --git a/hw/rtl/VX_commit.v b/hw/rtl/VX_commit.v index b6b9db45..4381cc6e 100644 --- a/hw/rtl/VX_commit.v +++ b/hw/rtl/VX_commit.v @@ -39,15 +39,27 @@ module VX_commit #( ); assign cmt_to_csr_if.valid = (| commited_mask); + assign cmt_to_csr_if.warp_num = cmt_to_issue_if.fpu_data.warp_num; assign cmt_to_csr_if.num_commits = num_commits; + + assign cmt_to_csr_if.upd_fflags = (fpu_commit_if.valid && fpu_commit_if.ready) && fpu_commit_if.upd_fflags; + + integer i; - assign cmt_to_csr_if.upd_fflags = (fpu_commit_if.valid && fpu_commit_if.ready) && fpu_commit_if.upd_fflags; - assign cmt_to_csr_if.fpu_warp_num = cmt_to_issue_if.fpu_data.warp_num; - assign cmt_to_csr_if.fflags_NV = fpu_commit_if.fflags_NV; - assign cmt_to_csr_if.fflags_DZ = fpu_commit_if.fflags_DZ; - assign cmt_to_csr_if.fflags_OF = fpu_commit_if.fflags_OF; - assign cmt_to_csr_if.fflags_UF = fpu_commit_if.fflags_UF; - assign cmt_to_csr_if.fflags_NX = fpu_commit_if.fflags_NX; + reg [`FFG_BITS-1:0] fflags; + always @(*) begin + fflags = 0; + for (i = 0; i < `NUM_THREADS; i++) begin + if (cmt_to_issue_if.fpu_data.thread_mask[i]) begin + fflags[0] |= fpu_commit_if.fflags[i][0]; + fflags[1] |= fpu_commit_if.fflags[i][1]; + fflags[2] |= fpu_commit_if.fflags[i][2]; + fflags[3] |= fpu_commit_if.fflags[i][3]; + fflags[4] |= fpu_commit_if.fflags[i][4]; + end + end + end + assign cmt_to_csr_if.fflags = fflags; // Notify issue stage diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 0562cc3f..36a1ce57 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -27,10 +27,6 @@ `define GLOBAL_BLOCK_SIZE 16 `endif -`ifndef NUM_CSRS -`define NUM_CSRS 64 -`endif - `ifndef STARTUP_ADDR `define STARTUP_ADDR 32'h80000000 `endif @@ -59,38 +55,11 @@ `define EXT_F_ENABLE -// Configuration Values ======================================================= - +// Device identification `define VENDOR_ID 0 `define ARCHITECTURE_ID 0 `define IMPLEMENTATION_ID 0 -// CSR Addresses ============================================================== - -`define CSR_FFLAGS 12'h001 -`define CSR_FRM 12'h002 -`define CSR_FCSR 12'h003 - -`define CSR_VEND_ID 12'hF11 -`define CSR_ARCH_ID 12'hF12 -`define CSR_IMPL_ID 12'hF13 -`define CSR_GTID 12'hF14 - -`define CSR_LTID 12'h020 -`define CSR_LWID 12'h021 -`define CSR_GWID 12'h023 -`define CSR_GCID 12'h024 -`define CSR_NT 12'h025 -`define CSR_NW 12'h026 -`define CSR_NC 12'h027 - -`define CSR_CYCLE_L 12'hC00 -`define CSR_CYCLE_H 12'hC80 -`define CSR_INSTR_L 12'hC02 -`define CSR_INSTR_H 12'hC82 - -`define CSR_MISA 12'h301 - // Size of MUL Request Queue Size `ifndef MULRQ_SIZE `define MULRQ_SIZE 8 @@ -106,6 +75,45 @@ `define ISSUEQ_SIZE (8 + `NUM_WARPS) `endif +// CSR Addresses ////////////////////////////////////////////////////////////// + +`define CSR_FFLAGS 12'h001 +`define CSR_FRM 12'h002 +`define CSR_FCSR 12'h003 + +`define CSR_LTID 12'h020 +`define CSR_LWID 12'h021 +`define CSR_GTID 12'h022 +`define CSR_GWID 12'h023 +`define CSR_GCID 12'h024 +`define CSR_NT 12'h025 +`define CSR_NW 12'h026 +`define CSR_NC 12'h027 + +`define CSR_SATP 12'h180 + +`define CSR_PMPCFG0 12'h3A0 +`define CSR_PMPADDR0 12'h3B0 + +`define CSR_MSTATUS 12'h300 +`define CSR_MISA 12'h301 +`define CSR_MEDELEG 12'h302 +`define CSR_MIDELEG 12'h303 +`define CSR_MIE 12'h304 +`define CSR_MTVEC 12'h305 + +`define CSR_MEPC 12'h341 + +`define CSR_CYCLE 12'hC00 +`define CSR_CYCLE_H 12'hC80 +`define CSR_INSTRET 12'hC02 +`define CSR_INSTRET_H 12'hC82 + +`define CSR_MVENDORID 12'hF11 +`define CSR_MARCHID 12'hF12 +`define CSR_MIMPID 12'hF13 +`define CSR_MHARTID 12'hF14 + // Dcache Configurable Knobs ================================================== // Size of cache in bytes diff --git a/hw/rtl/VX_csr_data.v b/hw/rtl/VX_csr_data.v index c640c0f2..d905ea33 100644 --- a/hw/rtl/VX_csr_data.v +++ b/hw/rtl/VX_csr_data.v @@ -11,107 +11,129 @@ module VX_csr_data #( input wire[`NW_BITS-1:0] warp_num, - input wire[`CSR_ADDR_SIZE-1:0] read_addr, + input wire read_enable, + input wire[`CSR_ADDR_BITS-1:0] read_addr, output reg[31:0] read_data, + input wire write_enable, -`IGNORE_WARNINGS_BEGIN - // We use a smaller storage for CSRs than the standard 4KB in RISC-V - input wire[`CSR_ADDR_SIZE-1:0] write_addr, -`IGNORE_WARNINGS_END + input wire[`CSR_ADDR_BITS-1:0] write_addr, input wire[`CSR_WIDTH-1:0] write_data ); - reg [`CSR_WIDTH-1:0] csr_table[`NUM_CSRS-1:0]; - reg [`FFG_BITS+`FRM_BITS-1:0] fflags_table [`NUM_WARPS-1:0]; - reg [`FRM_BITS-1:0] frm_table [`NUM_WARPS-1:0]; - reg [`FFG_BITS+`FRM_BITS-1:0] fcsr_table [`NUM_WARPS-1:0]; // fflags + frm - - // cast address to physical CSR range - wire [$clog2(`NUM_CSRS)-1:0] rd_addr, wr_addr; - assign rd_addr = $size(rd_addr)'(read_addr); - assign wr_addr = $size(wr_addr)'(write_addr); - - wire [`FFG_BITS-1:0] fflags_update; - assign fflags_update[4] = cmt_to_csr_if.fflags_NV; - assign fflags_update[3] = cmt_to_csr_if.fflags_DZ; - assign fflags_update[2] = cmt_to_csr_if.fflags_OF; - assign fflags_update[1] = cmt_to_csr_if.fflags_UF; - assign fflags_update[0] = cmt_to_csr_if.fflags_NX; - - integer i; + reg [`CSR_WIDTH-1:0] csr_satp; + reg [`CSR_WIDTH-1:0] csr_mstatus; + reg [`CSR_WIDTH-1:0] csr_medeleg; + reg [`CSR_WIDTH-1:0] csr_mideleg; + reg [`CSR_WIDTH-1:0] csr_mie; + reg [`CSR_WIDTH-1:0] csr_mtvec; + reg [`CSR_WIDTH-1:0] csr_mepc; + reg [`CSR_WIDTH-1:0] csr_pmpcfg [0:0]; + reg [`CSR_WIDTH-1:0] csr_pmpaddr [0:0]; + reg [63:0] csr_cycle; + reg [63:0] csr_instret; + + reg [`FFG_BITS-1:0] csr_fflags [`NUM_WARPS-1:0]; + reg [`FRM_BITS-1:0] csr_frm [`NUM_WARPS-1:0]; + reg [`FRM_BITS+`FFG_BITS-1:0] csr_fcsr [`NUM_WARPS-1:0]; // fflags + frm always @(posedge clk) begin - if (reset) begin - for (i = 0; i < `NUM_WARPS; i++) begin - fflags_table[i] <= 0; - frm_table[i] <= 0; - fcsr_table[i] <= 0; - end - end else begin - if (write_enable) begin - case (write_addr) - `CSR_FFLAGS: begin - fcsr_table[warp_num][`FFG_BITS-1:0] <= write_data[`FFG_BITS-1:0]; - fflags_table[warp_num][`FFG_BITS-1:0] <= write_data[`FFG_BITS-1:0]; - end - `CSR_FRM: begin - fcsr_table[warp_num][`FFG_BITS+`FRM_BITS-1:`FFG_BITS] <= write_data[`FRM_BITS-1:0]; - frm_table[warp_num] <= write_data[`FRM_BITS-1:0]; - end - `CSR_FCSR: begin - fcsr_table[warp_num] <= write_data[`FFG_BITS+`FRM_BITS-1:0]; - frm_table[warp_num] <= write_data[`FFG_BITS+`FRM_BITS-1:`FFG_BITS]; - fflags_table[warp_num][`FFG_BITS-1:0] <= write_data[`FFG_BITS-1:0]; - end - default: begin - csr_table[wr_addr] <= write_data; + if (cmt_to_csr_if.upd_fflags) begin + csr_fflags[cmt_to_csr_if.warp_num] <= cmt_to_csr_if.fflags; + csr_fcsr[cmt_to_csr_if.warp_num][`FFG_BITS-1:0] <= cmt_to_csr_if.fflags; + end + + if (write_enable) begin + case (write_addr) + `CSR_FFLAGS: begin + csr_fcsr[warp_num][`FFG_BITS-1:0] <= write_data[`FFG_BITS-1:0]; + csr_fflags[warp_num] <= write_data[`FFG_BITS-1:0]; + end + `CSR_FRM: begin + csr_fcsr[warp_num][`FFG_BITS+`FRM_BITS-1:`FFG_BITS] <= write_data[`FRM_BITS-1:0]; + csr_frm[warp_num] <= write_data[`FRM_BITS-1:0]; + end + `CSR_FCSR: begin + csr_fcsr[warp_num] <= write_data[`FFG_BITS+`FRM_BITS-1:0]; + csr_frm[warp_num] <= write_data[`FFG_BITS+`FRM_BITS-1:`FFG_BITS]; + csr_fflags[warp_num] <= write_data[`FFG_BITS-1:0]; + end + `CSR_SATP: csr_satp <= write_data; + + `CSR_MSTATUS: csr_mstatus <= write_data; + `CSR_MEDELEG: csr_medeleg <= write_data; + `CSR_MIDELEG: csr_mideleg <= write_data; + `CSR_MIE: csr_mie <= write_data; + `CSR_MTVEC: csr_mtvec <= write_data; + + `CSR_MEPC: csr_mepc <= write_data; + + `CSR_PMPCFG0: csr_pmpcfg[0] <= write_data; + `CSR_PMPADDR0: csr_pmpaddr[0] <= write_data; + + default: begin + assert(~write_enable) else $error("%t: invalid CSR write address: %0h", $time, write_addr); end - endcase - end else if (cmt_to_csr_if.upd_fflags) begin - fflags_table[cmt_to_csr_if.fpu_warp_num][`FFG_BITS-1:0] <= fflags_update; - fcsr_table[cmt_to_csr_if.fpu_warp_num][`FFG_BITS-1:0] <= fflags_update; - end + endcase end end - reg [63:0] total_cycles, total_instrs; - always @(posedge clk) begin if (reset) begin - total_cycles <= 0; - total_instrs <= 0; + csr_cycle <= 0; + csr_instret <= 0; end else begin - total_cycles <= total_cycles + 1; + csr_cycle <= csr_cycle + 1; if (cmt_to_csr_if.valid) begin - total_instrs <= total_instrs + 64'(cmt_to_csr_if.num_commits); + csr_instret <= csr_instret + 64'(cmt_to_csr_if.num_commits); end end end always @(*) begin case (read_addr) - `CSR_FFLAGS : read_data = 32'(fflags_table[warp_num]); - `CSR_FRM : read_data = 32'(frm_table[warp_num]); - `CSR_FCSR : read_data = 32'(fcsr_table[warp_num]); + `CSR_FFLAGS : read_data = 32'(csr_fflags[warp_num]); + `CSR_FRM : read_data = 32'(csr_frm[warp_num]); + `CSR_FCSR : read_data = 32'(csr_fcsr[warp_num]); + `CSR_LWID : read_data = 32'(warp_num); + `CSR_LTID , `CSR_GTID , + `CSR_MHARTID , `CSR_GWID : read_data = CORE_ID * `NUM_WARPS + 32'(warp_num); `CSR_GCID : read_data = CORE_ID; `CSR_NT : read_data = `NUM_THREADS; `CSR_NW : read_data = `NUM_WARPS; `CSR_NC : read_data = `NUM_CORES * `NUM_CLUSTERS; - `CSR_CYCLE_L : read_data = total_cycles[31:0]; - `CSR_CYCLE_H : read_data = total_cycles[63:32]; - `CSR_INSTR_L : read_data = total_instrs[31:0]; - `CSR_INSTR_H : read_data = total_instrs[63:32]; - `CSR_VEND_ID : read_data = `VENDOR_ID; - `CSR_ARCH_ID : read_data = `ARCHITECTURE_ID; - `CSR_IMPL_ID : read_data = `IMPLEMENTATION_ID; + + `CSR_SATP : read_data = 32'(csr_satp); + + `CSR_MSTATUS : read_data = 32'(csr_mstatus); `CSR_MISA : read_data = `ISA_CODE; - default : read_data = 32'(csr_table[rd_addr]); + `CSR_MEDELEG : read_data = 32'(csr_medeleg); + `CSR_MIDELEG : read_data = 32'(csr_mideleg); + `CSR_MIE : read_data = 32'(csr_mie); + `CSR_MTVEC : read_data = 32'(csr_mtvec); + + `CSR_MEPC : read_data = 32'(csr_mepc); + + `CSR_PMPCFG0 : read_data = 32'(csr_pmpcfg[0]); + `CSR_PMPADDR0: read_data = 32'(csr_pmpaddr[0]); + + `CSR_CYCLE : read_data = csr_cycle[31:0]; + `CSR_CYCLE_H : read_data = csr_cycle[63:32]; + `CSR_INSTRET : read_data = csr_instret[31:0]; + `CSR_INSTRET_H:read_data = csr_instret[63:32]; + + `CSR_MVENDORID:read_data = `VENDOR_ID; + `CSR_MARCHID : read_data = `ARCHITECTURE_ID; + `CSR_MIMPID : read_data = `IMPLEMENTATION_ID; + + default: begin + assert(~read_enable) else $error("%t: invalid CSR read address: %0h", $time, read_addr); + end endcase end - assign csr_to_fpu_if.frm = frm_table[csr_to_fpu_if.warp_num]; + assign csr_to_fpu_if.frm = csr_frm[csr_to_fpu_if.warp_num]; -endmodule +endmodule \ No newline at end of file diff --git a/hw/rtl/VX_csr_unit.v b/hw/rtl/VX_csr_unit.v index 8e4baf8b..14ea5fd8 100644 --- a/hw/rtl/VX_csr_unit.v +++ b/hw/rtl/VX_csr_unit.v @@ -37,12 +37,11 @@ module VX_csr_unit #( .select_io_rsp (select_io_rsp) ); - wire [`CSR_ADDR_SIZE-1:0] csr_addr_s2; - wire [31:0] csr_read_data_s2; - wire [31:0] csr_updated_data_s2; - wire [31:0] csr_read_data_unqual; - - wire is_csr_s2 = csr_pipe_commit_if.valid; + wire csr_we_s1; + wire [`CSR_ADDR_BITS-1:0] csr_addr_s1; + wire [31:0] csr_read_data, csr_read_data_s1; + wire [31:0] csr_updated_data_s1; + wire [`NW_BITS-1:0] warp_num_s1; VX_csr_data #( .CORE_ID(CORE_ID) @@ -51,51 +50,64 @@ module VX_csr_unit #( .reset (reset), .cmt_to_csr_if (cmt_to_csr_if), .csr_to_fpu_if (csr_to_fpu_if), + .read_enable (csr_pipe_req_if.valid), .read_addr (csr_pipe_req_if.csr_addr), - .read_data (csr_read_data_unqual), - .write_enable (is_csr_s2), - .write_data (csr_updated_data_s2[`CSR_WIDTH-1:0]), - .write_addr (csr_addr_s2), + .read_data (csr_read_data), + .write_enable (csr_we_s1), + .write_data (csr_updated_data_s1[`CSR_WIDTH-1:0]), + .write_addr (csr_addr_s1), .warp_num (csr_pipe_req_if.warp_num) - ); + ); - wire [`NW_BITS-1:0] warp_num_s2; + wire csr_hazard = (csr_addr_s1 == csr_pipe_req_if.csr_addr) + && (warp_num_s1 == csr_pipe_req_if.warp_num) + && csr_pipe_commit_if.valid; - wire csr_hazard = (csr_addr_s2 == csr_pipe_req_if.csr_addr) - && (warp_num_s2 == csr_pipe_req_if.warp_num) - && is_csr_s2; - - wire [31:0] csr_read_data = csr_hazard ? csr_updated_data_s2 : csr_read_data_unqual; + wire [31:0] csr_read_data_qual = csr_hazard ? csr_updated_data_s1 : csr_read_data; reg [31:0] csr_updated_data; + reg csr_we_s0_unqual; + always @(*) begin + csr_we_s0_unqual = 0; case (csr_pipe_req_if.csr_op) - `CSR_RW: csr_updated_data = csr_pipe_req_if.csr_mask; - `CSR_RS: csr_updated_data = csr_read_data | csr_pipe_req_if.csr_mask; - `CSR_RC: csr_updated_data = csr_read_data & (32'hFFFFFFFF - csr_pipe_req_if.csr_mask); + `CSR_RW: begin + csr_updated_data = csr_pipe_req_if.csr_mask; + csr_we_s0_unqual = 1; + end + `CSR_RS: begin + csr_updated_data = csr_read_data_qual | csr_pipe_req_if.csr_mask; + csr_we_s0_unqual = (csr_pipe_req_if.csr_mask != 0); + end + `CSR_RC: begin + csr_updated_data = csr_read_data_qual & (32'hFFFFFFFF - csr_pipe_req_if.csr_mask); + csr_we_s0_unqual = (csr_pipe_req_if.csr_mask != 0); + end default: csr_updated_data = 32'hdeadbeef; endcase - end + end + + wire csr_we_s0 = csr_we_s0_unqual && csr_pipe_req_if.valid; wire stall = ~csr_pipe_commit_if.ready && csr_pipe_commit_if.valid; VX_generic_register #( - .N(1 + `ISTAG_BITS + `NW_BITS + `CSR_ADDR_SIZE + 1 + 32 + 32) + .N(1 + `ISTAG_BITS + `NW_BITS + 1 + `CSR_ADDR_BITS + 1 + 32 + 32) ) csr_reg ( .clk (clk), .reset (reset), .stall (stall), .flush (0), - .in ({csr_pipe_req_if.valid, csr_pipe_req_if.issue_tag, csr_pipe_req_if.warp_num, csr_pipe_req_if.csr_addr, csr_pipe_req_if.is_io, csr_read_data, csr_updated_data}), - .out ({csr_pipe_commit_if.valid, csr_pipe_commit_if.issue_tag, warp_num_s2, csr_addr_s2, select_io_rsp, csr_read_data_s2, csr_updated_data_s2}) + .in ({csr_pipe_req_if.valid, csr_pipe_req_if.issue_tag, csr_pipe_req_if.warp_num, csr_we_s0, csr_pipe_req_if.csr_addr, csr_pipe_req_if.is_io, csr_read_data_qual, csr_updated_data}), + .out ({csr_pipe_commit_if.valid, csr_pipe_commit_if.issue_tag, warp_num_s1, csr_we_s1, csr_addr_s1, select_io_rsp, csr_read_data_s1, csr_updated_data_s1}) ); genvar i; for (i = 0; i < `NUM_THREADS; i++) begin - assign csr_pipe_commit_if.data[i] = (csr_addr_s2 == `CSR_LTID) ? i : - (csr_addr_s2 == `CSR_GTID) ? (csr_read_data_s2 * `NUM_THREADS + i) : - csr_read_data_s2; + assign csr_pipe_commit_if.data[i] = (csr_addr_s1 == `CSR_LTID) ? i : + (csr_addr_s1 == `CSR_GTID) ? (csr_read_data_s1 * `NUM_THREADS + i) : + csr_read_data_s1; end assign csr_pipe_req_if.ready = ~stall; diff --git a/hw/rtl/VX_decode.v b/hw/rtl/VX_decode.v index ff1d839c..983f520f 100644 --- a/hw/rtl/VX_decode.v +++ b/hw/rtl/VX_decode.v @@ -337,10 +337,10 @@ module VX_decode #( assign decode_tmp_if.use_rs3 = use_rs3; - assign decode_tmp_if.reg_use_mask = ((`NUM_REGS)'(use_rd) << rd) - | ((`NUM_REGS)'(use_rs1) << rs1_qual) - | ((`NUM_REGS)'(use_rs2) << rs2) - | ((`NUM_REGS)'(use_rs3) << rs3); + assign decode_tmp_if.reg_use_mask = ((`NUM_REGS)'(use_rd) << decode_tmp_if.rd) + | ((`NUM_REGS)'(use_rs1) << decode_tmp_if.rs1) + | ((`NUM_REGS)'(use_rs2) << decode_tmp_if.rs2) + | ((`NUM_REGS)'(use_rs3) << decode_tmp_if.rs3); assign decode_tmp_if.imm = (is_lui || is_auipc) ? {upper_imm, 12'(0)} : (is_jal || is_jalr || is_jals) ? jalx_offset : diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 8e0279b5..d61b001c 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -30,7 +30,7 @@ `define NR_BITS `LOG2UP(`NUM_REGS) -`define CSR_ADDR_SIZE 12 +`define CSR_ADDR_BITS 12 `define CSR_WIDTH 12 @@ -38,8 +38,8 @@ /////////////////////////////////////////////////////////////////////////////// -`define LATENCY_IDIV 24 -`define LATENCY_IMUL 2 +`define LATENCY_IDIV 24 +`define LATENCY_IMUL 2 `define LATENCY_FMULADD 2 `define LATENCY_FDIVSQRT 2 @@ -91,6 +91,7 @@ `define BR_MRET 4'hA `define BR_SRET 4'hB `define BR_DRET 4'hC +`define BR_NO 4'hF `define BR_BITS 4 `define OP_BITS 5 diff --git a/hw/rtl/VX_fpu_unit.v b/hw/rtl/VX_fpu_unit.v index 4e678de4..540dfdd8 100644 --- a/hw/rtl/VX_fpu_unit.v +++ b/hw/rtl/VX_fpu_unit.v @@ -20,12 +20,12 @@ module VX_fpu_unit #( localparam FMTF_BITS = $clog2(fpnew_pkg::NUM_FP_FORMATS); localparam FMTI_BITS = $clog2(fpnew_pkg::NUM_INT_FORMATS); - localparam int FPU_DPATHW = `NUM_THREADS * 32; + localparam FPU_DPATHW = 32'd32; localparam fpnew_pkg::fpu_features_t FPU_FEATURES = '{ Width: FPU_DPATHW, - EnableVectors: 1, - EnableNanBox: 1, + EnableVectors: 1'b0, + EnableNanBox: 1'b1, FpFmtMask: 5'b10000, IntFmtMask: 4'b0010 }; @@ -54,7 +54,7 @@ module VX_fpu_unit #( wire [FMTI_BITS-1:0] fpu_int_fmt = fpnew_pkg::INT32; wire [`NUM_THREADS-1:0][31:0] fpu_result; - fpnew_pkg::status_t fpu_status; + fpnew_pkg::status_t fpu_status [0:`NUM_THREADS-1]; assign csr_to_fpu_if.warp_num = fpu_req_if.warp_num; wire [`FRM_BITS-1:0] real_frm = (fpu_req_if.frm == `FRM_DYN) ? csr_to_fpu_if.frm : fpu_req_if.frm; @@ -65,7 +65,7 @@ module VX_fpu_unit #( reg [FOP_BITS-1:0] fpu_op; reg [`FRM_BITS-1:0] fpu_rnd; reg fpu_op_mod; - reg fflags_en, fflags_en_o; + reg fflags_en, fflags_en_o; always @(*) begin fpu_op = fpnew_pkg::SGNJ; @@ -87,88 +87,94 @@ module VX_fpu_unit #( fpu_operands[2] = fpu_req_if.rs2_data; fpu_op_mod = 1; end - `FPU_MUL: fpu_op = fpnew_pkg::MUL; - `FPU_DIV: fpu_op = fpnew_pkg::DIV; - `FPU_SQRT: fpu_op = fpnew_pkg::SQRT; - `FPU_MADD: fpu_op = fpnew_pkg::FMADD; - `FPU_MSUB: begin fpu_op = fpnew_pkg::FMADD; fpu_op_mod = 1; end - `FPU_NMSUB: fpu_op = fpnew_pkg::FNMSUB; + `FPU_MUL: begin fpu_op = fpnew_pkg::MUL; end + `FPU_DIV: begin fpu_op = fpnew_pkg::DIV; end + `FPU_SQRT: begin fpu_op = fpnew_pkg::SQRT; end + `FPU_MADD: begin fpu_op = fpnew_pkg::FMADD; end + `FPU_MSUB: begin fpu_op = fpnew_pkg::FMADD; fpu_op_mod = 1; end + `FPU_NMSUB: begin fpu_op = fpnew_pkg::FNMSUB; end `FPU_NMADD: begin fpu_op = fpnew_pkg::FNMSUB; fpu_op_mod = 1; end `FPU_SGNJ: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RNE; fflags_en = 0; end `FPU_SGNJN: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RTZ; fflags_en = 0; end `FPU_SGNJX: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RDN; fflags_en = 0; end `FPU_MIN: begin fpu_op = fpnew_pkg::MINMAX; fpu_rnd = `FRM_RNE; end `FPU_MAX: begin fpu_op = fpnew_pkg::MINMAX; fpu_rnd = `FRM_RTZ; end - `FPU_CVTWS: fpu_op = fpnew_pkg::F2I; + `FPU_CVTWS: begin fpu_op = fpnew_pkg::F2I; end `FPU_CVTWUS:begin fpu_op = fpnew_pkg::F2I; fpu_op_mod = 1; end - `FPU_CVTSW: fpu_op = fpnew_pkg::I2F; + `FPU_CVTSW: begin fpu_op = fpnew_pkg::I2F; end `FPU_CVTSWU:begin fpu_op = fpnew_pkg::I2F; fpu_op_mod = 1; end `FPU_MVXW: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RUP; fflags_en = 0; end `FPU_MVWX: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RUP; fflags_en = 0; end `FPU_CLASS: begin fpu_op = fpnew_pkg::CLASSIFY; fflags_en = 0; end - `FPU_CMP: fpu_op = fpnew_pkg::CMP; + `FPU_CMP: begin fpu_op = fpnew_pkg::CMP; end default:; endcase - end + end + + genvar i; `DISABLE_TRACING - - fpnew_top #( - .Features (FPU_FEATURES), - .Implementation (FPU_IMPLEMENTATION), - .TagType (logic[`LOG2UP(`FPURQ_SIZE)-1+2:0]) - ) fpnew_core ( - .clk_i (clk), - .rst_ni (1'b1), - .operands_i (fpu_operands), - .rnd_mode_i (fpnew_pkg::roundmode_e'(fpu_rnd)), - .op_i (fpnew_pkg::operation_e'(fpu_op)), - .op_mod_i (fpu_op_mod), - .src_fmt_i (fpnew_pkg::fp_format_e'(fpu_src_fmt)), - .dst_fmt_i (fpnew_pkg::fp_format_e'(fpu_dst_fmt)), - .int_fmt_i (fpnew_pkg::int_format_e'(fpu_int_fmt)), - .vectorial_op_i (1'b1), - .tag_i ({fpu_in_tag, fflags_en, is_class_op_i}), - .in_valid_i (fpu_in_valid), - .in_ready_o (fpu_in_ready), - .flush_i (reset), - .result_o (fpu_result), - .status_o (fpu_status), - .tag_o ({fpu_out_tag, fflags_en_o, is_class_op_o}), - .out_valid_o (fpu_out_valid), - .out_ready_i (fpu_out_ready), - `UNUSED_PIN (busy_o) - ); - -`ENABLE_TRACING - - reg [`NUM_THREADS-1:0][31:0] fpu_result_qual; - - always @(8) begin - // unpack classify mask result - if (is_class_op_o) begin - integer i; - for (i = 0; i < `NUM_THREADS; i++) begin - automatic integer l = i / 4; - automatic integer w = i % 4; - automatic integer class_mask = fpu_result[l][w * 8 +: 8]; - - fpu_result_qual[i][0] = class_mask[7] & class_mask[0]; - fpu_result_qual[i][1] = class_mask[7] & class_mask[1]; - fpu_result_qual[i][2] = class_mask[7] & class_mask[2]; - fpu_result_qual[i][3] = class_mask[7] & class_mask[3]; - fpu_result_qual[i][4] = class_mask[6] & class_mask[3]; - fpu_result_qual[i][5] = class_mask[6] & class_mask[2]; - fpu_result_qual[i][6] = class_mask[6] & class_mask[1]; - fpu_result_qual[i][7] = class_mask[6] & class_mask[0]; - fpu_result_qual[i][8] = class_mask[4]; - fpu_result_qual[i][9] = class_mask[5]; - end + + for (i = 0; i < `NUM_THREADS; i++) begin + if (0 == i) begin + fpnew_top #( + .Features (FPU_FEATURES), + .Implementation (FPU_IMPLEMENTATION), + .TagType (logic[`LOG2UP(`FPURQ_SIZE)+1+1-1:0]) + ) fpnew_core ( + .clk_i (clk), + .rst_ni (1'b1), + .operands_i ({fpu_operands[2][0], fpu_operands[1][0], fpu_operands[0][0]}), + .rnd_mode_i (fpnew_pkg::roundmode_e'(fpu_rnd)), + .op_i (fpnew_pkg::operation_e'(fpu_op)), + .op_mod_i (fpu_op_mod), + .src_fmt_i (fpnew_pkg::fp_format_e'(fpu_src_fmt)), + .dst_fmt_i (fpnew_pkg::fp_format_e'(fpu_dst_fmt)), + .int_fmt_i (fpnew_pkg::int_format_e'(fpu_int_fmt)), + .vectorial_op_i (1'b0), + .tag_i ({fpu_in_tag, fflags_en, is_class_op_i}), + .in_valid_i (fpu_in_valid), + .in_ready_o (fpu_in_ready), + .flush_i (reset), + .result_o (fpu_result), + .status_o (fpu_status[0]), + .tag_o ({fpu_out_tag, fflags_en_o, is_class_op_o}), + .out_valid_o (fpu_out_valid), + .out_ready_i (fpu_out_ready), + `UNUSED_PIN (busy_o) + ); end else begin - fpu_result_qual = fpu_result; + fpnew_top #( + .Features (FPU_FEATURES), + .Implementation (FPU_IMPLEMENTATION), + .TagType (logic) + ) fpnew_core ( + .clk_i (clk), + .rst_ni (1'b1), + .operands_i ({fpu_operands[2][i], fpu_operands[1][i], fpu_operands[0][i]}), + .rnd_mode_i (fpnew_pkg::roundmode_e'(fpu_rnd)), + .op_i (fpnew_pkg::operation_e'(fpu_op)), + .op_mod_i (fpu_op_mod), + .src_fmt_i (fpnew_pkg::fp_format_e'(fpu_src_fmt)), + .dst_fmt_i (fpnew_pkg::fp_format_e'(fpu_dst_fmt)), + .int_fmt_i (fpnew_pkg::int_format_e'(fpu_int_fmt)), + .vectorial_op_i (1'b0), + .tag_i (1'b0), + .in_valid_i (fpu_in_valid), + `UNUSED_PIN (in_ready_o), + .flush_i (reset), + .result_o (fpu_result[i]), + .status_o (fpu_status[i]), + `UNUSED_PIN (tag_o), + `UNUSED_PIN (out_valid_o), + .out_ready_i (fpu_out_ready), + `UNUSED_PIN (busy_o) + ); end end +`ENABLE_TRACING + assign fpu_in_valid = fpu_req_if.valid; assign fpu_in_tag = fpu_req_if.issue_tag; @@ -177,15 +183,18 @@ module VX_fpu_unit #( assign fpu_commit_if.valid = fpu_out_valid; assign fpu_commit_if.issue_tag = fpu_out_tag; - assign fpu_commit_if.data = fpu_result_qual; + assign fpu_commit_if.data = fpu_result; - assign fpu_commit_if.upd_fflags = fflags_en_o; - assign fpu_commit_if.fflags_NV = fpu_status.NV; - assign fpu_commit_if.fflags_DZ = fpu_status.DZ; - assign fpu_commit_if.fflags_OF = fpu_status.OF; - assign fpu_commit_if.fflags_UF = fpu_status.UF; - assign fpu_commit_if.fflags_NX = fpu_status.NX; + assign fpu_commit_if.upd_fflags = fflags_en_o; + for (i = 0; i < `NUM_THREADS; i++) begin + assign fpu_commit_if.fflags[i][0] = fpu_status[i].NX; + assign fpu_commit_if.fflags[i][1] = fpu_status[i].UF; + assign fpu_commit_if.fflags[i][2] = fpu_status[i].OF; + assign fpu_commit_if.fflags[i][3] = fpu_status[i].DZ; + assign fpu_commit_if.fflags[i][4] = fpu_status[i].NV; + end + assign fpu_out_ready = fpu_commit_if.ready; endmodule \ No newline at end of file diff --git a/hw/rtl/VX_icache_stage.v b/hw/rtl/VX_icache_stage.v index bc77503a..fad3fcd4 100644 --- a/hw/rtl/VX_icache_stage.v +++ b/hw/rtl/VX_icache_stage.v @@ -46,7 +46,7 @@ module VX_icache_stage #( assign ifetch_req_if.ready = icache_req_if.ready; `ifdef DBG_CORE_REQ_INFO - assign icache_req_if.tag = {ifetch_req_if.curr_PC, 1'b0, 5'b0, ifetch_req_if.warp_num, req_tag}; + assign icache_req_if.tag = {ifetch_req_if.curr_PC, 1'b0, `NR_BITS'(0), ifetch_req_if.warp_num, req_tag}; `else assign icache_req_if.tag = req_tag; `endif diff --git a/hw/rtl/VX_issue.v b/hw/rtl/VX_issue.v index 1967df71..78b8d2d2 100644 --- a/hw/rtl/VX_issue.v +++ b/hw/rtl/VX_issue.v @@ -97,7 +97,7 @@ module VX_issue #( $display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=ALU, istag=%0d, tmask=%b, wb=%d, rd=%0d, rs1_data=%0h, rs2_data=%0h, offset=%0h, next_PC=%0h", $time, CORE_ID, decode_tmp_if.warp_num, decode_tmp_if.curr_PC, issue_tmp_tag, decode_tmp_if.thread_mask, decode_tmp_if.wb, decode_tmp_if.rd, alu_req_if.rs1_data, alu_req_if.rs2_data, alu_req_if.offset, alu_req_if.next_PC); end if (lsu_req_if.valid && lsu_req_if.ready) begin - $display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=LSU, istag=%0d, tmask=%b, wb=%0b, rd=%0d, rw=%b, byteen=%b, baddr=%0h, offset=%0h", $time, CORE_ID, decode_tmp_if.warp_num, decode_tmp_if.curr_PC, issue_tmp_tag, decode_tmp_if.thread_mask, decode_tmp_if.wb, decode_tmp_if.rd, lsu_req_if.rw, lsu_req_if.byteen, lsu_req_if.base_addr, lsu_req_if.offset); + $display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=LSU, istag=%0d, tmask=%b, wb=%0b, rd=%0d, rw=%b, byteen=%b, baddr=%0h, offset=%0h, data=%0h", $time, CORE_ID, decode_tmp_if.warp_num, decode_tmp_if.curr_PC, issue_tmp_tag, decode_tmp_if.thread_mask, decode_tmp_if.wb, decode_tmp_if.rd, lsu_req_if.rw, lsu_req_if.byteen, lsu_req_if.base_addr, lsu_req_if.offset, lsu_req_if.store_data); end if (csr_req_if.valid && csr_req_if.ready) begin $display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=CSR, istag=%0d, tmask=%b, wb=%d, rd=%0d, addr=%0h, mask=%0h", $time, CORE_ID, decode_tmp_if.warp_num, decode_tmp_if.curr_PC, issue_tmp_tag, decode_tmp_if.thread_mask, decode_tmp_if.wb, decode_tmp_if.rd, csr_req_if.csr_addr, csr_req_if.csr_mask); diff --git a/hw/rtl/VX_issue_demux.v b/hw/rtl/VX_issue_demux.v index 3ab2f160..5e5a9672 100644 --- a/hw/rtl/VX_issue_demux.v +++ b/hw/rtl/VX_issue_demux.v @@ -46,7 +46,7 @@ module VX_issue_demux ( assign csr_req_if.warp_num = decode_if.warp_num; assign csr_req_if.curr_PC = decode_if.curr_PC; assign csr_req_if.csr_op = `CSR_OP(decode_if.ex_op); - assign csr_req_if.csr_addr = decode_if.imm[`CSR_ADDR_SIZE-1:0]; + assign csr_req_if.csr_addr = decode_if.imm[`CSR_ADDR_BITS-1:0]; assign csr_req_if.csr_mask = decode_if.rs2_is_imm ? 32'(decode_if.rs1) : gpr_read_if.rs1_data[0]; assign csr_req_if.is_io = 1'b0; diff --git a/hw/rtl/VX_scheduler.v b/hw/rtl/VX_scheduler.v index 08f65575..22afdec1 100644 --- a/hw/rtl/VX_scheduler.v +++ b/hw/rtl/VX_scheduler.v @@ -49,7 +49,7 @@ module VX_scheduler #( for (i = 0; i < `NUM_REGS; i++) begin inuse_registers[w][i] <= 0; end - inuse_reg_mask[w] <= 0; + inuse_reg_mask[w] <= `NUM_REGS'(0); end end else begin if (acquire_rd) begin diff --git a/hw/rtl/VX_writeback.v b/hw/rtl/VX_writeback.v index 953db5d1..051fe41c 100644 --- a/hw/rtl/VX_writeback.v +++ b/hw/rtl/VX_writeback.v @@ -24,9 +24,10 @@ module VX_writeback #( reg [`NUM_THREADS-1:0] wb_thread_mask [`ISSUEQ_SIZE-1:0]; reg [31:0] wb_curr_PC [`ISSUEQ_SIZE-1:0]; reg [`NR_BITS-1:0] wb_rd [`ISSUEQ_SIZE-1:0]; - reg [`ISSUEQ_SIZE-1:0] wb_pending, wb_pending_n; - reg [`ISTAG_BITS-1:0] wb_index; + reg [`ISSUEQ_SIZE-1:0] wb_pending; + reg [`ISSUEQ_SIZE-1:0] wb_pending_n; + reg [`ISTAG_BITS-1:0] wb_index; wire [`ISTAG_BITS-1:0] wb_index_n; reg wb_valid; @@ -67,6 +68,8 @@ module VX_writeback #( always @(posedge clk) begin if (reset) begin wb_pending <= 0; + wb_index <= 0; + wb_valid <= 0; end else begin if (alu_commit_if.valid) begin wb_data [alu_commit_if.issue_tag] <= alu_commit_if.data; diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index f0692c5b..57992793 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -105,7 +105,7 @@ module VX_bank #( `ifdef DBG_CORE_REQ_INFO /* verilator lint_off UNUSED */ - wire[31:0] debug_use_pc_st0; + wire[31:0] debug_pc_st0; wire debug_wb_st0; wire[`NR_BITS-1:0] debug_rd_st0; wire[`NW_BITS-1:0] debug_warp_num_st0; @@ -114,7 +114,7 @@ module VX_bank #( wire[`REQS_BITS-1:0] debug_tid_st0; wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st0; - wire[31:0] debug_use_pc_st1e; + wire[31:0] debug_pc_st1e; wire debug_wb_st1e; wire[`NR_BITS-1:0] debug_rd_st1e; wire[`NW_BITS-1:0] debug_warp_num_st1e; @@ -123,7 +123,7 @@ module VX_bank #( wire[`REQS_BITS-1:0] debug_tid_st1e; wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st1e; - wire[31:0] debug_use_pc_st2; + wire[31:0] debug_pc_st2; wire debug_wb_st2; wire[`NR_BITS-1:0] debug_rd_st2; wire[`NW_BITS-1:0] debug_warp_num_st2; @@ -360,7 +360,7 @@ module VX_bank #( `ifdef DBG_CORE_REQ_INFO if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin - assign {debug_use_pc_st0, debug_wb_st0, debug_rd_st0, debug_warp_num_st0, debug_tagid_st0, debug_rw_st0, debug_byteen_st0, debug_tid_st0} = qual_inst_meta_st0; + assign {debug_pc_st0, debug_wb_st0, debug_rd_st0, debug_warp_num_st0, debug_tagid_st0, debug_rw_st0, debug_byteen_st0, debug_tid_st0} = qual_inst_meta_st0; end `endif @@ -432,6 +432,9 @@ module VX_bank #( && (addr_st2 == addr_st1e); VX_tag_data_access #( + .BANK_ID (BANK_ID), + .CACHE_ID (CACHE_ID), + .CORE_TAG_ID_BITS(CORE_TAG_ID_BITS), .CACHE_SIZE (CACHE_SIZE), .BANK_LINE_SIZE (BANK_LINE_SIZE), .NUM_BANKS (NUM_BANKS), @@ -442,6 +445,15 @@ module VX_bank #( ) tag_data_access ( .clk (clk), .reset (reset), + +`ifdef DBG_CORE_REQ_INFO + .debug_pc_st1e(debug_pc_st1e), + .debug_wb_st1e(debug_wb_st1e), + .debug_rd_st1e(debug_rd_st1e), + .debug_warp_num_st1e(debug_warp_num_st1e), + .debug_tagid_st1e(debug_tagid_st1e), +`endif + .stall (stall_bank_pipe), .stall_bank_pipe(stall_bank_pipe), @@ -478,7 +490,7 @@ module VX_bank #( `ifdef DBG_CORE_REQ_INFO if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin - assign {debug_use_pc_st1e, debug_wb_st1e, debug_rd_st1e, debug_warp_num_st1e, debug_tagid_st1e, debug_rw_st1e, debug_byteen_st1e, debug_tid_st1e} = inst_meta_st1[STAGE_1_CYCLES-1]; + assign {debug_pc_st1e, debug_wb_st1e, debug_rd_st1e, debug_warp_num_st1e, debug_tagid_st1e, debug_rw_st1e, debug_byteen_st1e, debug_tid_st1e} = inst_meta_st1[STAGE_1_CYCLES-1]; end `endif @@ -519,7 +531,7 @@ module VX_bank #( `ifdef DBG_CORE_REQ_INFO if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin - assign {debug_use_pc_st2, debug_wb_st2, debug_rd_st2, debug_warp_num_st2, debug_tagid_st2, debug_rw_st2, debug_byteen_st2, debug_tid_st2} = inst_meta_st2; + assign {debug_pc_st2, debug_wb_st2, debug_rd_st2, debug_warp_num_st2, debug_tagid_st2, debug_rw_st2, debug_byteen_st2, debug_tid_st2} = inst_meta_st2; end `endif diff --git a/hw/rtl/cache/VX_tag_data_access.v b/hw/rtl/cache/VX_tag_data_access.v index 04e99336..13e52cb7 100644 --- a/hw/rtl/cache/VX_tag_data_access.v +++ b/hw/rtl/cache/VX_tag_data_access.v @@ -1,6 +1,9 @@ `include "VX_cache_config.vh" module VX_tag_data_access #( + parameter CACHE_ID = 0, + parameter BANK_ID = 0, + parameter CORE_TAG_ID_BITS = 0, // Size of cache in bytes parameter CACHE_SIZE = 0, // Size of line inside a bank in bytes @@ -22,6 +25,14 @@ module VX_tag_data_access #( input wire clk, input wire reset, +`ifdef DBG_CORE_REQ_INFO + input wire[31:0] debug_pc_st1e, + input wire debug_wb_st1e, + input wire[`NR_BITS-1:0] debug_rd_st1e, + input wire[`NW_BITS-1:0] debug_warp_num_st1e, + input wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st1e, +`endif + input wire stall, input wire is_snp_st1e, input wire snp_invalidate_st1e, @@ -85,10 +96,10 @@ module VX_tag_data_access #( wire[`LINE_SELECT_BITS-1:0] writeladdr_st1e = writeaddr_st1e[`LINE_SELECT_BITS-1:0]; VX_tag_data_store #( - .CACHE_SIZE (CACHE_SIZE), - .BANK_LINE_SIZE (BANK_LINE_SIZE), - .NUM_BANKS (NUM_BANKS), - .WORD_SIZE (WORD_SIZE) + .CACHE_SIZE (CACHE_SIZE), + .BANK_LINE_SIZE (BANK_LINE_SIZE), + .NUM_BANKS (NUM_BANKS), + .WORD_SIZE (WORD_SIZE) ) tag_data_store ( .clk (clk), .reset (reset), @@ -125,7 +136,7 @@ module VX_tag_data_access #( genvar i; for (i = 1; i < STAGE_1_CYCLES-1; i++) begin VX_generic_register #( - .N( 1 + 1 + BANK_LINE_SIZE + `TAG_SELECT_BITS + `BANK_LINE_WIDTH) + .N(1 + 1 + BANK_LINE_SIZE + `TAG_SELECT_BITS + `BANK_LINE_WIDTH) ) s0_1_cc ( .clk (clk), .reset (reset), @@ -200,4 +211,23 @@ module VX_tag_data_access #( assign fill_saw_dirty_st1e = real_writefill && dirty_st1e; assign invalidate_line = snoop_hit_no_pending; +`ifdef DBG_PRINT_CACHE_BANK + always @(posedge clk) begin + if (valid_req_st1e) begin + if ((| use_write_enable)) begin + if (writefill_st1e) begin + $display("%t: bank%0d:%0d store-fill: warp=%0d, PC=%0h, tag=%0h, wb=%b, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, data=%0h", $time, CACHE_ID, BANK_ID, debug_warp_num_st1e, debug_pc_st1e, debug_tagid_st1e, debug_wb_st1e, debug_rd_st1e, dirty_st1e, writeladdr_st1e, writetag_st1e, use_write_data); + end else begin + $display("%t: bank%0d:%0d store-write: warp=%0d, PC=%0h, tag=%0h, wb=%b, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_warp_num_st1e, debug_pc_st1e, debug_tagid_st1e, debug_wb_st1e, debug_rd_st1e, dirty_st1e, writeladdr_st1e, writetag_st1e, wordsel_st1e, writeword_st1e); + end + end else + if (miss_st1e) begin + $display("%t: bank%0d:%0d store-miss: warp=%0d, PC=%0h, tag=%0h, wb=%b, rd=%0d, dirty=%b", $time, CACHE_ID, BANK_ID, debug_warp_num_st1e, debug_pc_st1e, debug_tagid_st1e, debug_wb_st1e, debug_rd_st1e, dirty_st1e); + end else begin + $display("%t: bank%0d:%0d store-read: warp=%0d, PC=%0h, tag=%0h, wb=%b, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_warp_num_st1e, debug_pc_st1e, debug_tagid_st1e, debug_wb_st1e, debug_rd_st1e, dirty_st1e, readaddr_st10, qual_read_tag_st1, wordsel_st1e, qual_read_data_st1); + end + end + end +`endif + endmodule \ No newline at end of file diff --git a/hw/rtl/interfaces/VX_cmt_to_csr_if.v b/hw/rtl/interfaces/VX_cmt_to_csr_if.v index 7badc5bf..2b712342 100644 --- a/hw/rtl/interfaces/VX_cmt_to_csr_if.v +++ b/hw/rtl/interfaces/VX_cmt_to_csr_if.v @@ -6,15 +6,13 @@ interface VX_cmt_to_csr_if (); wire valid; + + wire [`NW_BITS-1:0] warp_num; + wire [`NE_BITS:0] num_commits; wire upd_fflags; - wire [`NW_BITS-1:0] fpu_warp_num; - wire fflags_NV; - wire fflags_DZ; - wire fflags_OF; - wire fflags_UF; - wire fflags_NX; + wire [`FFG_BITS-1:0] fflags; endinterface diff --git a/hw/rtl/interfaces/VX_csr_io_req_if.v b/hw/rtl/interfaces/VX_csr_io_req_if.v index 716887d3..a5307a08 100644 --- a/hw/rtl/interfaces/VX_csr_io_req_if.v +++ b/hw/rtl/interfaces/VX_csr_io_req_if.v @@ -5,11 +5,11 @@ interface VX_csr_io_req_if (); - wire valid; - wire [`CSR_ADDR_SIZE-1:0] addr; - wire rw; - wire [31:0] data; - wire ready; + wire valid; + wire [`CSR_ADDR_BITS-1:0] addr; + wire rw; + wire [31:0] data; + wire ready; endinterface diff --git a/hw/rtl/interfaces/VX_csr_req_if.v b/hw/rtl/interfaces/VX_csr_req_if.v index 92c5b82a..b29cc649 100644 --- a/hw/rtl/interfaces/VX_csr_req_if.v +++ b/hw/rtl/interfaces/VX_csr_req_if.v @@ -12,7 +12,7 @@ interface VX_csr_req_if (); wire [`CSR_BITS-1:0] csr_op; - wire [`CSR_ADDR_SIZE-1:0] csr_addr; + wire [`CSR_ADDR_BITS-1:0] csr_addr; wire [31:0] csr_mask; wire [`NR_BITS-1:0] rd; diff --git a/hw/rtl/interfaces/VX_fpu_to_cmt_if.v b/hw/rtl/interfaces/VX_fpu_to_cmt_if.v index ca2cb619..cce2e293 100644 --- a/hw/rtl/interfaces/VX_fpu_to_cmt_if.v +++ b/hw/rtl/interfaces/VX_fpu_to_cmt_if.v @@ -9,11 +9,7 @@ interface VX_fpu_to_cmt_if (); wire [`ISTAG_BITS-1:0] issue_tag; wire [`NUM_THREADS-1:0][31:0] data; wire upd_fflags; - wire fflags_NV; - wire fflags_DZ; - wire fflags_OF; - wire fflags_UF; - wire fflags_NX; + wire [`NUM_THREADS-1:0][`FFG_BITS-1:0] fflags; wire ready; endinterface diff --git a/hw/simulate/Makefile b/hw/simulate/Makefile index 77c2a151..c232cc09 100644 --- a/hw/simulate/Makefile +++ b/hw/simulate/Makefile @@ -15,7 +15,7 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE DBG_FLAGS += $(DBG_PRINT_FLAGS) -#DBG_FLAGS += -DDBG_CORE_REQ_INFO +DBG_FLAGS += -DDBG_CORE_REQ_INFO FPU_INCLUDE = -I../rtl/fp_cores/fpnew/src/common_cells/include -I../rtl/fp_cores/fpnew/src/common_cells/src -I../rtl/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I../rtl/fp_cores/fpnew/src INCLUDE = -I../rtl/ -I../rtl/libs -I../rtl/interfaces -I../rtl/cache -I../rtl/fp_cores -I../rtl/simulate $(FPU_INCLUDE) diff --git a/hw/simulate/simulator.cpp b/hw/simulate/simulator.cpp index 90fd338b..1c30a870 100644 --- a/hw/simulate/simulator.cpp +++ b/hw/simulate/simulator.cpp @@ -210,7 +210,7 @@ void Simulator::wait(uint32_t cycles) { } } -bool Simulator::is_busy() { +bool Simulator::is_busy() const { return vortex_->busy || snp_req_active_; } @@ -255,11 +255,11 @@ void Simulator::run() { this->wait(5); } -int Simulator::get_last_wb_value(int reg) { - #if (NUM_CLUSTERS == 1) - return (int)vortex_->Vortex->genblk1__DOT__cluster->genblk1__BRA__0__KET____DOT__core->pipeline->commit->writeback->last_wb_value[reg]; +int Simulator::get_last_wb_value(int reg) const { + #if (NUM_CLUSTERS != 1) + return (int)vortex_->Vortex->genblk2__DOT__genblk1__BRA__0__KET____DOT__cluster->genblk1__BRA__0__KET____DOT__core->pipeline->commit->writeback->last_wb_value[reg]; #else - return (int)vortex_->Vortex->genblk2__DOT__genblk1__BRA__0__KET____DOT__cluster->genblk1__BRA__0__KET____DOT__core->pipeline->commit->writeback->last_wb_value[reg]; + return (int)vortex_->Vortex->genblk1__DOT__cluster->genblk1__BRA__0__KET____DOT__core->pipeline->commit->writeback->last_wb_value[reg]; #endif } diff --git a/hw/simulate/simulator.h b/hw/simulate/simulator.h index 43052685..e104b66b 100644 --- a/hw/simulate/simulator.h +++ b/hw/simulate/simulator.h @@ -34,7 +34,7 @@ public: void load_bin(const char* program_file); void load_ihex(const char* program_file); - bool is_busy(); + bool is_busy() const; void reset(); void step(); @@ -44,7 +44,7 @@ public: void attach_ram(RAM* ram); void run(); - int get_last_wb_value(int reg); + int get_last_wb_value(int reg) const; void print_stats(std::ostream& out); private: diff --git a/runtime/src/vx_intrinsics.S b/runtime/src/vx_intrinsics.S index c6b0008d..1e99c794 100644 --- a/runtime/src/vx_intrinsics.S +++ b/runtime/src/vx_intrinsics.S @@ -83,11 +83,11 @@ vx_num_cores: .type vx_num_cycles, @function .global vx_num_cycles vx_num_cycles: - csrr a0, CSR_CYCLE_L + csrr a0, CSR_CYCLE ret .type vx_num_instrs, @function .global vx_num_instrs vx_num_instrs: - csrr a0, CSR_INSTR_L + csrr a0, CSR_INSTRET ret \ No newline at end of file diff --git a/runtime/tests/simple/vx_simple.dump b/runtime/tests/simple/vx_simple.dump index 6ab5c371..9b62df87 100644 --- a/runtime/tests/simple/vx_simple.dump +++ b/runtime/tests/simple/vx_simple.dump @@ -454,7 +454,7 @@ Disassembly of section .text: 80000698: 0005006b 0x5006b 8000069c: 00001197 auipc gp,0x1 800006a0: 16c18193 addi gp,gp,364 # 80001808 <__global_pointer$> -800006a4: f14025f3 csrr a1,mhartid +800006a4: 022025f3 csrr a1,0x22 800006a8: 00a59593 slli a1,a1,0xa 800006ac: 02002673 csrr a2,0x20 800006b0: 00261613 slli a2,a2,0x2 @@ -502,7 +502,7 @@ Disassembly of section .text: 80000710: 00008067 ret 80000714 : -80000714: f1402573 csrr a0,mhartid +80000714: 02202573 csrr a0,0x22 80000718: 00008067 ret 8000071c : diff --git a/runtime/tests/simple/vx_simple.elf b/runtime/tests/simple/vx_simple.elf index a7a3480a8f19697ed7a6eff96d9dbff0e67d3260..364d27aefa03695692462d1128ba50e29136e5fe 100755 GIT binary patch delta 22 ccmcbTcqMVeGBG9vrpYVB@_^K43GqFW0A@=El>h($ delta 22 ccmcbTcqMVeGBGBHkCRu3