lkg build with pipeline + FPU fixes
This commit is contained in:
8
Makefile
8
Makefile
@@ -3,4 +3,10 @@ all:
|
|||||||
$(MAKE) -C hw
|
$(MAKE) -C hw
|
||||||
$(MAKE) -C driver
|
$(MAKE) -C driver
|
||||||
$(MAKE) -C runtime
|
$(MAKE) -C runtime
|
||||||
$(MAKE) -C simX
|
$(MAKE) -C simX
|
||||||
|
|
||||||
|
clean:
|
||||||
|
$(MAKE) -C hw clean
|
||||||
|
$(MAKE) -C driver clean
|
||||||
|
$(MAKE) -C runtime clean
|
||||||
|
$(MAKE) -C simX clean
|
||||||
@@ -37,7 +37,7 @@ Install development tools
|
|||||||
|
|
||||||
Install gnu-riscv-tools
|
Install gnu-riscv-tools
|
||||||
|
|
||||||
$ export RISC_GNU_TOOLS_PATH=/opt/riscv-gnu-toolchain
|
$ export RISCV_TOOLCHAIN_PATH=/opt/riscv-gnu-toolchain
|
||||||
|
|
||||||
$ sudo apt-get -y install \
|
$ sudo apt-get -y install \
|
||||||
binutils build-essential libtool texinfo \
|
binutils build-essential libtool texinfo \
|
||||||
@@ -51,7 +51,7 @@ Install gnu-riscv-tools
|
|||||||
$ git submodule update --init --recursive
|
$ git submodule update --init --recursive
|
||||||
$ mkdir build
|
$ mkdir build
|
||||||
$ cd build
|
$ cd build
|
||||||
$ ../configure --prefix=$RISC_GNU_TOOLS_PATH --with-arch=rv32im --with-abi=ilp32
|
$ ../configure --prefix=$RISCV_TOOLCHAIN_PATH --with-arch=rv32im --with-abi=ilp32
|
||||||
$ make -j`nproc`
|
$ make -j`nproc`
|
||||||
$ make -j`nproc` build-qemu
|
$ make -j`nproc` build-qemu
|
||||||
|
|
||||||
|
|||||||
@@ -173,7 +173,7 @@ extern int vx_dev_open(vx_device_h* hdevice) {
|
|||||||
{
|
{
|
||||||
// Load device CAPS
|
// Load device CAPS
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
ret |= vx_csr_get(device, 0, CSR_IMPL_ID, &device->implementation_id);
|
ret |= vx_csr_get(device, 0, CSR_MIMPID, &device->implementation_id);
|
||||||
ret |= vx_csr_get(device, 0, CSR_NC, &device->num_cores);
|
ret |= vx_csr_get(device, 0, CSR_NC, &device->num_cores);
|
||||||
ret |= vx_csr_get(device, 0, CSR_NW, &device->num_warps);
|
ret |= vx_csr_get(device, 0, CSR_NW, &device->num_warps);
|
||||||
ret |= vx_csr_get(device, 0, CSR_NT, &device->num_threads);
|
ret |= vx_csr_get(device, 0, CSR_NT, &device->num_threads);
|
||||||
@@ -217,14 +217,14 @@ extern int vx_dev_close(vx_device_h hdevice) {
|
|||||||
unsigned value;
|
unsigned value;
|
||||||
|
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
ret |= vx_csr_get(hdevice, 0, CSR_INSTR_H, &value);
|
ret |= vx_csr_get(hdevice, 0, CSR_INSTRET_H, &value);
|
||||||
instrs = value;
|
instrs = value;
|
||||||
ret |= vx_csr_get(hdevice, 0, CSR_INSTR_L, &value);
|
ret |= vx_csr_get(hdevice, 0, CSR_INSTRET, &value);
|
||||||
instrs = (instrs << 32) | value;
|
instrs = (instrs << 32) | value;
|
||||||
|
|
||||||
ret |= vx_csr_get(hdevice, 0, CSR_CYCLE_H, &value);
|
ret |= vx_csr_get(hdevice, 0, CSR_CYCLE_H, &value);
|
||||||
cycles = value;
|
cycles = value;
|
||||||
ret |= vx_csr_get(hdevice, 0, CSR_CYCLE_L, &value);
|
ret |= vx_csr_get(hdevice, 0, CSR_CYCLE, &value);
|
||||||
cycles = (cycles << 32) | value;
|
cycles = (cycles << 32) | value;
|
||||||
|
|
||||||
float IPC = (float)(double(instrs) / double(cycles));
|
float IPC = (float)(double(instrs) / double(cycles));
|
||||||
|
|||||||
BIN
driver/tests/demo/kernel.bin
Normal file → Executable file
BIN
driver/tests/demo/kernel.bin
Normal file → Executable file
Binary file not shown.
@@ -97,7 +97,7 @@ Disassembly of section .text:
|
|||||||
80000134: 0005006b 0x5006b
|
80000134: 0005006b 0x5006b
|
||||||
80000138: 00002197 auipc gp,0x2
|
80000138: 00002197 auipc gp,0x2
|
||||||
8000013c: d3818193 addi gp,gp,-712 # 80001e70 <__global_pointer$>
|
8000013c: d3818193 addi gp,gp,-712 # 80001e70 <__global_pointer$>
|
||||||
80000140: f14025f3 csrr a1,mhartid
|
80000140: 022025f3 csrr a1,0x22
|
||||||
80000144: 00a59593 slli a1,a1,0xa
|
80000144: 00a59593 slli a1,a1,0xa
|
||||||
80000148: 02002673 csrr a2,0x20
|
80000148: 02002673 csrr a2,0x20
|
||||||
8000014c: 00261613 slli a2,a2,0x2
|
8000014c: 00261613 slli a2,a2,0x2
|
||||||
@@ -145,7 +145,7 @@ Disassembly of section .text:
|
|||||||
800001ac: 00008067 ret
|
800001ac: 00008067 ret
|
||||||
|
|
||||||
800001b0 <vx_thread_gid>:
|
800001b0 <vx_thread_gid>:
|
||||||
800001b0: f1402573 csrr a0,mhartid
|
800001b0: 02202573 csrr a0,0x22
|
||||||
800001b4: 00008067 ret
|
800001b4: 00008067 ret
|
||||||
|
|
||||||
800001b8 <vx_core_id>:
|
800001b8 <vx_core_id>:
|
||||||
|
|||||||
Binary file not shown.
@@ -1,4 +1,7 @@
|
|||||||
.PHONY: build_config
|
.PHONY: build_config
|
||||||
|
|
||||||
build_config:
|
build_config:
|
||||||
./scripts/gen_config.py --outv ./rtl/VX_user_config.vh --outc ./VX_config.h
|
./scripts/gen_config.py --outv ./rtl/VX_user_config.vh --outc ./VX_config.h
|
||||||
|
|
||||||
|
clean:
|
||||||
|
rm ./rtl/VX_user_config.vh ./VX_config.h
|
||||||
@@ -48,7 +48,7 @@ module VX_alu_unit #(
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
wire [`NT_BITS-1:0] br_result_index, br_result_index_o;
|
wire [`NT_BITS-1:0] br_result_index;
|
||||||
|
|
||||||
VX_priority_encoder #(
|
VX_priority_encoder #(
|
||||||
.N(`NUM_THREADS)
|
.N(`NUM_THREADS)
|
||||||
@@ -58,8 +58,14 @@ module VX_alu_unit #(
|
|||||||
`UNUSED_PIN (valid_out)
|
`UNUSED_PIN (valid_out)
|
||||||
);
|
);
|
||||||
|
|
||||||
wire [`BR_BITS-1:0] br_op = `IS_BR_OP(alu_req_if.alu_op) ? `BR_OP(alu_req_if.alu_op) : 0;
|
wire [32:0] br_result = sub_result[br_result_index];
|
||||||
wire [`BR_BITS-1:0] br_op_o;
|
wire br_sign = br_result[32];
|
||||||
|
wire br_nzero = (| br_result[31:0]);
|
||||||
|
wire br_sign_s1;
|
||||||
|
wire br_nzero_s1;
|
||||||
|
|
||||||
|
wire [`BR_BITS-1:0] br_op = `IS_BR_OP(alu_req_if.alu_op) ? `BR_OP(alu_req_if.alu_op) : `BR_NO;
|
||||||
|
wire [`BR_BITS-1:0] br_op_s1;
|
||||||
|
|
||||||
wire [31:0] br_addr = (br_op == `BR_JALR) ? alu_req_if.rs1_data[br_result_index] : alu_req_if.curr_PC;
|
wire [31:0] br_addr = (br_op == `BR_JALR) ? alu_req_if.rs1_data[br_result_index] : alu_req_if.curr_PC;
|
||||||
wire [31:0] br_dest = $signed(br_addr) + $signed(alu_req_if.offset);
|
wire [31:0] br_dest = $signed(br_addr) + $signed(alu_req_if.offset);
|
||||||
@@ -70,34 +76,30 @@ module VX_alu_unit #(
|
|||||||
wire stall = ~alu_commit_if.ready && alu_commit_if.valid;
|
wire stall = ~alu_commit_if.ready && alu_commit_if.valid;
|
||||||
|
|
||||||
VX_generic_register #(
|
VX_generic_register #(
|
||||||
.N(1 + `NW_BITS + `ISTAG_BITS + (`NUM_THREADS * 32) + `BR_BITS + 32 + `NT_BITS)
|
.N(1 + `NW_BITS + `ISTAG_BITS + (`NUM_THREADS * 32) + `BR_BITS + 32 + 1 + 1)
|
||||||
) alu_reg (
|
) alu_reg (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.stall (stall),
|
.stall (stall),
|
||||||
.flush (0),
|
.flush (0),
|
||||||
.in ({alu_req_if.valid, alu_req_if.warp_num, alu_req_if.issue_tag, alu_jal_result, br_op, br_dest, br_result_index}),
|
.in ({alu_req_if.valid, alu_req_if.warp_num, alu_req_if.issue_tag, alu_jal_result, br_op, br_dest, br_sign, br_nzero}),
|
||||||
.out ({alu_commit_if.valid, branch_ctl_if.warp_num, alu_commit_if.issue_tag, alu_commit_if.data, br_op_o, branch_ctl_if.dest, br_result_index_o})
|
.out ({alu_commit_if.valid, branch_ctl_if.warp_num, alu_commit_if.issue_tag, alu_commit_if.data, br_op_s1, branch_ctl_if.dest, br_sign_s1, br_nzero_s1})
|
||||||
);
|
);
|
||||||
|
|
||||||
wire [31:0] br_result = alu_commit_if.data[br_result_index_o];
|
|
||||||
wire br_sign = br_result[31];
|
|
||||||
wire br_nzero = (| br_result[31:0]);
|
|
||||||
|
|
||||||
reg br_taken;
|
reg br_taken;
|
||||||
always @(*) begin
|
always @(*) begin
|
||||||
case (br_op_o)
|
case (br_op_s1)
|
||||||
`BR_NE: br_taken = br_nzero;
|
`BR_NE: br_taken = br_nzero_s1;
|
||||||
`BR_EQ: br_taken = ~br_nzero;
|
`BR_EQ: br_taken = ~br_nzero_s1;
|
||||||
`BR_LT,
|
`BR_LT,
|
||||||
`BR_LTU: br_taken = br_sign;
|
`BR_LTU: br_taken = br_sign_s1;
|
||||||
`BR_GE,
|
`BR_GE,
|
||||||
`BR_GEU: br_taken = ~br_sign;
|
`BR_GEU: br_taken = ~br_sign_s1;
|
||||||
default: br_taken = 1'b1;
|
default: br_taken = 1'b1;
|
||||||
endcase
|
endcase
|
||||||
end
|
end
|
||||||
|
|
||||||
assign branch_ctl_if.valid = alu_req_if.valid && (br_op_o != 0);
|
assign branch_ctl_if.valid = alu_commit_if.valid && (br_op_s1 != `BR_NO);
|
||||||
assign branch_ctl_if.taken = br_taken;
|
assign branch_ctl_if.taken = br_taken;
|
||||||
|
|
||||||
assign alu_req_if.ready = ~stall;
|
assign alu_req_if.ready = ~stall;
|
||||||
|
|||||||
@@ -39,15 +39,27 @@ module VX_commit #(
|
|||||||
);
|
);
|
||||||
|
|
||||||
assign cmt_to_csr_if.valid = (| commited_mask);
|
assign cmt_to_csr_if.valid = (| commited_mask);
|
||||||
|
assign cmt_to_csr_if.warp_num = cmt_to_issue_if.fpu_data.warp_num;
|
||||||
assign cmt_to_csr_if.num_commits = num_commits;
|
assign cmt_to_csr_if.num_commits = num_commits;
|
||||||
|
|
||||||
|
assign cmt_to_csr_if.upd_fflags = (fpu_commit_if.valid && fpu_commit_if.ready) && fpu_commit_if.upd_fflags;
|
||||||
|
|
||||||
|
integer i;
|
||||||
|
|
||||||
assign cmt_to_csr_if.upd_fflags = (fpu_commit_if.valid && fpu_commit_if.ready) && fpu_commit_if.upd_fflags;
|
reg [`FFG_BITS-1:0] fflags;
|
||||||
assign cmt_to_csr_if.fpu_warp_num = cmt_to_issue_if.fpu_data.warp_num;
|
always @(*) begin
|
||||||
assign cmt_to_csr_if.fflags_NV = fpu_commit_if.fflags_NV;
|
fflags = 0;
|
||||||
assign cmt_to_csr_if.fflags_DZ = fpu_commit_if.fflags_DZ;
|
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||||
assign cmt_to_csr_if.fflags_OF = fpu_commit_if.fflags_OF;
|
if (cmt_to_issue_if.fpu_data.thread_mask[i]) begin
|
||||||
assign cmt_to_csr_if.fflags_UF = fpu_commit_if.fflags_UF;
|
fflags[0] |= fpu_commit_if.fflags[i][0];
|
||||||
assign cmt_to_csr_if.fflags_NX = fpu_commit_if.fflags_NX;
|
fflags[1] |= fpu_commit_if.fflags[i][1];
|
||||||
|
fflags[2] |= fpu_commit_if.fflags[i][2];
|
||||||
|
fflags[3] |= fpu_commit_if.fflags[i][3];
|
||||||
|
fflags[4] |= fpu_commit_if.fflags[i][4];
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
assign cmt_to_csr_if.fflags = fflags;
|
||||||
|
|
||||||
// Notify issue stage
|
// Notify issue stage
|
||||||
|
|
||||||
|
|||||||
@@ -27,10 +27,6 @@
|
|||||||
`define GLOBAL_BLOCK_SIZE 16
|
`define GLOBAL_BLOCK_SIZE 16
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
`ifndef NUM_CSRS
|
|
||||||
`define NUM_CSRS 64
|
|
||||||
`endif
|
|
||||||
|
|
||||||
`ifndef STARTUP_ADDR
|
`ifndef STARTUP_ADDR
|
||||||
`define STARTUP_ADDR 32'h80000000
|
`define STARTUP_ADDR 32'h80000000
|
||||||
`endif
|
`endif
|
||||||
@@ -59,38 +55,11 @@
|
|||||||
|
|
||||||
`define EXT_F_ENABLE
|
`define EXT_F_ENABLE
|
||||||
|
|
||||||
// Configuration Values =======================================================
|
// Device identification
|
||||||
|
|
||||||
`define VENDOR_ID 0
|
`define VENDOR_ID 0
|
||||||
`define ARCHITECTURE_ID 0
|
`define ARCHITECTURE_ID 0
|
||||||
`define IMPLEMENTATION_ID 0
|
`define IMPLEMENTATION_ID 0
|
||||||
|
|
||||||
// CSR Addresses ==============================================================
|
|
||||||
|
|
||||||
`define CSR_FFLAGS 12'h001
|
|
||||||
`define CSR_FRM 12'h002
|
|
||||||
`define CSR_FCSR 12'h003
|
|
||||||
|
|
||||||
`define CSR_VEND_ID 12'hF11
|
|
||||||
`define CSR_ARCH_ID 12'hF12
|
|
||||||
`define CSR_IMPL_ID 12'hF13
|
|
||||||
`define CSR_GTID 12'hF14
|
|
||||||
|
|
||||||
`define CSR_LTID 12'h020
|
|
||||||
`define CSR_LWID 12'h021
|
|
||||||
`define CSR_GWID 12'h023
|
|
||||||
`define CSR_GCID 12'h024
|
|
||||||
`define CSR_NT 12'h025
|
|
||||||
`define CSR_NW 12'h026
|
|
||||||
`define CSR_NC 12'h027
|
|
||||||
|
|
||||||
`define CSR_CYCLE_L 12'hC00
|
|
||||||
`define CSR_CYCLE_H 12'hC80
|
|
||||||
`define CSR_INSTR_L 12'hC02
|
|
||||||
`define CSR_INSTR_H 12'hC82
|
|
||||||
|
|
||||||
`define CSR_MISA 12'h301
|
|
||||||
|
|
||||||
// Size of MUL Request Queue Size
|
// Size of MUL Request Queue Size
|
||||||
`ifndef MULRQ_SIZE
|
`ifndef MULRQ_SIZE
|
||||||
`define MULRQ_SIZE 8
|
`define MULRQ_SIZE 8
|
||||||
@@ -106,6 +75,45 @@
|
|||||||
`define ISSUEQ_SIZE (8 + `NUM_WARPS)
|
`define ISSUEQ_SIZE (8 + `NUM_WARPS)
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
|
// CSR Addresses //////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
`define CSR_FFLAGS 12'h001
|
||||||
|
`define CSR_FRM 12'h002
|
||||||
|
`define CSR_FCSR 12'h003
|
||||||
|
|
||||||
|
`define CSR_LTID 12'h020
|
||||||
|
`define CSR_LWID 12'h021
|
||||||
|
`define CSR_GTID 12'h022
|
||||||
|
`define CSR_GWID 12'h023
|
||||||
|
`define CSR_GCID 12'h024
|
||||||
|
`define CSR_NT 12'h025
|
||||||
|
`define CSR_NW 12'h026
|
||||||
|
`define CSR_NC 12'h027
|
||||||
|
|
||||||
|
`define CSR_SATP 12'h180
|
||||||
|
|
||||||
|
`define CSR_PMPCFG0 12'h3A0
|
||||||
|
`define CSR_PMPADDR0 12'h3B0
|
||||||
|
|
||||||
|
`define CSR_MSTATUS 12'h300
|
||||||
|
`define CSR_MISA 12'h301
|
||||||
|
`define CSR_MEDELEG 12'h302
|
||||||
|
`define CSR_MIDELEG 12'h303
|
||||||
|
`define CSR_MIE 12'h304
|
||||||
|
`define CSR_MTVEC 12'h305
|
||||||
|
|
||||||
|
`define CSR_MEPC 12'h341
|
||||||
|
|
||||||
|
`define CSR_CYCLE 12'hC00
|
||||||
|
`define CSR_CYCLE_H 12'hC80
|
||||||
|
`define CSR_INSTRET 12'hC02
|
||||||
|
`define CSR_INSTRET_H 12'hC82
|
||||||
|
|
||||||
|
`define CSR_MVENDORID 12'hF11
|
||||||
|
`define CSR_MARCHID 12'hF12
|
||||||
|
`define CSR_MIMPID 12'hF13
|
||||||
|
`define CSR_MHARTID 12'hF14
|
||||||
|
|
||||||
// Dcache Configurable Knobs ==================================================
|
// Dcache Configurable Knobs ==================================================
|
||||||
|
|
||||||
// Size of cache in bytes
|
// Size of cache in bytes
|
||||||
|
|||||||
@@ -11,107 +11,129 @@ module VX_csr_data #(
|
|||||||
|
|
||||||
input wire[`NW_BITS-1:0] warp_num,
|
input wire[`NW_BITS-1:0] warp_num,
|
||||||
|
|
||||||
input wire[`CSR_ADDR_SIZE-1:0] read_addr,
|
input wire read_enable,
|
||||||
|
input wire[`CSR_ADDR_BITS-1:0] read_addr,
|
||||||
output reg[31:0] read_data,
|
output reg[31:0] read_data,
|
||||||
|
|
||||||
input wire write_enable,
|
input wire write_enable,
|
||||||
`IGNORE_WARNINGS_BEGIN
|
input wire[`CSR_ADDR_BITS-1:0] write_addr,
|
||||||
// We use a smaller storage for CSRs than the standard 4KB in RISC-V
|
|
||||||
input wire[`CSR_ADDR_SIZE-1:0] write_addr,
|
|
||||||
`IGNORE_WARNINGS_END
|
|
||||||
input wire[`CSR_WIDTH-1:0] write_data
|
input wire[`CSR_WIDTH-1:0] write_data
|
||||||
);
|
);
|
||||||
reg [`CSR_WIDTH-1:0] csr_table[`NUM_CSRS-1:0];
|
|
||||||
|
|
||||||
reg [`FFG_BITS+`FRM_BITS-1:0] fflags_table [`NUM_WARPS-1:0];
|
reg [`CSR_WIDTH-1:0] csr_satp;
|
||||||
reg [`FRM_BITS-1:0] frm_table [`NUM_WARPS-1:0];
|
reg [`CSR_WIDTH-1:0] csr_mstatus;
|
||||||
reg [`FFG_BITS+`FRM_BITS-1:0] fcsr_table [`NUM_WARPS-1:0]; // fflags + frm
|
reg [`CSR_WIDTH-1:0] csr_medeleg;
|
||||||
|
reg [`CSR_WIDTH-1:0] csr_mideleg;
|
||||||
// cast address to physical CSR range
|
reg [`CSR_WIDTH-1:0] csr_mie;
|
||||||
wire [$clog2(`NUM_CSRS)-1:0] rd_addr, wr_addr;
|
reg [`CSR_WIDTH-1:0] csr_mtvec;
|
||||||
assign rd_addr = $size(rd_addr)'(read_addr);
|
reg [`CSR_WIDTH-1:0] csr_mepc;
|
||||||
assign wr_addr = $size(wr_addr)'(write_addr);
|
reg [`CSR_WIDTH-1:0] csr_pmpcfg [0:0];
|
||||||
|
reg [`CSR_WIDTH-1:0] csr_pmpaddr [0:0];
|
||||||
wire [`FFG_BITS-1:0] fflags_update;
|
reg [63:0] csr_cycle;
|
||||||
assign fflags_update[4] = cmt_to_csr_if.fflags_NV;
|
reg [63:0] csr_instret;
|
||||||
assign fflags_update[3] = cmt_to_csr_if.fflags_DZ;
|
|
||||||
assign fflags_update[2] = cmt_to_csr_if.fflags_OF;
|
reg [`FFG_BITS-1:0] csr_fflags [`NUM_WARPS-1:0];
|
||||||
assign fflags_update[1] = cmt_to_csr_if.fflags_UF;
|
reg [`FRM_BITS-1:0] csr_frm [`NUM_WARPS-1:0];
|
||||||
assign fflags_update[0] = cmt_to_csr_if.fflags_NX;
|
reg [`FRM_BITS+`FFG_BITS-1:0] csr_fcsr [`NUM_WARPS-1:0]; // fflags + frm
|
||||||
|
|
||||||
integer i;
|
|
||||||
|
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (reset) begin
|
if (cmt_to_csr_if.upd_fflags) begin
|
||||||
for (i = 0; i < `NUM_WARPS; i++) begin
|
csr_fflags[cmt_to_csr_if.warp_num] <= cmt_to_csr_if.fflags;
|
||||||
fflags_table[i] <= 0;
|
csr_fcsr[cmt_to_csr_if.warp_num][`FFG_BITS-1:0] <= cmt_to_csr_if.fflags;
|
||||||
frm_table[i] <= 0;
|
end
|
||||||
fcsr_table[i] <= 0;
|
|
||||||
end
|
if (write_enable) begin
|
||||||
end else begin
|
case (write_addr)
|
||||||
if (write_enable) begin
|
`CSR_FFLAGS: begin
|
||||||
case (write_addr)
|
csr_fcsr[warp_num][`FFG_BITS-1:0] <= write_data[`FFG_BITS-1:0];
|
||||||
`CSR_FFLAGS: begin
|
csr_fflags[warp_num] <= write_data[`FFG_BITS-1:0];
|
||||||
fcsr_table[warp_num][`FFG_BITS-1:0] <= write_data[`FFG_BITS-1:0];
|
end
|
||||||
fflags_table[warp_num][`FFG_BITS-1:0] <= write_data[`FFG_BITS-1:0];
|
`CSR_FRM: begin
|
||||||
end
|
csr_fcsr[warp_num][`FFG_BITS+`FRM_BITS-1:`FFG_BITS] <= write_data[`FRM_BITS-1:0];
|
||||||
`CSR_FRM: begin
|
csr_frm[warp_num] <= write_data[`FRM_BITS-1:0];
|
||||||
fcsr_table[warp_num][`FFG_BITS+`FRM_BITS-1:`FFG_BITS] <= write_data[`FRM_BITS-1:0];
|
end
|
||||||
frm_table[warp_num] <= write_data[`FRM_BITS-1:0];
|
`CSR_FCSR: begin
|
||||||
end
|
csr_fcsr[warp_num] <= write_data[`FFG_BITS+`FRM_BITS-1:0];
|
||||||
`CSR_FCSR: begin
|
csr_frm[warp_num] <= write_data[`FFG_BITS+`FRM_BITS-1:`FFG_BITS];
|
||||||
fcsr_table[warp_num] <= write_data[`FFG_BITS+`FRM_BITS-1:0];
|
csr_fflags[warp_num] <= write_data[`FFG_BITS-1:0];
|
||||||
frm_table[warp_num] <= write_data[`FFG_BITS+`FRM_BITS-1:`FFG_BITS];
|
end
|
||||||
fflags_table[warp_num][`FFG_BITS-1:0] <= write_data[`FFG_BITS-1:0];
|
`CSR_SATP: csr_satp <= write_data;
|
||||||
end
|
|
||||||
default: begin
|
`CSR_MSTATUS: csr_mstatus <= write_data;
|
||||||
csr_table[wr_addr] <= write_data;
|
`CSR_MEDELEG: csr_medeleg <= write_data;
|
||||||
|
`CSR_MIDELEG: csr_mideleg <= write_data;
|
||||||
|
`CSR_MIE: csr_mie <= write_data;
|
||||||
|
`CSR_MTVEC: csr_mtvec <= write_data;
|
||||||
|
|
||||||
|
`CSR_MEPC: csr_mepc <= write_data;
|
||||||
|
|
||||||
|
`CSR_PMPCFG0: csr_pmpcfg[0] <= write_data;
|
||||||
|
`CSR_PMPADDR0: csr_pmpaddr[0] <= write_data;
|
||||||
|
|
||||||
|
default: begin
|
||||||
|
assert(~write_enable) else $error("%t: invalid CSR write address: %0h", $time, write_addr);
|
||||||
end
|
end
|
||||||
endcase
|
endcase
|
||||||
end else if (cmt_to_csr_if.upd_fflags) begin
|
|
||||||
fflags_table[cmt_to_csr_if.fpu_warp_num][`FFG_BITS-1:0] <= fflags_update;
|
|
||||||
fcsr_table[cmt_to_csr_if.fpu_warp_num][`FFG_BITS-1:0] <= fflags_update;
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
reg [63:0] total_cycles, total_instrs;
|
|
||||||
|
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (reset) begin
|
if (reset) begin
|
||||||
total_cycles <= 0;
|
csr_cycle <= 0;
|
||||||
total_instrs <= 0;
|
csr_instret <= 0;
|
||||||
end else begin
|
end else begin
|
||||||
total_cycles <= total_cycles + 1;
|
csr_cycle <= csr_cycle + 1;
|
||||||
if (cmt_to_csr_if.valid) begin
|
if (cmt_to_csr_if.valid) begin
|
||||||
total_instrs <= total_instrs + 64'(cmt_to_csr_if.num_commits);
|
csr_instret <= csr_instret + 64'(cmt_to_csr_if.num_commits);
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
always @(*) begin
|
always @(*) begin
|
||||||
case (read_addr)
|
case (read_addr)
|
||||||
`CSR_FFLAGS : read_data = 32'(fflags_table[warp_num]);
|
`CSR_FFLAGS : read_data = 32'(csr_fflags[warp_num]);
|
||||||
`CSR_FRM : read_data = 32'(frm_table[warp_num]);
|
`CSR_FRM : read_data = 32'(csr_frm[warp_num]);
|
||||||
`CSR_FCSR : read_data = 32'(fcsr_table[warp_num]);
|
`CSR_FCSR : read_data = 32'(csr_fcsr[warp_num]);
|
||||||
|
|
||||||
`CSR_LWID : read_data = 32'(warp_num);
|
`CSR_LWID : read_data = 32'(warp_num);
|
||||||
|
`CSR_LTID ,
|
||||||
`CSR_GTID ,
|
`CSR_GTID ,
|
||||||
|
`CSR_MHARTID ,
|
||||||
`CSR_GWID : read_data = CORE_ID * `NUM_WARPS + 32'(warp_num);
|
`CSR_GWID : read_data = CORE_ID * `NUM_WARPS + 32'(warp_num);
|
||||||
`CSR_GCID : read_data = CORE_ID;
|
`CSR_GCID : read_data = CORE_ID;
|
||||||
`CSR_NT : read_data = `NUM_THREADS;
|
`CSR_NT : read_data = `NUM_THREADS;
|
||||||
`CSR_NW : read_data = `NUM_WARPS;
|
`CSR_NW : read_data = `NUM_WARPS;
|
||||||
`CSR_NC : read_data = `NUM_CORES * `NUM_CLUSTERS;
|
`CSR_NC : read_data = `NUM_CORES * `NUM_CLUSTERS;
|
||||||
`CSR_CYCLE_L : read_data = total_cycles[31:0];
|
|
||||||
`CSR_CYCLE_H : read_data = total_cycles[63:32];
|
`CSR_SATP : read_data = 32'(csr_satp);
|
||||||
`CSR_INSTR_L : read_data = total_instrs[31:0];
|
|
||||||
`CSR_INSTR_H : read_data = total_instrs[63:32];
|
`CSR_MSTATUS : read_data = 32'(csr_mstatus);
|
||||||
`CSR_VEND_ID : read_data = `VENDOR_ID;
|
|
||||||
`CSR_ARCH_ID : read_data = `ARCHITECTURE_ID;
|
|
||||||
`CSR_IMPL_ID : read_data = `IMPLEMENTATION_ID;
|
|
||||||
`CSR_MISA : read_data = `ISA_CODE;
|
`CSR_MISA : read_data = `ISA_CODE;
|
||||||
default : read_data = 32'(csr_table[rd_addr]);
|
`CSR_MEDELEG : read_data = 32'(csr_medeleg);
|
||||||
|
`CSR_MIDELEG : read_data = 32'(csr_mideleg);
|
||||||
|
`CSR_MIE : read_data = 32'(csr_mie);
|
||||||
|
`CSR_MTVEC : read_data = 32'(csr_mtvec);
|
||||||
|
|
||||||
|
`CSR_MEPC : read_data = 32'(csr_mepc);
|
||||||
|
|
||||||
|
`CSR_PMPCFG0 : read_data = 32'(csr_pmpcfg[0]);
|
||||||
|
`CSR_PMPADDR0: read_data = 32'(csr_pmpaddr[0]);
|
||||||
|
|
||||||
|
`CSR_CYCLE : read_data = csr_cycle[31:0];
|
||||||
|
`CSR_CYCLE_H : read_data = csr_cycle[63:32];
|
||||||
|
`CSR_INSTRET : read_data = csr_instret[31:0];
|
||||||
|
`CSR_INSTRET_H:read_data = csr_instret[63:32];
|
||||||
|
|
||||||
|
`CSR_MVENDORID:read_data = `VENDOR_ID;
|
||||||
|
`CSR_MARCHID : read_data = `ARCHITECTURE_ID;
|
||||||
|
`CSR_MIMPID : read_data = `IMPLEMENTATION_ID;
|
||||||
|
|
||||||
|
default: begin
|
||||||
|
assert(~read_enable) else $error("%t: invalid CSR read address: %0h", $time, read_addr);
|
||||||
|
end
|
||||||
endcase
|
endcase
|
||||||
end
|
end
|
||||||
|
|
||||||
assign csr_to_fpu_if.frm = frm_table[csr_to_fpu_if.warp_num];
|
assign csr_to_fpu_if.frm = csr_frm[csr_to_fpu_if.warp_num];
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
@@ -37,12 +37,11 @@ module VX_csr_unit #(
|
|||||||
.select_io_rsp (select_io_rsp)
|
.select_io_rsp (select_io_rsp)
|
||||||
);
|
);
|
||||||
|
|
||||||
wire [`CSR_ADDR_SIZE-1:0] csr_addr_s2;
|
wire csr_we_s1;
|
||||||
wire [31:0] csr_read_data_s2;
|
wire [`CSR_ADDR_BITS-1:0] csr_addr_s1;
|
||||||
wire [31:0] csr_updated_data_s2;
|
wire [31:0] csr_read_data, csr_read_data_s1;
|
||||||
wire [31:0] csr_read_data_unqual;
|
wire [31:0] csr_updated_data_s1;
|
||||||
|
wire [`NW_BITS-1:0] warp_num_s1;
|
||||||
wire is_csr_s2 = csr_pipe_commit_if.valid;
|
|
||||||
|
|
||||||
VX_csr_data #(
|
VX_csr_data #(
|
||||||
.CORE_ID(CORE_ID)
|
.CORE_ID(CORE_ID)
|
||||||
@@ -51,51 +50,64 @@ module VX_csr_unit #(
|
|||||||
.reset (reset),
|
.reset (reset),
|
||||||
.cmt_to_csr_if (cmt_to_csr_if),
|
.cmt_to_csr_if (cmt_to_csr_if),
|
||||||
.csr_to_fpu_if (csr_to_fpu_if),
|
.csr_to_fpu_if (csr_to_fpu_if),
|
||||||
|
.read_enable (csr_pipe_req_if.valid),
|
||||||
.read_addr (csr_pipe_req_if.csr_addr),
|
.read_addr (csr_pipe_req_if.csr_addr),
|
||||||
.read_data (csr_read_data_unqual),
|
.read_data (csr_read_data),
|
||||||
.write_enable (is_csr_s2),
|
.write_enable (csr_we_s1),
|
||||||
.write_data (csr_updated_data_s2[`CSR_WIDTH-1:0]),
|
.write_data (csr_updated_data_s1[`CSR_WIDTH-1:0]),
|
||||||
.write_addr (csr_addr_s2),
|
.write_addr (csr_addr_s1),
|
||||||
.warp_num (csr_pipe_req_if.warp_num)
|
.warp_num (csr_pipe_req_if.warp_num)
|
||||||
);
|
);
|
||||||
|
|
||||||
wire [`NW_BITS-1:0] warp_num_s2;
|
wire csr_hazard = (csr_addr_s1 == csr_pipe_req_if.csr_addr)
|
||||||
|
&& (warp_num_s1 == csr_pipe_req_if.warp_num)
|
||||||
|
&& csr_pipe_commit_if.valid;
|
||||||
|
|
||||||
wire csr_hazard = (csr_addr_s2 == csr_pipe_req_if.csr_addr)
|
wire [31:0] csr_read_data_qual = csr_hazard ? csr_updated_data_s1 : csr_read_data;
|
||||||
&& (warp_num_s2 == csr_pipe_req_if.warp_num)
|
|
||||||
&& is_csr_s2;
|
|
||||||
|
|
||||||
wire [31:0] csr_read_data = csr_hazard ? csr_updated_data_s2 : csr_read_data_unqual;
|
|
||||||
|
|
||||||
reg [31:0] csr_updated_data;
|
reg [31:0] csr_updated_data;
|
||||||
|
|
||||||
|
reg csr_we_s0_unqual;
|
||||||
|
|
||||||
always @(*) begin
|
always @(*) begin
|
||||||
|
csr_we_s0_unqual = 0;
|
||||||
case (csr_pipe_req_if.csr_op)
|
case (csr_pipe_req_if.csr_op)
|
||||||
`CSR_RW: csr_updated_data = csr_pipe_req_if.csr_mask;
|
`CSR_RW: begin
|
||||||
`CSR_RS: csr_updated_data = csr_read_data | csr_pipe_req_if.csr_mask;
|
csr_updated_data = csr_pipe_req_if.csr_mask;
|
||||||
`CSR_RC: csr_updated_data = csr_read_data & (32'hFFFFFFFF - csr_pipe_req_if.csr_mask);
|
csr_we_s0_unqual = 1;
|
||||||
|
end
|
||||||
|
`CSR_RS: begin
|
||||||
|
csr_updated_data = csr_read_data_qual | csr_pipe_req_if.csr_mask;
|
||||||
|
csr_we_s0_unqual = (csr_pipe_req_if.csr_mask != 0);
|
||||||
|
end
|
||||||
|
`CSR_RC: begin
|
||||||
|
csr_updated_data = csr_read_data_qual & (32'hFFFFFFFF - csr_pipe_req_if.csr_mask);
|
||||||
|
csr_we_s0_unqual = (csr_pipe_req_if.csr_mask != 0);
|
||||||
|
end
|
||||||
default: csr_updated_data = 32'hdeadbeef;
|
default: csr_updated_data = 32'hdeadbeef;
|
||||||
endcase
|
endcase
|
||||||
end
|
end
|
||||||
|
|
||||||
|
wire csr_we_s0 = csr_we_s0_unqual && csr_pipe_req_if.valid;
|
||||||
|
|
||||||
wire stall = ~csr_pipe_commit_if.ready && csr_pipe_commit_if.valid;
|
wire stall = ~csr_pipe_commit_if.ready && csr_pipe_commit_if.valid;
|
||||||
|
|
||||||
VX_generic_register #(
|
VX_generic_register #(
|
||||||
.N(1 + `ISTAG_BITS + `NW_BITS + `CSR_ADDR_SIZE + 1 + 32 + 32)
|
.N(1 + `ISTAG_BITS + `NW_BITS + 1 + `CSR_ADDR_BITS + 1 + 32 + 32)
|
||||||
) csr_reg (
|
) csr_reg (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.stall (stall),
|
.stall (stall),
|
||||||
.flush (0),
|
.flush (0),
|
||||||
.in ({csr_pipe_req_if.valid, csr_pipe_req_if.issue_tag, csr_pipe_req_if.warp_num, csr_pipe_req_if.csr_addr, csr_pipe_req_if.is_io, csr_read_data, csr_updated_data}),
|
.in ({csr_pipe_req_if.valid, csr_pipe_req_if.issue_tag, csr_pipe_req_if.warp_num, csr_we_s0, csr_pipe_req_if.csr_addr, csr_pipe_req_if.is_io, csr_read_data_qual, csr_updated_data}),
|
||||||
.out ({csr_pipe_commit_if.valid, csr_pipe_commit_if.issue_tag, warp_num_s2, csr_addr_s2, select_io_rsp, csr_read_data_s2, csr_updated_data_s2})
|
.out ({csr_pipe_commit_if.valid, csr_pipe_commit_if.issue_tag, warp_num_s1, csr_we_s1, csr_addr_s1, select_io_rsp, csr_read_data_s1, csr_updated_data_s1})
|
||||||
);
|
);
|
||||||
|
|
||||||
genvar i;
|
genvar i;
|
||||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||||
assign csr_pipe_commit_if.data[i] = (csr_addr_s2 == `CSR_LTID) ? i :
|
assign csr_pipe_commit_if.data[i] = (csr_addr_s1 == `CSR_LTID) ? i :
|
||||||
(csr_addr_s2 == `CSR_GTID) ? (csr_read_data_s2 * `NUM_THREADS + i) :
|
(csr_addr_s1 == `CSR_GTID) ? (csr_read_data_s1 * `NUM_THREADS + i) :
|
||||||
csr_read_data_s2;
|
csr_read_data_s1;
|
||||||
end
|
end
|
||||||
|
|
||||||
assign csr_pipe_req_if.ready = ~stall;
|
assign csr_pipe_req_if.ready = ~stall;
|
||||||
|
|||||||
@@ -337,10 +337,10 @@ module VX_decode #(
|
|||||||
|
|
||||||
assign decode_tmp_if.use_rs3 = use_rs3;
|
assign decode_tmp_if.use_rs3 = use_rs3;
|
||||||
|
|
||||||
assign decode_tmp_if.reg_use_mask = ((`NUM_REGS)'(use_rd) << rd)
|
assign decode_tmp_if.reg_use_mask = ((`NUM_REGS)'(use_rd) << decode_tmp_if.rd)
|
||||||
| ((`NUM_REGS)'(use_rs1) << rs1_qual)
|
| ((`NUM_REGS)'(use_rs1) << decode_tmp_if.rs1)
|
||||||
| ((`NUM_REGS)'(use_rs2) << rs2)
|
| ((`NUM_REGS)'(use_rs2) << decode_tmp_if.rs2)
|
||||||
| ((`NUM_REGS)'(use_rs3) << rs3);
|
| ((`NUM_REGS)'(use_rs3) << decode_tmp_if.rs3);
|
||||||
|
|
||||||
assign decode_tmp_if.imm = (is_lui || is_auipc) ? {upper_imm, 12'(0)} :
|
assign decode_tmp_if.imm = (is_lui || is_auipc) ? {upper_imm, 12'(0)} :
|
||||||
(is_jal || is_jalr || is_jals) ? jalx_offset :
|
(is_jal || is_jalr || is_jals) ? jalx_offset :
|
||||||
|
|||||||
@@ -30,7 +30,7 @@
|
|||||||
|
|
||||||
`define NR_BITS `LOG2UP(`NUM_REGS)
|
`define NR_BITS `LOG2UP(`NUM_REGS)
|
||||||
|
|
||||||
`define CSR_ADDR_SIZE 12
|
`define CSR_ADDR_BITS 12
|
||||||
|
|
||||||
`define CSR_WIDTH 12
|
`define CSR_WIDTH 12
|
||||||
|
|
||||||
@@ -38,8 +38,8 @@
|
|||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
`define LATENCY_IDIV 24
|
`define LATENCY_IDIV 24
|
||||||
`define LATENCY_IMUL 2
|
`define LATENCY_IMUL 2
|
||||||
|
|
||||||
`define LATENCY_FMULADD 2
|
`define LATENCY_FMULADD 2
|
||||||
`define LATENCY_FDIVSQRT 2
|
`define LATENCY_FDIVSQRT 2
|
||||||
@@ -91,6 +91,7 @@
|
|||||||
`define BR_MRET 4'hA
|
`define BR_MRET 4'hA
|
||||||
`define BR_SRET 4'hB
|
`define BR_SRET 4'hB
|
||||||
`define BR_DRET 4'hC
|
`define BR_DRET 4'hC
|
||||||
|
`define BR_NO 4'hF
|
||||||
`define BR_BITS 4
|
`define BR_BITS 4
|
||||||
|
|
||||||
`define OP_BITS 5
|
`define OP_BITS 5
|
||||||
|
|||||||
@@ -20,12 +20,12 @@ module VX_fpu_unit #(
|
|||||||
localparam FMTF_BITS = $clog2(fpnew_pkg::NUM_FP_FORMATS);
|
localparam FMTF_BITS = $clog2(fpnew_pkg::NUM_FP_FORMATS);
|
||||||
localparam FMTI_BITS = $clog2(fpnew_pkg::NUM_INT_FORMATS);
|
localparam FMTI_BITS = $clog2(fpnew_pkg::NUM_INT_FORMATS);
|
||||||
|
|
||||||
localparam int FPU_DPATHW = `NUM_THREADS * 32;
|
localparam FPU_DPATHW = 32'd32;
|
||||||
|
|
||||||
localparam fpnew_pkg::fpu_features_t FPU_FEATURES = '{
|
localparam fpnew_pkg::fpu_features_t FPU_FEATURES = '{
|
||||||
Width: FPU_DPATHW,
|
Width: FPU_DPATHW,
|
||||||
EnableVectors: 1,
|
EnableVectors: 1'b0,
|
||||||
EnableNanBox: 1,
|
EnableNanBox: 1'b1,
|
||||||
FpFmtMask: 5'b10000,
|
FpFmtMask: 5'b10000,
|
||||||
IntFmtMask: 4'b0010
|
IntFmtMask: 4'b0010
|
||||||
};
|
};
|
||||||
@@ -54,7 +54,7 @@ module VX_fpu_unit #(
|
|||||||
wire [FMTI_BITS-1:0] fpu_int_fmt = fpnew_pkg::INT32;
|
wire [FMTI_BITS-1:0] fpu_int_fmt = fpnew_pkg::INT32;
|
||||||
|
|
||||||
wire [`NUM_THREADS-1:0][31:0] fpu_result;
|
wire [`NUM_THREADS-1:0][31:0] fpu_result;
|
||||||
fpnew_pkg::status_t fpu_status;
|
fpnew_pkg::status_t fpu_status [0:`NUM_THREADS-1];
|
||||||
|
|
||||||
assign csr_to_fpu_if.warp_num = fpu_req_if.warp_num;
|
assign csr_to_fpu_if.warp_num = fpu_req_if.warp_num;
|
||||||
wire [`FRM_BITS-1:0] real_frm = (fpu_req_if.frm == `FRM_DYN) ? csr_to_fpu_if.frm : fpu_req_if.frm;
|
wire [`FRM_BITS-1:0] real_frm = (fpu_req_if.frm == `FRM_DYN) ? csr_to_fpu_if.frm : fpu_req_if.frm;
|
||||||
@@ -65,7 +65,7 @@ module VX_fpu_unit #(
|
|||||||
reg [FOP_BITS-1:0] fpu_op;
|
reg [FOP_BITS-1:0] fpu_op;
|
||||||
reg [`FRM_BITS-1:0] fpu_rnd;
|
reg [`FRM_BITS-1:0] fpu_rnd;
|
||||||
reg fpu_op_mod;
|
reg fpu_op_mod;
|
||||||
reg fflags_en, fflags_en_o;
|
reg fflags_en, fflags_en_o;
|
||||||
|
|
||||||
always @(*) begin
|
always @(*) begin
|
||||||
fpu_op = fpnew_pkg::SGNJ;
|
fpu_op = fpnew_pkg::SGNJ;
|
||||||
@@ -87,88 +87,94 @@ module VX_fpu_unit #(
|
|||||||
fpu_operands[2] = fpu_req_if.rs2_data;
|
fpu_operands[2] = fpu_req_if.rs2_data;
|
||||||
fpu_op_mod = 1;
|
fpu_op_mod = 1;
|
||||||
end
|
end
|
||||||
`FPU_MUL: fpu_op = fpnew_pkg::MUL;
|
`FPU_MUL: begin fpu_op = fpnew_pkg::MUL; end
|
||||||
`FPU_DIV: fpu_op = fpnew_pkg::DIV;
|
`FPU_DIV: begin fpu_op = fpnew_pkg::DIV; end
|
||||||
`FPU_SQRT: fpu_op = fpnew_pkg::SQRT;
|
`FPU_SQRT: begin fpu_op = fpnew_pkg::SQRT; end
|
||||||
`FPU_MADD: fpu_op = fpnew_pkg::FMADD;
|
`FPU_MADD: begin fpu_op = fpnew_pkg::FMADD; end
|
||||||
`FPU_MSUB: begin fpu_op = fpnew_pkg::FMADD; fpu_op_mod = 1; end
|
`FPU_MSUB: begin fpu_op = fpnew_pkg::FMADD; fpu_op_mod = 1; end
|
||||||
`FPU_NMSUB: fpu_op = fpnew_pkg::FNMSUB;
|
`FPU_NMSUB: begin fpu_op = fpnew_pkg::FNMSUB; end
|
||||||
`FPU_NMADD: begin fpu_op = fpnew_pkg::FNMSUB; fpu_op_mod = 1; end
|
`FPU_NMADD: begin fpu_op = fpnew_pkg::FNMSUB; fpu_op_mod = 1; end
|
||||||
`FPU_SGNJ: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RNE; fflags_en = 0; end
|
`FPU_SGNJ: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RNE; fflags_en = 0; end
|
||||||
`FPU_SGNJN: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RTZ; fflags_en = 0; end
|
`FPU_SGNJN: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RTZ; fflags_en = 0; end
|
||||||
`FPU_SGNJX: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RDN; fflags_en = 0; end
|
`FPU_SGNJX: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RDN; fflags_en = 0; end
|
||||||
`FPU_MIN: begin fpu_op = fpnew_pkg::MINMAX; fpu_rnd = `FRM_RNE; end
|
`FPU_MIN: begin fpu_op = fpnew_pkg::MINMAX; fpu_rnd = `FRM_RNE; end
|
||||||
`FPU_MAX: begin fpu_op = fpnew_pkg::MINMAX; fpu_rnd = `FRM_RTZ; end
|
`FPU_MAX: begin fpu_op = fpnew_pkg::MINMAX; fpu_rnd = `FRM_RTZ; end
|
||||||
`FPU_CVTWS: fpu_op = fpnew_pkg::F2I;
|
`FPU_CVTWS: begin fpu_op = fpnew_pkg::F2I; end
|
||||||
`FPU_CVTWUS:begin fpu_op = fpnew_pkg::F2I; fpu_op_mod = 1; end
|
`FPU_CVTWUS:begin fpu_op = fpnew_pkg::F2I; fpu_op_mod = 1; end
|
||||||
`FPU_CVTSW: fpu_op = fpnew_pkg::I2F;
|
`FPU_CVTSW: begin fpu_op = fpnew_pkg::I2F; end
|
||||||
`FPU_CVTSWU:begin fpu_op = fpnew_pkg::I2F; fpu_op_mod = 1; end
|
`FPU_CVTSWU:begin fpu_op = fpnew_pkg::I2F; fpu_op_mod = 1; end
|
||||||
`FPU_MVXW: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RUP; fflags_en = 0; end
|
`FPU_MVXW: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RUP; fflags_en = 0; end
|
||||||
`FPU_MVWX: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RUP; fflags_en = 0; end
|
`FPU_MVWX: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RUP; fflags_en = 0; end
|
||||||
`FPU_CLASS: begin fpu_op = fpnew_pkg::CLASSIFY; fflags_en = 0; end
|
`FPU_CLASS: begin fpu_op = fpnew_pkg::CLASSIFY; fflags_en = 0; end
|
||||||
`FPU_CMP: fpu_op = fpnew_pkg::CMP;
|
`FPU_CMP: begin fpu_op = fpnew_pkg::CMP; end
|
||||||
default:;
|
default:;
|
||||||
endcase
|
endcase
|
||||||
end
|
end
|
||||||
|
|
||||||
|
genvar i;
|
||||||
|
|
||||||
`DISABLE_TRACING
|
`DISABLE_TRACING
|
||||||
|
|
||||||
fpnew_top #(
|
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||||
.Features (FPU_FEATURES),
|
if (0 == i) begin
|
||||||
.Implementation (FPU_IMPLEMENTATION),
|
fpnew_top #(
|
||||||
.TagType (logic[`LOG2UP(`FPURQ_SIZE)-1+2:0])
|
.Features (FPU_FEATURES),
|
||||||
) fpnew_core (
|
.Implementation (FPU_IMPLEMENTATION),
|
||||||
.clk_i (clk),
|
.TagType (logic[`LOG2UP(`FPURQ_SIZE)+1+1-1:0])
|
||||||
.rst_ni (1'b1),
|
) fpnew_core (
|
||||||
.operands_i (fpu_operands),
|
.clk_i (clk),
|
||||||
.rnd_mode_i (fpnew_pkg::roundmode_e'(fpu_rnd)),
|
.rst_ni (1'b1),
|
||||||
.op_i (fpnew_pkg::operation_e'(fpu_op)),
|
.operands_i ({fpu_operands[2][0], fpu_operands[1][0], fpu_operands[0][0]}),
|
||||||
.op_mod_i (fpu_op_mod),
|
.rnd_mode_i (fpnew_pkg::roundmode_e'(fpu_rnd)),
|
||||||
.src_fmt_i (fpnew_pkg::fp_format_e'(fpu_src_fmt)),
|
.op_i (fpnew_pkg::operation_e'(fpu_op)),
|
||||||
.dst_fmt_i (fpnew_pkg::fp_format_e'(fpu_dst_fmt)),
|
.op_mod_i (fpu_op_mod),
|
||||||
.int_fmt_i (fpnew_pkg::int_format_e'(fpu_int_fmt)),
|
.src_fmt_i (fpnew_pkg::fp_format_e'(fpu_src_fmt)),
|
||||||
.vectorial_op_i (1'b1),
|
.dst_fmt_i (fpnew_pkg::fp_format_e'(fpu_dst_fmt)),
|
||||||
.tag_i ({fpu_in_tag, fflags_en, is_class_op_i}),
|
.int_fmt_i (fpnew_pkg::int_format_e'(fpu_int_fmt)),
|
||||||
.in_valid_i (fpu_in_valid),
|
.vectorial_op_i (1'b0),
|
||||||
.in_ready_o (fpu_in_ready),
|
.tag_i ({fpu_in_tag, fflags_en, is_class_op_i}),
|
||||||
.flush_i (reset),
|
.in_valid_i (fpu_in_valid),
|
||||||
.result_o (fpu_result),
|
.in_ready_o (fpu_in_ready),
|
||||||
.status_o (fpu_status),
|
.flush_i (reset),
|
||||||
.tag_o ({fpu_out_tag, fflags_en_o, is_class_op_o}),
|
.result_o (fpu_result),
|
||||||
.out_valid_o (fpu_out_valid),
|
.status_o (fpu_status[0]),
|
||||||
.out_ready_i (fpu_out_ready),
|
.tag_o ({fpu_out_tag, fflags_en_o, is_class_op_o}),
|
||||||
`UNUSED_PIN (busy_o)
|
.out_valid_o (fpu_out_valid),
|
||||||
);
|
.out_ready_i (fpu_out_ready),
|
||||||
|
`UNUSED_PIN (busy_o)
|
||||||
`ENABLE_TRACING
|
);
|
||||||
|
|
||||||
reg [`NUM_THREADS-1:0][31:0] fpu_result_qual;
|
|
||||||
|
|
||||||
always @(8) begin
|
|
||||||
// unpack classify mask result
|
|
||||||
if (is_class_op_o) begin
|
|
||||||
integer i;
|
|
||||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
|
||||||
automatic integer l = i / 4;
|
|
||||||
automatic integer w = i % 4;
|
|
||||||
automatic integer class_mask = fpu_result[l][w * 8 +: 8];
|
|
||||||
|
|
||||||
fpu_result_qual[i][0] = class_mask[7] & class_mask[0];
|
|
||||||
fpu_result_qual[i][1] = class_mask[7] & class_mask[1];
|
|
||||||
fpu_result_qual[i][2] = class_mask[7] & class_mask[2];
|
|
||||||
fpu_result_qual[i][3] = class_mask[7] & class_mask[3];
|
|
||||||
fpu_result_qual[i][4] = class_mask[6] & class_mask[3];
|
|
||||||
fpu_result_qual[i][5] = class_mask[6] & class_mask[2];
|
|
||||||
fpu_result_qual[i][6] = class_mask[6] & class_mask[1];
|
|
||||||
fpu_result_qual[i][7] = class_mask[6] & class_mask[0];
|
|
||||||
fpu_result_qual[i][8] = class_mask[4];
|
|
||||||
fpu_result_qual[i][9] = class_mask[5];
|
|
||||||
end
|
|
||||||
end else begin
|
end else begin
|
||||||
fpu_result_qual = fpu_result;
|
fpnew_top #(
|
||||||
|
.Features (FPU_FEATURES),
|
||||||
|
.Implementation (FPU_IMPLEMENTATION),
|
||||||
|
.TagType (logic)
|
||||||
|
) fpnew_core (
|
||||||
|
.clk_i (clk),
|
||||||
|
.rst_ni (1'b1),
|
||||||
|
.operands_i ({fpu_operands[2][i], fpu_operands[1][i], fpu_operands[0][i]}),
|
||||||
|
.rnd_mode_i (fpnew_pkg::roundmode_e'(fpu_rnd)),
|
||||||
|
.op_i (fpnew_pkg::operation_e'(fpu_op)),
|
||||||
|
.op_mod_i (fpu_op_mod),
|
||||||
|
.src_fmt_i (fpnew_pkg::fp_format_e'(fpu_src_fmt)),
|
||||||
|
.dst_fmt_i (fpnew_pkg::fp_format_e'(fpu_dst_fmt)),
|
||||||
|
.int_fmt_i (fpnew_pkg::int_format_e'(fpu_int_fmt)),
|
||||||
|
.vectorial_op_i (1'b0),
|
||||||
|
.tag_i (1'b0),
|
||||||
|
.in_valid_i (fpu_in_valid),
|
||||||
|
`UNUSED_PIN (in_ready_o),
|
||||||
|
.flush_i (reset),
|
||||||
|
.result_o (fpu_result[i]),
|
||||||
|
.status_o (fpu_status[i]),
|
||||||
|
`UNUSED_PIN (tag_o),
|
||||||
|
`UNUSED_PIN (out_valid_o),
|
||||||
|
.out_ready_i (fpu_out_ready),
|
||||||
|
`UNUSED_PIN (busy_o)
|
||||||
|
);
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
`ENABLE_TRACING
|
||||||
|
|
||||||
assign fpu_in_valid = fpu_req_if.valid;
|
assign fpu_in_valid = fpu_req_if.valid;
|
||||||
assign fpu_in_tag = fpu_req_if.issue_tag;
|
assign fpu_in_tag = fpu_req_if.issue_tag;
|
||||||
|
|
||||||
@@ -177,15 +183,18 @@ module VX_fpu_unit #(
|
|||||||
|
|
||||||
assign fpu_commit_if.valid = fpu_out_valid;
|
assign fpu_commit_if.valid = fpu_out_valid;
|
||||||
assign fpu_commit_if.issue_tag = fpu_out_tag;
|
assign fpu_commit_if.issue_tag = fpu_out_tag;
|
||||||
assign fpu_commit_if.data = fpu_result_qual;
|
assign fpu_commit_if.data = fpu_result;
|
||||||
|
|
||||||
assign fpu_commit_if.upd_fflags = fflags_en_o;
|
assign fpu_commit_if.upd_fflags = fflags_en_o;
|
||||||
assign fpu_commit_if.fflags_NV = fpu_status.NV;
|
|
||||||
assign fpu_commit_if.fflags_DZ = fpu_status.DZ;
|
|
||||||
assign fpu_commit_if.fflags_OF = fpu_status.OF;
|
|
||||||
assign fpu_commit_if.fflags_UF = fpu_status.UF;
|
|
||||||
assign fpu_commit_if.fflags_NX = fpu_status.NX;
|
|
||||||
|
|
||||||
|
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||||
|
assign fpu_commit_if.fflags[i][0] = fpu_status[i].NX;
|
||||||
|
assign fpu_commit_if.fflags[i][1] = fpu_status[i].UF;
|
||||||
|
assign fpu_commit_if.fflags[i][2] = fpu_status[i].OF;
|
||||||
|
assign fpu_commit_if.fflags[i][3] = fpu_status[i].DZ;
|
||||||
|
assign fpu_commit_if.fflags[i][4] = fpu_status[i].NV;
|
||||||
|
end
|
||||||
|
|
||||||
assign fpu_out_ready = fpu_commit_if.ready;
|
assign fpu_out_ready = fpu_commit_if.ready;
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
@@ -46,7 +46,7 @@ module VX_icache_stage #(
|
|||||||
assign ifetch_req_if.ready = icache_req_if.ready;
|
assign ifetch_req_if.ready = icache_req_if.ready;
|
||||||
|
|
||||||
`ifdef DBG_CORE_REQ_INFO
|
`ifdef DBG_CORE_REQ_INFO
|
||||||
assign icache_req_if.tag = {ifetch_req_if.curr_PC, 1'b0, 5'b0, ifetch_req_if.warp_num, req_tag};
|
assign icache_req_if.tag = {ifetch_req_if.curr_PC, 1'b0, `NR_BITS'(0), ifetch_req_if.warp_num, req_tag};
|
||||||
`else
|
`else
|
||||||
assign icache_req_if.tag = req_tag;
|
assign icache_req_if.tag = req_tag;
|
||||||
`endif
|
`endif
|
||||||
|
|||||||
@@ -97,7 +97,7 @@ module VX_issue #(
|
|||||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=ALU, istag=%0d, tmask=%b, wb=%d, rd=%0d, rs1_data=%0h, rs2_data=%0h, offset=%0h, next_PC=%0h", $time, CORE_ID, decode_tmp_if.warp_num, decode_tmp_if.curr_PC, issue_tmp_tag, decode_tmp_if.thread_mask, decode_tmp_if.wb, decode_tmp_if.rd, alu_req_if.rs1_data, alu_req_if.rs2_data, alu_req_if.offset, alu_req_if.next_PC);
|
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=ALU, istag=%0d, tmask=%b, wb=%d, rd=%0d, rs1_data=%0h, rs2_data=%0h, offset=%0h, next_PC=%0h", $time, CORE_ID, decode_tmp_if.warp_num, decode_tmp_if.curr_PC, issue_tmp_tag, decode_tmp_if.thread_mask, decode_tmp_if.wb, decode_tmp_if.rd, alu_req_if.rs1_data, alu_req_if.rs2_data, alu_req_if.offset, alu_req_if.next_PC);
|
||||||
end
|
end
|
||||||
if (lsu_req_if.valid && lsu_req_if.ready) begin
|
if (lsu_req_if.valid && lsu_req_if.ready) begin
|
||||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=LSU, istag=%0d, tmask=%b, wb=%0b, rd=%0d, rw=%b, byteen=%b, baddr=%0h, offset=%0h", $time, CORE_ID, decode_tmp_if.warp_num, decode_tmp_if.curr_PC, issue_tmp_tag, decode_tmp_if.thread_mask, decode_tmp_if.wb, decode_tmp_if.rd, lsu_req_if.rw, lsu_req_if.byteen, lsu_req_if.base_addr, lsu_req_if.offset);
|
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=LSU, istag=%0d, tmask=%b, wb=%0b, rd=%0d, rw=%b, byteen=%b, baddr=%0h, offset=%0h, data=%0h", $time, CORE_ID, decode_tmp_if.warp_num, decode_tmp_if.curr_PC, issue_tmp_tag, decode_tmp_if.thread_mask, decode_tmp_if.wb, decode_tmp_if.rd, lsu_req_if.rw, lsu_req_if.byteen, lsu_req_if.base_addr, lsu_req_if.offset, lsu_req_if.store_data);
|
||||||
end
|
end
|
||||||
if (csr_req_if.valid && csr_req_if.ready) begin
|
if (csr_req_if.valid && csr_req_if.ready) begin
|
||||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=CSR, istag=%0d, tmask=%b, wb=%d, rd=%0d, addr=%0h, mask=%0h", $time, CORE_ID, decode_tmp_if.warp_num, decode_tmp_if.curr_PC, issue_tmp_tag, decode_tmp_if.thread_mask, decode_tmp_if.wb, decode_tmp_if.rd, csr_req_if.csr_addr, csr_req_if.csr_mask);
|
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=CSR, istag=%0d, tmask=%b, wb=%d, rd=%0d, addr=%0h, mask=%0h", $time, CORE_ID, decode_tmp_if.warp_num, decode_tmp_if.curr_PC, issue_tmp_tag, decode_tmp_if.thread_mask, decode_tmp_if.wb, decode_tmp_if.rd, csr_req_if.csr_addr, csr_req_if.csr_mask);
|
||||||
|
|||||||
@@ -46,7 +46,7 @@ module VX_issue_demux (
|
|||||||
assign csr_req_if.warp_num = decode_if.warp_num;
|
assign csr_req_if.warp_num = decode_if.warp_num;
|
||||||
assign csr_req_if.curr_PC = decode_if.curr_PC;
|
assign csr_req_if.curr_PC = decode_if.curr_PC;
|
||||||
assign csr_req_if.csr_op = `CSR_OP(decode_if.ex_op);
|
assign csr_req_if.csr_op = `CSR_OP(decode_if.ex_op);
|
||||||
assign csr_req_if.csr_addr = decode_if.imm[`CSR_ADDR_SIZE-1:0];
|
assign csr_req_if.csr_addr = decode_if.imm[`CSR_ADDR_BITS-1:0];
|
||||||
assign csr_req_if.csr_mask = decode_if.rs2_is_imm ? 32'(decode_if.rs1) : gpr_read_if.rs1_data[0];
|
assign csr_req_if.csr_mask = decode_if.rs2_is_imm ? 32'(decode_if.rs1) : gpr_read_if.rs1_data[0];
|
||||||
assign csr_req_if.is_io = 1'b0;
|
assign csr_req_if.is_io = 1'b0;
|
||||||
|
|
||||||
|
|||||||
@@ -49,7 +49,7 @@ module VX_scheduler #(
|
|||||||
for (i = 0; i < `NUM_REGS; i++) begin
|
for (i = 0; i < `NUM_REGS; i++) begin
|
||||||
inuse_registers[w][i] <= 0;
|
inuse_registers[w][i] <= 0;
|
||||||
end
|
end
|
||||||
inuse_reg_mask[w] <= 0;
|
inuse_reg_mask[w] <= `NUM_REGS'(0);
|
||||||
end
|
end
|
||||||
end else begin
|
end else begin
|
||||||
if (acquire_rd) begin
|
if (acquire_rd) begin
|
||||||
|
|||||||
@@ -24,9 +24,10 @@ module VX_writeback #(
|
|||||||
reg [`NUM_THREADS-1:0] wb_thread_mask [`ISSUEQ_SIZE-1:0];
|
reg [`NUM_THREADS-1:0] wb_thread_mask [`ISSUEQ_SIZE-1:0];
|
||||||
reg [31:0] wb_curr_PC [`ISSUEQ_SIZE-1:0];
|
reg [31:0] wb_curr_PC [`ISSUEQ_SIZE-1:0];
|
||||||
reg [`NR_BITS-1:0] wb_rd [`ISSUEQ_SIZE-1:0];
|
reg [`NR_BITS-1:0] wb_rd [`ISSUEQ_SIZE-1:0];
|
||||||
reg [`ISSUEQ_SIZE-1:0] wb_pending, wb_pending_n;
|
|
||||||
|
|
||||||
reg [`ISTAG_BITS-1:0] wb_index;
|
reg [`ISSUEQ_SIZE-1:0] wb_pending;
|
||||||
|
reg [`ISSUEQ_SIZE-1:0] wb_pending_n;
|
||||||
|
reg [`ISTAG_BITS-1:0] wb_index;
|
||||||
wire [`ISTAG_BITS-1:0] wb_index_n;
|
wire [`ISTAG_BITS-1:0] wb_index_n;
|
||||||
|
|
||||||
reg wb_valid;
|
reg wb_valid;
|
||||||
@@ -67,6 +68,8 @@ module VX_writeback #(
|
|||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (reset) begin
|
if (reset) begin
|
||||||
wb_pending <= 0;
|
wb_pending <= 0;
|
||||||
|
wb_index <= 0;
|
||||||
|
wb_valid <= 0;
|
||||||
end else begin
|
end else begin
|
||||||
if (alu_commit_if.valid) begin
|
if (alu_commit_if.valid) begin
|
||||||
wb_data [alu_commit_if.issue_tag] <= alu_commit_if.data;
|
wb_data [alu_commit_if.issue_tag] <= alu_commit_if.data;
|
||||||
|
|||||||
24
hw/rtl/cache/VX_bank.v
vendored
24
hw/rtl/cache/VX_bank.v
vendored
@@ -105,7 +105,7 @@ module VX_bank #(
|
|||||||
|
|
||||||
`ifdef DBG_CORE_REQ_INFO
|
`ifdef DBG_CORE_REQ_INFO
|
||||||
/* verilator lint_off UNUSED */
|
/* verilator lint_off UNUSED */
|
||||||
wire[31:0] debug_use_pc_st0;
|
wire[31:0] debug_pc_st0;
|
||||||
wire debug_wb_st0;
|
wire debug_wb_st0;
|
||||||
wire[`NR_BITS-1:0] debug_rd_st0;
|
wire[`NR_BITS-1:0] debug_rd_st0;
|
||||||
wire[`NW_BITS-1:0] debug_warp_num_st0;
|
wire[`NW_BITS-1:0] debug_warp_num_st0;
|
||||||
@@ -114,7 +114,7 @@ module VX_bank #(
|
|||||||
wire[`REQS_BITS-1:0] debug_tid_st0;
|
wire[`REQS_BITS-1:0] debug_tid_st0;
|
||||||
wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st0;
|
wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st0;
|
||||||
|
|
||||||
wire[31:0] debug_use_pc_st1e;
|
wire[31:0] debug_pc_st1e;
|
||||||
wire debug_wb_st1e;
|
wire debug_wb_st1e;
|
||||||
wire[`NR_BITS-1:0] debug_rd_st1e;
|
wire[`NR_BITS-1:0] debug_rd_st1e;
|
||||||
wire[`NW_BITS-1:0] debug_warp_num_st1e;
|
wire[`NW_BITS-1:0] debug_warp_num_st1e;
|
||||||
@@ -123,7 +123,7 @@ module VX_bank #(
|
|||||||
wire[`REQS_BITS-1:0] debug_tid_st1e;
|
wire[`REQS_BITS-1:0] debug_tid_st1e;
|
||||||
wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st1e;
|
wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st1e;
|
||||||
|
|
||||||
wire[31:0] debug_use_pc_st2;
|
wire[31:0] debug_pc_st2;
|
||||||
wire debug_wb_st2;
|
wire debug_wb_st2;
|
||||||
wire[`NR_BITS-1:0] debug_rd_st2;
|
wire[`NR_BITS-1:0] debug_rd_st2;
|
||||||
wire[`NW_BITS-1:0] debug_warp_num_st2;
|
wire[`NW_BITS-1:0] debug_warp_num_st2;
|
||||||
@@ -360,7 +360,7 @@ module VX_bank #(
|
|||||||
|
|
||||||
`ifdef DBG_CORE_REQ_INFO
|
`ifdef DBG_CORE_REQ_INFO
|
||||||
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
|
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
|
||||||
assign {debug_use_pc_st0, debug_wb_st0, debug_rd_st0, debug_warp_num_st0, debug_tagid_st0, debug_rw_st0, debug_byteen_st0, debug_tid_st0} = qual_inst_meta_st0;
|
assign {debug_pc_st0, debug_wb_st0, debug_rd_st0, debug_warp_num_st0, debug_tagid_st0, debug_rw_st0, debug_byteen_st0, debug_tid_st0} = qual_inst_meta_st0;
|
||||||
end
|
end
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
@@ -432,6 +432,9 @@ module VX_bank #(
|
|||||||
&& (addr_st2 == addr_st1e);
|
&& (addr_st2 == addr_st1e);
|
||||||
|
|
||||||
VX_tag_data_access #(
|
VX_tag_data_access #(
|
||||||
|
.BANK_ID (BANK_ID),
|
||||||
|
.CACHE_ID (CACHE_ID),
|
||||||
|
.CORE_TAG_ID_BITS(CORE_TAG_ID_BITS),
|
||||||
.CACHE_SIZE (CACHE_SIZE),
|
.CACHE_SIZE (CACHE_SIZE),
|
||||||
.BANK_LINE_SIZE (BANK_LINE_SIZE),
|
.BANK_LINE_SIZE (BANK_LINE_SIZE),
|
||||||
.NUM_BANKS (NUM_BANKS),
|
.NUM_BANKS (NUM_BANKS),
|
||||||
@@ -442,6 +445,15 @@ module VX_bank #(
|
|||||||
) tag_data_access (
|
) tag_data_access (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
|
|
||||||
|
`ifdef DBG_CORE_REQ_INFO
|
||||||
|
.debug_pc_st1e(debug_pc_st1e),
|
||||||
|
.debug_wb_st1e(debug_wb_st1e),
|
||||||
|
.debug_rd_st1e(debug_rd_st1e),
|
||||||
|
.debug_warp_num_st1e(debug_warp_num_st1e),
|
||||||
|
.debug_tagid_st1e(debug_tagid_st1e),
|
||||||
|
`endif
|
||||||
|
|
||||||
.stall (stall_bank_pipe),
|
.stall (stall_bank_pipe),
|
||||||
.stall_bank_pipe(stall_bank_pipe),
|
.stall_bank_pipe(stall_bank_pipe),
|
||||||
|
|
||||||
@@ -478,7 +490,7 @@ module VX_bank #(
|
|||||||
|
|
||||||
`ifdef DBG_CORE_REQ_INFO
|
`ifdef DBG_CORE_REQ_INFO
|
||||||
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
|
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
|
||||||
assign {debug_use_pc_st1e, debug_wb_st1e, debug_rd_st1e, debug_warp_num_st1e, debug_tagid_st1e, debug_rw_st1e, debug_byteen_st1e, debug_tid_st1e} = inst_meta_st1[STAGE_1_CYCLES-1];
|
assign {debug_pc_st1e, debug_wb_st1e, debug_rd_st1e, debug_warp_num_st1e, debug_tagid_st1e, debug_rw_st1e, debug_byteen_st1e, debug_tid_st1e} = inst_meta_st1[STAGE_1_CYCLES-1];
|
||||||
end
|
end
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
@@ -519,7 +531,7 @@ module VX_bank #(
|
|||||||
|
|
||||||
`ifdef DBG_CORE_REQ_INFO
|
`ifdef DBG_CORE_REQ_INFO
|
||||||
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
|
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
|
||||||
assign {debug_use_pc_st2, debug_wb_st2, debug_rd_st2, debug_warp_num_st2, debug_tagid_st2, debug_rw_st2, debug_byteen_st2, debug_tid_st2} = inst_meta_st2;
|
assign {debug_pc_st2, debug_wb_st2, debug_rd_st2, debug_warp_num_st2, debug_tagid_st2, debug_rw_st2, debug_byteen_st2, debug_tid_st2} = inst_meta_st2;
|
||||||
end
|
end
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
|
|||||||
40
hw/rtl/cache/VX_tag_data_access.v
vendored
40
hw/rtl/cache/VX_tag_data_access.v
vendored
@@ -1,6 +1,9 @@
|
|||||||
`include "VX_cache_config.vh"
|
`include "VX_cache_config.vh"
|
||||||
|
|
||||||
module VX_tag_data_access #(
|
module VX_tag_data_access #(
|
||||||
|
parameter CACHE_ID = 0,
|
||||||
|
parameter BANK_ID = 0,
|
||||||
|
parameter CORE_TAG_ID_BITS = 0,
|
||||||
// Size of cache in bytes
|
// Size of cache in bytes
|
||||||
parameter CACHE_SIZE = 0,
|
parameter CACHE_SIZE = 0,
|
||||||
// Size of line inside a bank in bytes
|
// Size of line inside a bank in bytes
|
||||||
@@ -22,6 +25,14 @@ module VX_tag_data_access #(
|
|||||||
input wire clk,
|
input wire clk,
|
||||||
input wire reset,
|
input wire reset,
|
||||||
|
|
||||||
|
`ifdef DBG_CORE_REQ_INFO
|
||||||
|
input wire[31:0] debug_pc_st1e,
|
||||||
|
input wire debug_wb_st1e,
|
||||||
|
input wire[`NR_BITS-1:0] debug_rd_st1e,
|
||||||
|
input wire[`NW_BITS-1:0] debug_warp_num_st1e,
|
||||||
|
input wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st1e,
|
||||||
|
`endif
|
||||||
|
|
||||||
input wire stall,
|
input wire stall,
|
||||||
input wire is_snp_st1e,
|
input wire is_snp_st1e,
|
||||||
input wire snp_invalidate_st1e,
|
input wire snp_invalidate_st1e,
|
||||||
@@ -85,10 +96,10 @@ module VX_tag_data_access #(
|
|||||||
wire[`LINE_SELECT_BITS-1:0] writeladdr_st1e = writeaddr_st1e[`LINE_SELECT_BITS-1:0];
|
wire[`LINE_SELECT_BITS-1:0] writeladdr_st1e = writeaddr_st1e[`LINE_SELECT_BITS-1:0];
|
||||||
|
|
||||||
VX_tag_data_store #(
|
VX_tag_data_store #(
|
||||||
.CACHE_SIZE (CACHE_SIZE),
|
.CACHE_SIZE (CACHE_SIZE),
|
||||||
.BANK_LINE_SIZE (BANK_LINE_SIZE),
|
.BANK_LINE_SIZE (BANK_LINE_SIZE),
|
||||||
.NUM_BANKS (NUM_BANKS),
|
.NUM_BANKS (NUM_BANKS),
|
||||||
.WORD_SIZE (WORD_SIZE)
|
.WORD_SIZE (WORD_SIZE)
|
||||||
) tag_data_store (
|
) tag_data_store (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
@@ -125,7 +136,7 @@ module VX_tag_data_access #(
|
|||||||
genvar i;
|
genvar i;
|
||||||
for (i = 1; i < STAGE_1_CYCLES-1; i++) begin
|
for (i = 1; i < STAGE_1_CYCLES-1; i++) begin
|
||||||
VX_generic_register #(
|
VX_generic_register #(
|
||||||
.N( 1 + 1 + BANK_LINE_SIZE + `TAG_SELECT_BITS + `BANK_LINE_WIDTH)
|
.N(1 + 1 + BANK_LINE_SIZE + `TAG_SELECT_BITS + `BANK_LINE_WIDTH)
|
||||||
) s0_1_cc (
|
) s0_1_cc (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
@@ -200,4 +211,23 @@ module VX_tag_data_access #(
|
|||||||
assign fill_saw_dirty_st1e = real_writefill && dirty_st1e;
|
assign fill_saw_dirty_st1e = real_writefill && dirty_st1e;
|
||||||
assign invalidate_line = snoop_hit_no_pending;
|
assign invalidate_line = snoop_hit_no_pending;
|
||||||
|
|
||||||
|
`ifdef DBG_PRINT_CACHE_BANK
|
||||||
|
always @(posedge clk) begin
|
||||||
|
if (valid_req_st1e) begin
|
||||||
|
if ((| use_write_enable)) begin
|
||||||
|
if (writefill_st1e) begin
|
||||||
|
$display("%t: bank%0d:%0d store-fill: warp=%0d, PC=%0h, tag=%0h, wb=%b, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, data=%0h", $time, CACHE_ID, BANK_ID, debug_warp_num_st1e, debug_pc_st1e, debug_tagid_st1e, debug_wb_st1e, debug_rd_st1e, dirty_st1e, writeladdr_st1e, writetag_st1e, use_write_data);
|
||||||
|
end else begin
|
||||||
|
$display("%t: bank%0d:%0d store-write: warp=%0d, PC=%0h, tag=%0h, wb=%b, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_warp_num_st1e, debug_pc_st1e, debug_tagid_st1e, debug_wb_st1e, debug_rd_st1e, dirty_st1e, writeladdr_st1e, writetag_st1e, wordsel_st1e, writeword_st1e);
|
||||||
|
end
|
||||||
|
end else
|
||||||
|
if (miss_st1e) begin
|
||||||
|
$display("%t: bank%0d:%0d store-miss: warp=%0d, PC=%0h, tag=%0h, wb=%b, rd=%0d, dirty=%b", $time, CACHE_ID, BANK_ID, debug_warp_num_st1e, debug_pc_st1e, debug_tagid_st1e, debug_wb_st1e, debug_rd_st1e, dirty_st1e);
|
||||||
|
end else begin
|
||||||
|
$display("%t: bank%0d:%0d store-read: warp=%0d, PC=%0h, tag=%0h, wb=%b, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_warp_num_st1e, debug_pc_st1e, debug_tagid_st1e, debug_wb_st1e, debug_rd_st1e, dirty_st1e, readaddr_st10, qual_read_tag_st1, wordsel_st1e, qual_read_data_st1);
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
`endif
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
@@ -6,15 +6,13 @@
|
|||||||
interface VX_cmt_to_csr_if ();
|
interface VX_cmt_to_csr_if ();
|
||||||
|
|
||||||
wire valid;
|
wire valid;
|
||||||
|
|
||||||
|
wire [`NW_BITS-1:0] warp_num;
|
||||||
|
|
||||||
wire [`NE_BITS:0] num_commits;
|
wire [`NE_BITS:0] num_commits;
|
||||||
|
|
||||||
wire upd_fflags;
|
wire upd_fflags;
|
||||||
wire [`NW_BITS-1:0] fpu_warp_num;
|
wire [`FFG_BITS-1:0] fflags;
|
||||||
wire fflags_NV;
|
|
||||||
wire fflags_DZ;
|
|
||||||
wire fflags_OF;
|
|
||||||
wire fflags_UF;
|
|
||||||
wire fflags_NX;
|
|
||||||
|
|
||||||
endinterface
|
endinterface
|
||||||
|
|
||||||
|
|||||||
@@ -5,11 +5,11 @@
|
|||||||
|
|
||||||
interface VX_csr_io_req_if ();
|
interface VX_csr_io_req_if ();
|
||||||
|
|
||||||
wire valid;
|
wire valid;
|
||||||
wire [`CSR_ADDR_SIZE-1:0] addr;
|
wire [`CSR_ADDR_BITS-1:0] addr;
|
||||||
wire rw;
|
wire rw;
|
||||||
wire [31:0] data;
|
wire [31:0] data;
|
||||||
wire ready;
|
wire ready;
|
||||||
|
|
||||||
endinterface
|
endinterface
|
||||||
|
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ interface VX_csr_req_if ();
|
|||||||
|
|
||||||
wire [`CSR_BITS-1:0] csr_op;
|
wire [`CSR_BITS-1:0] csr_op;
|
||||||
|
|
||||||
wire [`CSR_ADDR_SIZE-1:0] csr_addr;
|
wire [`CSR_ADDR_BITS-1:0] csr_addr;
|
||||||
wire [31:0] csr_mask;
|
wire [31:0] csr_mask;
|
||||||
|
|
||||||
wire [`NR_BITS-1:0] rd;
|
wire [`NR_BITS-1:0] rd;
|
||||||
|
|||||||
@@ -9,11 +9,7 @@ interface VX_fpu_to_cmt_if ();
|
|||||||
wire [`ISTAG_BITS-1:0] issue_tag;
|
wire [`ISTAG_BITS-1:0] issue_tag;
|
||||||
wire [`NUM_THREADS-1:0][31:0] data;
|
wire [`NUM_THREADS-1:0][31:0] data;
|
||||||
wire upd_fflags;
|
wire upd_fflags;
|
||||||
wire fflags_NV;
|
wire [`NUM_THREADS-1:0][`FFG_BITS-1:0] fflags;
|
||||||
wire fflags_DZ;
|
|
||||||
wire fflags_OF;
|
|
||||||
wire fflags_UF;
|
|
||||||
wire fflags_NX;
|
|
||||||
wire ready;
|
wire ready;
|
||||||
|
|
||||||
endinterface
|
endinterface
|
||||||
|
|||||||
@@ -15,7 +15,7 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE
|
|||||||
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
|
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
|
||||||
|
|
||||||
DBG_FLAGS += $(DBG_PRINT_FLAGS)
|
DBG_FLAGS += $(DBG_PRINT_FLAGS)
|
||||||
#DBG_FLAGS += -DDBG_CORE_REQ_INFO
|
DBG_FLAGS += -DDBG_CORE_REQ_INFO
|
||||||
|
|
||||||
FPU_INCLUDE = -I../rtl/fp_cores/fpnew/src/common_cells/include -I../rtl/fp_cores/fpnew/src/common_cells/src -I../rtl/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I../rtl/fp_cores/fpnew/src
|
FPU_INCLUDE = -I../rtl/fp_cores/fpnew/src/common_cells/include -I../rtl/fp_cores/fpnew/src/common_cells/src -I../rtl/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I../rtl/fp_cores/fpnew/src
|
||||||
INCLUDE = -I../rtl/ -I../rtl/libs -I../rtl/interfaces -I../rtl/cache -I../rtl/fp_cores -I../rtl/simulate $(FPU_INCLUDE)
|
INCLUDE = -I../rtl/ -I../rtl/libs -I../rtl/interfaces -I../rtl/cache -I../rtl/fp_cores -I../rtl/simulate $(FPU_INCLUDE)
|
||||||
|
|||||||
@@ -210,7 +210,7 @@ void Simulator::wait(uint32_t cycles) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Simulator::is_busy() {
|
bool Simulator::is_busy() const {
|
||||||
return vortex_->busy || snp_req_active_;
|
return vortex_->busy || snp_req_active_;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -255,11 +255,11 @@ void Simulator::run() {
|
|||||||
this->wait(5);
|
this->wait(5);
|
||||||
}
|
}
|
||||||
|
|
||||||
int Simulator::get_last_wb_value(int reg) {
|
int Simulator::get_last_wb_value(int reg) const {
|
||||||
#if (NUM_CLUSTERS == 1)
|
#if (NUM_CLUSTERS != 1)
|
||||||
return (int)vortex_->Vortex->genblk1__DOT__cluster->genblk1__BRA__0__KET____DOT__core->pipeline->commit->writeback->last_wb_value[reg];
|
return (int)vortex_->Vortex->genblk2__DOT__genblk1__BRA__0__KET____DOT__cluster->genblk1__BRA__0__KET____DOT__core->pipeline->commit->writeback->last_wb_value[reg];
|
||||||
#else
|
#else
|
||||||
return (int)vortex_->Vortex->genblk2__DOT__genblk1__BRA__0__KET____DOT__cluster->genblk1__BRA__0__KET____DOT__core->pipeline->commit->writeback->last_wb_value[reg];
|
return (int)vortex_->Vortex->genblk1__DOT__cluster->genblk1__BRA__0__KET____DOT__core->pipeline->commit->writeback->last_wb_value[reg];
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -34,7 +34,7 @@ public:
|
|||||||
void load_bin(const char* program_file);
|
void load_bin(const char* program_file);
|
||||||
void load_ihex(const char* program_file);
|
void load_ihex(const char* program_file);
|
||||||
|
|
||||||
bool is_busy();
|
bool is_busy() const;
|
||||||
|
|
||||||
void reset();
|
void reset();
|
||||||
void step();
|
void step();
|
||||||
@@ -44,7 +44,7 @@ public:
|
|||||||
void attach_ram(RAM* ram);
|
void attach_ram(RAM* ram);
|
||||||
|
|
||||||
void run();
|
void run();
|
||||||
int get_last_wb_value(int reg);
|
int get_last_wb_value(int reg) const;
|
||||||
void print_stats(std::ostream& out);
|
void print_stats(std::ostream& out);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|||||||
@@ -83,11 +83,11 @@ vx_num_cores:
|
|||||||
.type vx_num_cycles, @function
|
.type vx_num_cycles, @function
|
||||||
.global vx_num_cycles
|
.global vx_num_cycles
|
||||||
vx_num_cycles:
|
vx_num_cycles:
|
||||||
csrr a0, CSR_CYCLE_L
|
csrr a0, CSR_CYCLE
|
||||||
ret
|
ret
|
||||||
|
|
||||||
.type vx_num_instrs, @function
|
.type vx_num_instrs, @function
|
||||||
.global vx_num_instrs
|
.global vx_num_instrs
|
||||||
vx_num_instrs:
|
vx_num_instrs:
|
||||||
csrr a0, CSR_INSTR_L
|
csrr a0, CSR_INSTRET
|
||||||
ret
|
ret
|
||||||
@@ -454,7 +454,7 @@ Disassembly of section .text:
|
|||||||
80000698: 0005006b 0x5006b
|
80000698: 0005006b 0x5006b
|
||||||
8000069c: 00001197 auipc gp,0x1
|
8000069c: 00001197 auipc gp,0x1
|
||||||
800006a0: 16c18193 addi gp,gp,364 # 80001808 <__global_pointer$>
|
800006a0: 16c18193 addi gp,gp,364 # 80001808 <__global_pointer$>
|
||||||
800006a4: f14025f3 csrr a1,mhartid
|
800006a4: 022025f3 csrr a1,0x22
|
||||||
800006a8: 00a59593 slli a1,a1,0xa
|
800006a8: 00a59593 slli a1,a1,0xa
|
||||||
800006ac: 02002673 csrr a2,0x20
|
800006ac: 02002673 csrr a2,0x20
|
||||||
800006b0: 00261613 slli a2,a2,0x2
|
800006b0: 00261613 slli a2,a2,0x2
|
||||||
@@ -502,7 +502,7 @@ Disassembly of section .text:
|
|||||||
80000710: 00008067 ret
|
80000710: 00008067 ret
|
||||||
|
|
||||||
80000714 <vx_thread_gid>:
|
80000714 <vx_thread_gid>:
|
||||||
80000714: f1402573 csrr a0,mhartid
|
80000714: 02202573 csrr a0,0x22
|
||||||
80000718: 00008067 ret
|
80000718: 00008067 ret
|
||||||
|
|
||||||
8000071c <vx_core_id>:
|
8000071c <vx_core_id>:
|
||||||
|
|||||||
Binary file not shown.
@@ -105,14 +105,14 @@
|
|||||||
:10066800130504D7032481008320C100832441009B
|
:10066800130504D7032481008320C100832441009B
|
||||||
:10067800130101016F00000D13050000EF00800554
|
:10067800130101016F00000D13050000EF00800554
|
||||||
:100688006FF09FFA130500006B00050073255002F8
|
:100688006FF09FFA130500006B00050073255002F8
|
||||||
:100698006B000500971100009381C116F32540F106
|
:100698006B000500971100009381C116F325200215
|
||||||
:1006A8009395A500732600021316260037F1FF6FF5
|
:1006A8009395A500732600021316260037F1FF6FF5
|
||||||
:1006B8003301B1403301C100F326100263860600FE
|
:1006B8003301B1403301C100F326100263860600FE
|
||||||
:1006C800130500006B000500678000006B10B50083
|
:1006C800130500006B000500678000006B10B50083
|
||||||
:1006D800678000006B000500678000006B40B50074
|
:1006D800678000006B000500678000006B40B50074
|
||||||
:1006E800678000006B200500678000006B30000009
|
:1006E800678000006B200500678000006B30000009
|
||||||
:1006F80067800000732510026780000073253002B0
|
:1006F80067800000732510026780000073253002B0
|
||||||
:10070800678000007325000267800000732540F1B0
|
:1007080067800000732500026780000073252002BF
|
||||||
:10071800678000007325400267800000732550023F
|
:10071800678000007325400267800000732550023F
|
||||||
:1007280067800000732560026780000073257002EF
|
:1007280067800000732560026780000073257002EF
|
||||||
:1007380067800000732500C067800000732520C013
|
:1007380067800000732500C067800000732520C013
|
||||||
|
|||||||
Reference in New Issue
Block a user