moving MUL unit into ALU unit
This commit is contained in:
@@ -1,9 +1,19 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Exit immediately when any command fails
|
||||
set -e
|
||||
|
||||
# Dogfood tests
|
||||
./ci/test_runtime.sh
|
||||
./ci/test_riscv_isa.sh
|
||||
./ci/test_opencl.sh
|
||||
./ci/test_driver.sh
|
||||
|
||||
# Build tests with extensions disabled
|
||||
CONFIGS=-DEXT_M_DISABLE make -C hw/simulate
|
||||
CONFIGS=-DEXT_F_DISABLE make -C hw/simulate
|
||||
|
||||
# Blackbox tests
|
||||
./ci/travis_run.py ./ci/blackbox.sh --driver=vlsim --cores=1 --perf --app=demo --args="-n1"
|
||||
./ci/travis_run.py ./ci/blackbox.sh --driver=vlsim --cores=1 --debug --app=demo --args="-n1"
|
||||
./ci/travis_run.py ./ci/blackbox.sh --driver=vlsim --cores=1 --scope --app=demo --args="-n1"
|
||||
|
||||
@@ -93,7 +93,6 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
|
||||
uint64_t scoreboard_stalls = 0;
|
||||
uint64_t lsu_stalls = 0;
|
||||
uint64_t fpu_stalls = 0;
|
||||
uint64_t mul_stalls = 0;
|
||||
uint64_t csr_stalls = 0;
|
||||
uint64_t alu_stalls = 0;
|
||||
uint64_t gpu_stalls = 0;
|
||||
@@ -158,12 +157,7 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
|
||||
uint64_t csr_stalls_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_CSR_ST, CSR_MPM_CSR_ST_H, &csr_stalls_per_core);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: csr unit stalls=%ld\n", core_id, csr_stalls_per_core);
|
||||
csr_stalls += csr_stalls_per_core;
|
||||
// mul_stall
|
||||
uint64_t mul_stalls_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_MUL_ST, CSR_MPM_MUL_ST_H, &mul_stalls_per_core);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: mul unit stalls=%ld\n", core_id, mul_stalls_per_core);
|
||||
mul_stalls += mul_stalls_per_core;
|
||||
csr_stalls += csr_stalls_per_core;
|
||||
// fpu_stall
|
||||
uint64_t fpu_stalls_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_FPU_ST, CSR_MPM_FPU_ST_H, &fpu_stalls_per_core);
|
||||
@@ -295,7 +289,6 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
|
||||
fprintf(stream, "PERF: alu unit stalls=%ld\n", alu_stalls);
|
||||
fprintf(stream, "PERF: lsu unit stalls=%ld\n", lsu_stalls);
|
||||
fprintf(stream, "PERF: csr unit stalls=%ld\n", csr_stalls);
|
||||
fprintf(stream, "PERF: mul unit stalls=%ld\n", mul_stalls);
|
||||
fprintf(stream, "PERF: fpu unit stalls=%ld\n", fpu_stalls);
|
||||
fprintf(stream, "PERF: gpu unit stalls=%ld\n", gpu_stalls);
|
||||
fprintf(stream, "PERF: icache reads=%ld\n", icache_reads);
|
||||
|
||||
@@ -13,13 +13,16 @@ module VX_alu_unit #(
|
||||
VX_branch_ctl_if branch_ctl_if,
|
||||
VX_commit_if alu_commit_if
|
||||
);
|
||||
reg [`NUM_THREADS-1:0][31:0] alu_result;
|
||||
reg [`NUM_THREADS-1:0][31:0] add_result;
|
||||
reg [`NUM_THREADS-1:0][32:0] sub_result;
|
||||
reg [`NUM_THREADS-1:0][31:0] shr_result;
|
||||
reg [`NUM_THREADS-1:0][31:0] msc_result;
|
||||
reg [`NUM_THREADS-1:0][31:0] alu_result;
|
||||
wire [`NUM_THREADS-1:0][31:0] add_result;
|
||||
wire [`NUM_THREADS-1:0][32:0] sub_result;
|
||||
wire [`NUM_THREADS-1:0][31:0] shr_result;
|
||||
reg [`NUM_THREADS-1:0][31:0] msc_result;
|
||||
|
||||
wire is_br_op = alu_req_if.is_br_op;
|
||||
wire stall_in, stall_out;
|
||||
|
||||
`UNUSED_VAR (alu_req_if.op_mod)
|
||||
wire is_br_op = `IS_BR_MOD(alu_req_if.op_mod);
|
||||
wire [`ALU_BITS-1:0] alu_op = `ALU_OP(alu_req_if.op_type);
|
||||
wire [`BR_BITS-1:0] br_op = `BR_OP(alu_req_if.op_type);
|
||||
wire alu_signed = `ALU_SIGNED(alu_op);
|
||||
@@ -34,17 +37,13 @@ module VX_alu_unit #(
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_in2_less = (alu_req_if.rs2_is_imm && !is_br_op) ? {`NUM_THREADS{alu_req_if.imm}} : alu_in2;
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
always @(*) begin
|
||||
add_result[i] = alu_in1_PC[i] + alu_in2_imm[i];
|
||||
end
|
||||
assign add_result[i] = alu_in1_PC[i] + alu_in2_imm[i];
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
wire [32:0] sub_in1 = {alu_signed & alu_in1[i][31], alu_in1[i]};
|
||||
wire [32:0] sub_in2 = {alu_signed & alu_in2_less[i][31], alu_in2_less[i]};
|
||||
always @(*) begin
|
||||
sub_result[i] = $signed(sub_in1) - $signed(sub_in2);
|
||||
end
|
||||
assign sub_result[i] = $signed(sub_in1) - $signed(sub_in2);
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
@@ -52,9 +51,7 @@ module VX_alu_unit #(
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
wire [32:0] shr_value = $signed(shr_in1) >>> alu_in2_imm[i][4:0];
|
||||
`IGNORE_WARNINGS_END
|
||||
always @(*) begin
|
||||
shr_result[i] = shr_value[31:0];
|
||||
end
|
||||
assign shr_result[i] = shr_value[31:0];
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
@@ -92,13 +89,94 @@ module VX_alu_unit #(
|
||||
wire br_neg = `BR_NEG(br_op);
|
||||
wire br_less = `BR_LESS(br_op);
|
||||
wire br_static = `BR_STATIC(br_op);
|
||||
wire br_taken = ((br_less ? is_less : is_equal) ^ br_neg) | br_static;
|
||||
wire br_taken = ((br_less ? is_less : is_equal) ^ br_neg) | br_static;
|
||||
|
||||
// output
|
||||
|
||||
wire result_valid;
|
||||
wire [`NW_BITS-1:0] result_wid;
|
||||
wire [`NUM_THREADS-1:0] result_tmask;
|
||||
wire [31:0] result_PC;
|
||||
wire [`NR_BITS-1:0] result_rd;
|
||||
wire result_wb;
|
||||
wire [`NUM_THREADS-1:0][31:0] result_data;
|
||||
wire result_is_br;
|
||||
|
||||
`ifdef EXT_M_ENABLE
|
||||
|
||||
wire mul_ready_in;
|
||||
wire mul_valid_out;
|
||||
wire mul_ready_out;
|
||||
wire [`NW_BITS-1:0] mul_wid;
|
||||
wire [`NUM_THREADS-1:0] mul_tmask;
|
||||
wire [31:0] mul_PC;
|
||||
wire [`NR_BITS-1:0] mul_rd;
|
||||
wire mul_wb;
|
||||
wire [`NUM_THREADS-1:0][31:0] mul_data;
|
||||
|
||||
wire is_mul_op = `IS_MUL_MOD(alu_req_if.op_mod);
|
||||
|
||||
VX_muldiv muldiv (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
// Inputs
|
||||
.alu_op (`MUL_OP(alu_req_if.op_type)),
|
||||
.wid_in (alu_req_if.wid),
|
||||
.tmask_in (alu_req_if.tmask),
|
||||
.PC_in (alu_req_if.PC),
|
||||
.rd_in (alu_req_if.rd),
|
||||
.wb_in (alu_req_if.wb),
|
||||
.alu_in1 (alu_req_if.rs1_data),
|
||||
.alu_in2 (alu_req_if.rs2_data),
|
||||
|
||||
// Outputs
|
||||
.wid_out (mul_wid),
|
||||
.tmask_out (mul_tmask),
|
||||
.PC_out (mul_PC),
|
||||
.rd_out (mul_rd),
|
||||
.wb_out (mul_wb),
|
||||
.data_out (mul_data),
|
||||
|
||||
// handshake
|
||||
.valid_in (alu_req_if.valid && is_mul_op),
|
||||
.ready_in (mul_ready_in),
|
||||
.valid_out (mul_valid_out),
|
||||
.ready_out (mul_ready_out)
|
||||
);
|
||||
|
||||
assign stall_in = (is_mul_op && ~mul_ready_in)
|
||||
|| (~is_mul_op && (mul_valid_out || stall_out));
|
||||
|
||||
assign mul_ready_out = !stall_out;
|
||||
|
||||
assign result_valid = mul_valid_out | (alu_req_if.valid && ~is_mul_op);
|
||||
assign result_wid = mul_valid_out ? mul_wid : alu_req_if.wid;
|
||||
assign result_tmask = mul_valid_out ? mul_tmask : alu_req_if.tmask;
|
||||
assign result_PC = mul_valid_out ? mul_PC : alu_req_if.PC;
|
||||
assign result_rd = mul_valid_out ? mul_rd : alu_req_if.rd;
|
||||
assign result_wb = mul_valid_out ? mul_wb : alu_req_if.wb;
|
||||
assign result_data = mul_valid_out ? mul_data : alu_jal_result;
|
||||
assign result_is_br = !mul_valid_out && is_br_op;
|
||||
|
||||
`else
|
||||
|
||||
assign stall_in = 0;
|
||||
|
||||
assign result_valid = alu_req_if.valid;
|
||||
assign result_wid = alu_req_if.wid;
|
||||
assign result_tmask = alu_req_if.tmask;
|
||||
assign result_PC = alu_req_if.PC;
|
||||
assign result_rd = alu_req_if.rd;
|
||||
assign result_wb = alu_req_if.wb;
|
||||
assign result_data = alu_jal_result;
|
||||
assign result_is_br = is_br_op;
|
||||
|
||||
`endif
|
||||
|
||||
wire is_br_op_r;
|
||||
|
||||
wire stall_out = ~alu_commit_if.ready && alu_commit_if.valid;
|
||||
assign stall_out = ~alu_commit_if.ready && alu_commit_if.valid;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + 1 + 32),
|
||||
@@ -107,8 +185,8 @@ module VX_alu_unit #(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (!stall_out),
|
||||
.data_in ({alu_req_if.valid, alu_req_if.wid, alu_req_if.tmask, alu_req_if.PC, alu_req_if.rd, alu_req_if.wb, alu_jal_result, is_br_op, br_taken, br_dest}),
|
||||
.data_out ({alu_commit_if.valid, alu_commit_if.wid, alu_commit_if.tmask, alu_commit_if.PC, alu_commit_if.rd, alu_commit_if.wb, alu_commit_if.data, is_br_op_r, branch_ctl_if.taken, branch_ctl_if.dest})
|
||||
.data_in ({result_valid, result_wid, result_tmask, result_PC, result_rd, result_wb, result_data, result_is_br, br_taken, br_dest}),
|
||||
.data_out ({alu_commit_if.valid, alu_commit_if.wid, alu_commit_if.tmask, alu_commit_if.PC, alu_commit_if.rd, alu_commit_if.wb, alu_commit_if.data, is_br_op_r, branch_ctl_if.taken, branch_ctl_if.dest})
|
||||
);
|
||||
|
||||
assign alu_commit_if.eop = 1'b1;
|
||||
@@ -117,6 +195,6 @@ module VX_alu_unit #(
|
||||
assign branch_ctl_if.wid = alu_commit_if.wid;
|
||||
|
||||
// Can the unit accept a new request?
|
||||
assign alu_req_if.ready = ~stall_out;
|
||||
assign alu_req_if.ready = ~stall_in;
|
||||
|
||||
endmodule
|
||||
@@ -9,8 +9,7 @@ module VX_commit #(
|
||||
// inputs
|
||||
VX_commit_if alu_commit_if,
|
||||
VX_commit_if ld_commit_if,
|
||||
VX_commit_if st_commit_if,
|
||||
VX_commit_if mul_commit_if,
|
||||
VX_commit_if st_commit_if,
|
||||
VX_commit_if csr_commit_if,
|
||||
VX_commit_if fpu_commit_if,
|
||||
VX_commit_if gpu_commit_if,
|
||||
@@ -27,7 +26,6 @@ module VX_commit #(
|
||||
wire ld_commit_fire = ld_commit_if.valid && ld_commit_if.ready;
|
||||
wire st_commit_fire = st_commit_if.valid && st_commit_if.ready;
|
||||
wire csr_commit_fire = csr_commit_if.valid && csr_commit_if.ready;
|
||||
wire mul_commit_fire = mul_commit_if.valid && mul_commit_if.ready;
|
||||
wire fpu_commit_fire = fpu_commit_if.valid && fpu_commit_if.ready;
|
||||
wire gpu_commit_fire = gpu_commit_if.valid && gpu_commit_if.ready;
|
||||
|
||||
@@ -35,7 +33,6 @@ module VX_commit #(
|
||||
|| ld_commit_fire
|
||||
|| st_commit_fire
|
||||
|| csr_commit_fire
|
||||
|| mul_commit_fire
|
||||
|| fpu_commit_fire
|
||||
|| gpu_commit_fire;
|
||||
|
||||
@@ -44,7 +41,6 @@ module VX_commit #(
|
||||
assign commit_tmask1 = alu_commit_fire ? alu_commit_if.tmask:
|
||||
ld_commit_fire ? ld_commit_if.tmask:
|
||||
csr_commit_fire ? csr_commit_if.tmask:
|
||||
mul_commit_fire ? mul_commit_if.tmask:
|
||||
fpu_commit_fire ? fpu_commit_if.tmask:
|
||||
0;
|
||||
|
||||
@@ -76,7 +72,6 @@ module VX_commit #(
|
||||
.alu_commit_if (alu_commit_if),
|
||||
.ld_commit_if (ld_commit_if),
|
||||
.csr_commit_if (csr_commit_if),
|
||||
.mul_commit_if (mul_commit_if),
|
||||
.fpu_commit_if (fpu_commit_if),
|
||||
|
||||
.writeback_if (writeback_if)
|
||||
@@ -99,10 +94,7 @@ module VX_commit #(
|
||||
end
|
||||
if (csr_commit_if.valid && csr_commit_if.ready) begin
|
||||
$display("%t: core%0d-commit: wid=%0d, PC=%0h, ex=CSR, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, csr_commit_if.wid, csr_commit_if.PC, csr_commit_if.tmask, csr_commit_if.wb, csr_commit_if.rd, csr_commit_if.data);
|
||||
end
|
||||
if (mul_commit_if.valid && mul_commit_if.ready) begin
|
||||
$display("%t: core%0d-commit: wid=%0d, PC=%0h, ex=MUL, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, mul_commit_if.wid, mul_commit_if.PC, mul_commit_if.tmask, mul_commit_if.wb, mul_commit_if.rd, mul_commit_if.data);
|
||||
end
|
||||
end
|
||||
if (fpu_commit_if.valid && fpu_commit_if.ready) begin
|
||||
$display("%t: core%0d-commit: wid=%0d, PC=%0h, ex=FPU, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, fpu_commit_if.wid, fpu_commit_if.PC, fpu_commit_if.tmask, fpu_commit_if.wb, fpu_commit_if.rd, fpu_commit_if.data);
|
||||
end
|
||||
|
||||
@@ -167,54 +167,52 @@
|
||||
`define CSR_MPM_LSU_ST_H 12'hB86
|
||||
`define CSR_MPM_CSR_ST 12'hB07
|
||||
`define CSR_MPM_CSR_ST_H 12'hB87
|
||||
`define CSR_MPM_MUL_ST 12'hB08
|
||||
`define CSR_MPM_MUL_ST_H 12'hB88
|
||||
`define CSR_MPM_FPU_ST 12'hB09
|
||||
`define CSR_MPM_FPU_ST_H 12'hB89
|
||||
`define CSR_MPM_GPU_ST 12'hB0A
|
||||
`define CSR_MPM_GPU_ST_H 12'hB8A
|
||||
`define CSR_MPM_FPU_ST 12'hB08
|
||||
`define CSR_MPM_FPU_ST_H 12'hB88
|
||||
`define CSR_MPM_GPU_ST 12'hB09
|
||||
`define CSR_MPM_GPU_ST_H 12'hB89
|
||||
// PERF: icache
|
||||
`define CSR_MPM_ICACHE_READS 12'hB0B // total reads
|
||||
`define CSR_MPM_ICACHE_READS_H 12'hB8B
|
||||
`define CSR_MPM_ICACHE_MISS_R 12'hB0C // total misses
|
||||
`define CSR_MPM_ICACHE_MISS_R_H 12'hB8C
|
||||
`define CSR_MPM_ICACHE_PIPE_ST 12'hB0D // pipeline stalls
|
||||
`define CSR_MPM_ICACHE_PIPE_ST_H 12'hB8D
|
||||
`define CSR_MPM_ICACHE_CRSP_ST 12'hB0E // core response stalls
|
||||
`define CSR_MPM_ICACHE_CRSP_ST_H 12'hB8E
|
||||
`define CSR_MPM_ICACHE_READS 12'hB0A // total reads
|
||||
`define CSR_MPM_ICACHE_READS_H 12'hB8A
|
||||
`define CSR_MPM_ICACHE_MISS_R 12'hB0B // total misses
|
||||
`define CSR_MPM_ICACHE_MISS_R_H 12'hB8B
|
||||
`define CSR_MPM_ICACHE_PIPE_ST 12'hB0C // pipeline stalls
|
||||
`define CSR_MPM_ICACHE_PIPE_ST_H 12'hB8C
|
||||
`define CSR_MPM_ICACHE_CRSP_ST 12'hB0D // core response stalls
|
||||
`define CSR_MPM_ICACHE_CRSP_ST_H 12'hB8D
|
||||
// PERF: dcache
|
||||
`define CSR_MPM_DCACHE_READS 12'hB0F // total reads
|
||||
`define CSR_MPM_DCACHE_READS_H 12'hB8F
|
||||
`define CSR_MPM_DCACHE_WRITES 12'hB10 // total writes
|
||||
`define CSR_MPM_DCACHE_WRITES_H 12'hB90
|
||||
`define CSR_MPM_DCACHE_MISS_R 12'hB11 // read misses
|
||||
`define CSR_MPM_DCACHE_MISS_R_H 12'hB91
|
||||
`define CSR_MPM_DCACHE_MISS_W 12'hB12 // write misses
|
||||
`define CSR_MPM_DCACHE_MISS_W_H 12'hB92
|
||||
`define CSR_MPM_DCACHE_BANK_ST 12'hB13 // bank conflicts stalls
|
||||
`define CSR_MPM_DCACHE_BANK_ST_H 12'hB93
|
||||
`define CSR_MPM_DCACHE_MSHR_ST 12'hB14 // MSHR stalls
|
||||
`define CSR_MPM_DCACHE_MSHR_ST_H 12'hB94
|
||||
`define CSR_MPM_DCACHE_PIPE_ST 12'hB15 // pipeline stalls
|
||||
`define CSR_MPM_DCACHE_PIPE_ST_H 12'hB95
|
||||
`define CSR_MPM_DCACHE_CRSP_ST 12'hB16 // core response stalls
|
||||
`define CSR_MPM_DCACHE_CRSP_ST_H 12'hB96
|
||||
`define CSR_MPM_DCACHE_READS 12'hB0E // total reads
|
||||
`define CSR_MPM_DCACHE_READS_H 12'hB8E
|
||||
`define CSR_MPM_DCACHE_WRITES 12'hB0F // total writes
|
||||
`define CSR_MPM_DCACHE_WRITES_H 12'hB8F
|
||||
`define CSR_MPM_DCACHE_MISS_R 12'hB10 // read misses
|
||||
`define CSR_MPM_DCACHE_MISS_R_H 12'hB90
|
||||
`define CSR_MPM_DCACHE_MISS_W 12'hB11 // write misses
|
||||
`define CSR_MPM_DCACHE_MISS_W_H 12'hB91
|
||||
`define CSR_MPM_DCACHE_BANK_ST 12'hB12 // bank conflicts stalls
|
||||
`define CSR_MPM_DCACHE_BANK_ST_H 12'hB92
|
||||
`define CSR_MPM_DCACHE_MSHR_ST 12'hB13 // MSHR stalls
|
||||
`define CSR_MPM_DCACHE_MSHR_ST_H 12'hB93
|
||||
`define CSR_MPM_DCACHE_PIPE_ST 12'hB14 // pipeline stalls
|
||||
`define CSR_MPM_DCACHE_PIPE_ST_H 12'hB94
|
||||
`define CSR_MPM_DCACHE_CRSP_ST 12'hB15 // core response stalls
|
||||
`define CSR_MPM_DCACHE_CRSP_ST_H 12'hB95
|
||||
// PERF: smem
|
||||
`define CSR_MPM_SMEM_READS 12'hB17 // total reads
|
||||
`define CSR_MPM_SMEM_READS_H 12'hB97
|
||||
`define CSR_MPM_SMEM_WRITES 12'hB18 // total writes
|
||||
`define CSR_MPM_SMEM_WRITES_H 12'hB98
|
||||
`define CSR_MPM_SMEM_BANK_ST 12'hB19 // bank conflicts stalls
|
||||
`define CSR_MPM_SMEM_BANK_ST_H 12'hB99
|
||||
`define CSR_MPM_SMEM_READS 12'hB16 // total reads
|
||||
`define CSR_MPM_SMEM_READS_H 12'hB96
|
||||
`define CSR_MPM_SMEM_WRITES 12'hB17 // total writes
|
||||
`define CSR_MPM_SMEM_WRITES_H 12'hB97
|
||||
`define CSR_MPM_SMEM_BANK_ST 12'hB18 // bank conflicts stalls
|
||||
`define CSR_MPM_SMEM_BANK_ST_H 12'hB98
|
||||
// PERF: memory
|
||||
`define CSR_MPM_DRAM_READS 12'hB1A // dram reads
|
||||
`define CSR_MPM_DRAM_READS_H 12'hB9A
|
||||
`define CSR_MPM_DRAM_WRITES 12'hB1B // dram writes
|
||||
`define CSR_MPM_DRAM_WRITES_H 12'hB9B
|
||||
`define CSR_MPM_DRAM_ST 12'hB1C // dram request stalls
|
||||
`define CSR_MPM_DRAM_ST_H 12'hB9C
|
||||
`define CSR_MPM_DRAM_LAT 12'hB1D // dram latency (total)
|
||||
`define CSR_MPM_DRAM_LAT_H 12'hB9D
|
||||
`define CSR_MPM_DRAM_READS 12'hB19 // dram reads
|
||||
`define CSR_MPM_DRAM_READS_H 12'hB99
|
||||
`define CSR_MPM_DRAM_WRITES 12'hB1A // dram writes
|
||||
`define CSR_MPM_DRAM_WRITES_H 12'hB9A
|
||||
`define CSR_MPM_DRAM_ST 12'hB1B // dram request stalls
|
||||
`define CSR_MPM_DRAM_ST_H 12'hB9B
|
||||
`define CSR_MPM_DRAM_LAT 12'hB1C // dram latency (total)
|
||||
`define CSR_MPM_DRAM_LAT_H 12'hB9C
|
||||
|
||||
// Machine Information Registers
|
||||
`define CSR_MVENDORID 12'hF11
|
||||
|
||||
@@ -132,8 +132,6 @@ module VX_csr_data #(
|
||||
`CSR_MPM_LSU_ST_H : read_data_r = perf_pipeline_if.lsu_stalls[63:32];
|
||||
`CSR_MPM_CSR_ST : read_data_r = perf_pipeline_if.csr_stalls[31:0];
|
||||
`CSR_MPM_CSR_ST_H : read_data_r = perf_pipeline_if.csr_stalls[63:32];
|
||||
`CSR_MPM_MUL_ST : read_data_r = perf_pipeline_if.mul_stalls[31:0];
|
||||
`CSR_MPM_MUL_ST_H : read_data_r = perf_pipeline_if.mul_stalls[63:32];
|
||||
`CSR_MPM_FPU_ST : read_data_r = perf_pipeline_if.fpu_stalls[31:0];
|
||||
`CSR_MPM_FPU_ST_H : read_data_r = perf_pipeline_if.fpu_stalls[63:32];
|
||||
`CSR_MPM_GPU_ST : read_data_r = perf_pipeline_if.gpu_stalls[31:0];
|
||||
|
||||
@@ -21,10 +21,10 @@ module VX_decode #(
|
||||
wire [31:0] instr = ifetch_rsp_if.instr;
|
||||
|
||||
reg [`ALU_BITS-1:0] alu_op;
|
||||
reg [`BR_BITS-1:0] br_op;
|
||||
reg [`BR_BITS-1:0] br_op;
|
||||
reg [`MUL_BITS-1:0] mul_op;
|
||||
reg [`LSU_BITS-1:0] lsu_op;
|
||||
reg [`CSR_BITS-1:0] csr_op;
|
||||
reg [`MUL_BITS-1:0] mul_op;
|
||||
reg [`FPU_BITS-1:0] fpu_op;
|
||||
reg [`GPU_BITS-1:0] gpu_op;
|
||||
|
||||
@@ -120,16 +120,11 @@ module VX_decode #(
|
||||
`INST_JAL: br_op = `BR_JAL;
|
||||
`INST_JALR: br_op = `BR_JALR;
|
||||
`INST_SYS: begin
|
||||
if (is_jals) begin
|
||||
case (u_12)
|
||||
12'h000: br_op = `BR_ECALL;
|
||||
12'h001: br_op = `BR_EBREAK;
|
||||
12'h302: br_op = `BR_MRET;
|
||||
12'h102: br_op = `BR_SRET;
|
||||
12'h7B2: br_op = `BR_DRET;
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
if (is_jals && u_12 == 12'h000) br_op = `BR_ECALL;
|
||||
if (is_jals && u_12 == 12'h001) br_op = `BR_EBREAK;
|
||||
if (is_jals && u_12 == 12'h302) br_op = `BR_MRET;
|
||||
if (is_jals && u_12 == 12'h102) br_op = `BR_SRET;
|
||||
if (is_jals && u_12 == 12'h7B2) br_op = `BR_DRET;
|
||||
end
|
||||
default:;
|
||||
endcase
|
||||
@@ -174,7 +169,7 @@ module VX_decode #(
|
||||
|
||||
// MUL
|
||||
`ifdef EXT_M_ENABLE
|
||||
wire is_mul = is_rtype && (func7 == 7'h1);
|
||||
wire is_mul = is_rtype && (func7 == 7'h1);
|
||||
always @(*) begin
|
||||
mul_op = `MUL_MUL;
|
||||
case (func3)
|
||||
@@ -238,11 +233,11 @@ module VX_decode #(
|
||||
7'h0C: fpu_op = `FPU_DIV;
|
||||
7'h10: begin
|
||||
fpu_op = `FPU_MISC;
|
||||
frm = func3[1] ? 2 : (func3[0] ? 1 : 0);
|
||||
frm = func3[1] ? 3'b010 : {2'b0, func3[0]};
|
||||
end
|
||||
7'h14: begin
|
||||
fpu_op = `FPU_MISC;
|
||||
frm = (func3 == 3'h0) ? 3 : 4;
|
||||
frm = (func3 == 3'h0) ? 3'b011 : 3'b100;
|
||||
end
|
||||
7'h2C: begin
|
||||
fpu_op = `FPU_SQRT;
|
||||
@@ -272,6 +267,7 @@ module VX_decode #(
|
||||
wire is_fpu = 0;
|
||||
wire is_fpu_no_mem= 0;
|
||||
wire [2:0] frm = 0;
|
||||
wire is_fsqrt = 0;
|
||||
|
||||
always @(*) begin
|
||||
fpu_op = `FPU_MISC;
|
||||
@@ -334,25 +330,23 @@ module VX_decode #(
|
||||
assign decode_if.tmask = ifetch_rsp_if.tmask;
|
||||
assign decode_if.PC = ifetch_rsp_if.PC;
|
||||
|
||||
assign decode_if.ex_type = is_lsu ? `EX_LSU :
|
||||
assign decode_if.ex_type = is_gpu ? `EX_GPU :
|
||||
is_csr ? `EX_CSR :
|
||||
is_mul ? `EX_MUL :
|
||||
is_fpu_no_mem ? `EX_FPU :
|
||||
is_gpu ? `EX_GPU :
|
||||
is_br ? `EX_ALU :
|
||||
(is_rtype || is_itype || is_lui || is_auipc) ? `EX_ALU :
|
||||
`EX_NOP;
|
||||
is_fpu_no_mem ? `EX_FPU :
|
||||
is_lsu ? `EX_LSU :
|
||||
(is_br || is_rtype || is_itype || is_lui || is_auipc) ? `EX_ALU :
|
||||
`EX_NOP;
|
||||
|
||||
assign decode_if.op_type = is_lsu ? `OP_BITS'(lsu_op) :
|
||||
assign decode_if.op_type = is_gpu ? `OP_BITS'(gpu_op) :
|
||||
is_csr ? `OP_BITS'(csr_op) :
|
||||
is_mul ? `OP_BITS'(mul_op) :
|
||||
is_fpu_no_mem ? `OP_BITS'(fpu_op) :
|
||||
is_gpu ? `OP_BITS'(gpu_op) :
|
||||
is_fpu_no_mem ? `OP_BITS'(fpu_op) :
|
||||
is_lsu ? `OP_BITS'(lsu_op) :
|
||||
is_br ? `OP_BITS'(br_op) :
|
||||
(is_rtype || is_itype || is_lui || is_auipc) ? `OP_BITS'(alu_op) :
|
||||
0;
|
||||
|
||||
assign decode_if.wb = use_rd;
|
||||
assign decode_if.wb = use_rd && (decode_if.ex_type != `EX_NOP);
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
wire rd_is_fp = is_fpu && ~(is_fcmp || is_fcvti || is_fmvw_clss);
|
||||
@@ -370,13 +364,10 @@ module VX_decode #(
|
||||
assign decode_if.rs3 = rs3;
|
||||
`endif
|
||||
|
||||
wire is_nop = (decode_if.ex_type == `EX_NOP);
|
||||
|
||||
assign decode_if.used_regs = is_nop ? `NUM_REGS'(0) :
|
||||
((`NUM_REGS'(use_rd) << decode_if.rd)
|
||||
| (`NUM_REGS'(use_rs1) << decode_if.rs1)
|
||||
| (`NUM_REGS'(use_rs2) << decode_if.rs2)
|
||||
| (`NUM_REGS'(use_rs3) << decode_if.rs3));
|
||||
assign decode_if.used_regs = (`NUM_REGS'(use_rd) << decode_if.rd)
|
||||
| (`NUM_REGS'(use_rs1) << decode_if.rs1)
|
||||
| (`NUM_REGS'(use_rs2) << decode_if.rs2)
|
||||
| (`NUM_REGS'(use_rs3) << decode_if.rs3);
|
||||
|
||||
assign decode_if.imm = (is_lui || is_auipc) ? {upper_imm, 12'(0)} :
|
||||
(is_jal || is_jalr || is_jals) ? jalx_offset :
|
||||
@@ -386,7 +377,7 @@ module VX_decode #(
|
||||
assign decode_if.rs1_is_PC = is_auipc || is_btype || is_jal || is_jals;
|
||||
assign decode_if.rs2_is_imm = is_itype || is_lui || is_auipc || is_csr_imm || is_br;
|
||||
|
||||
wire [`MOD_BITS-1:0] alu_mod = is_br ? 1 : 0;
|
||||
wire [`MOD_BITS-1:0] alu_mod = {1'b0, is_mul, is_br};
|
||||
assign decode_if.op_mod = is_fpu_no_mem ? frm : alu_mod;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
@@ -68,9 +68,8 @@
|
||||
`define EX_ALU 3'h1
|
||||
`define EX_LSU 3'h2
|
||||
`define EX_CSR 3'h3
|
||||
`define EX_MUL 3'h4
|
||||
`define EX_FPU 3'h5
|
||||
`define EX_GPU 3'h6
|
||||
`define EX_FPU 3'h4
|
||||
`define EX_GPU 3'h5
|
||||
`define EX_BITS 3
|
||||
|
||||
`define NUM_EXS 6
|
||||
@@ -118,10 +117,21 @@
|
||||
`define BR_NEG(x) x[1]
|
||||
`define BR_LESS(x) x[2]
|
||||
`define BR_STATIC(x) x[3]
|
||||
`define ALU_BR_BITS 4
|
||||
`define ALU_BR_OP(x) x[`ALU_BR_BITS-1:0]
|
||||
`define IS_BR_MOD(x) x[0]
|
||||
|
||||
`define MUL_MUL 3'h0
|
||||
`define MUL_MULH 3'h1
|
||||
`define MUL_MULHSU 3'h2
|
||||
`define MUL_MULHU 3'h3
|
||||
`define MUL_DIV 3'h4
|
||||
`define MUL_DIVU 3'h5
|
||||
`define MUL_REM 3'h6
|
||||
`define MUL_REMU 3'h7
|
||||
`define MUL_BITS 3
|
||||
`define MUL_OP(x) x[`MUL_BITS-1:0]
|
||||
`define IS_DIV_OP(x) x[2]
|
||||
`define IS_MUL_MOD(x) x[1]
|
||||
|
||||
`define LSU_SB 3'h0
|
||||
`define LSU_SH 3'h1
|
||||
`define LSU_SW 3'h2
|
||||
@@ -138,18 +148,6 @@
|
||||
`define CSR_BITS 2
|
||||
`define CSR_OP(x) x[`CSR_BITS-1:0]
|
||||
|
||||
`define MUL_MUL 3'h0
|
||||
`define MUL_MULH 3'h1
|
||||
`define MUL_MULHSU 3'h2
|
||||
`define MUL_MULHU 3'h3
|
||||
`define MUL_DIV 3'h4
|
||||
`define MUL_DIVU 3'h5
|
||||
`define MUL_REM 3'h6
|
||||
`define MUL_REMU 3'h7
|
||||
`define MUL_BITS 3
|
||||
`define MUL_OP(x) x[`MUL_BITS-1:0]
|
||||
`define IS_DIV_OP(x) x[2]
|
||||
|
||||
`define FPU_ADD 4'h0
|
||||
`define FPU_SUB 4'h1
|
||||
`define FPU_MUL 4'h2
|
||||
|
||||
@@ -27,8 +27,7 @@ module VX_execute #(
|
||||
// inputs
|
||||
VX_alu_req_if alu_req_if,
|
||||
VX_lsu_req_if lsu_req_if,
|
||||
VX_csr_req_if csr_req_if,
|
||||
VX_mul_req_if mul_req_if,
|
||||
VX_csr_req_if csr_req_if,
|
||||
VX_fpu_req_if fpu_req_if,
|
||||
VX_gpu_req_if gpu_req_if,
|
||||
|
||||
@@ -39,7 +38,6 @@ module VX_execute #(
|
||||
VX_commit_if ld_commit_if,
|
||||
VX_commit_if st_commit_if,
|
||||
VX_commit_if csr_commit_if,
|
||||
VX_commit_if mul_commit_if,
|
||||
VX_commit_if fpu_commit_if,
|
||||
VX_commit_if gpu_commit_if,
|
||||
|
||||
@@ -93,26 +91,6 @@ module VX_execute #(
|
||||
.busy (busy)
|
||||
);
|
||||
|
||||
`ifdef EXT_M_ENABLE
|
||||
VX_mul_unit #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) mul_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.mul_req_if (mul_req_if),
|
||||
.mul_commit_if (mul_commit_if)
|
||||
);
|
||||
`else
|
||||
assign mul_req_if.ready = 0;
|
||||
assign mul_commit_if.valid = 0;
|
||||
assign mul_commit_if.wid = 0;
|
||||
assign mul_commit_if.PC = 0;
|
||||
assign mul_commit_if.tmask = 0;
|
||||
assign mul_commit_if.wb = 0;
|
||||
assign mul_commit_if.rd = 0;
|
||||
assign mul_commit_if.data = 0;
|
||||
`endif
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
VX_fpu_unit #(
|
||||
.CORE_ID(CORE_ID)
|
||||
@@ -155,7 +133,7 @@ module VX_execute #(
|
||||
);
|
||||
|
||||
assign ebreak = alu_req_if.valid
|
||||
&& alu_req_if.is_br_op
|
||||
&& `IS_BR_MOD(alu_req_if.op_mod)
|
||||
&& (`BR_OP(alu_req_if.op_type) == `BR_EBREAK
|
||||
|| `BR_OP(alu_req_if.op_type) == `BR_ECALL);
|
||||
|
||||
|
||||
@@ -12,11 +12,15 @@ module VX_instr_demux (
|
||||
VX_alu_req_if alu_req_if,
|
||||
VX_lsu_req_if lsu_req_if,
|
||||
VX_csr_req_if csr_req_if,
|
||||
VX_mul_req_if mul_req_if,
|
||||
VX_fpu_req_if fpu_req_if,
|
||||
VX_gpu_req_if gpu_req_if
|
||||
);
|
||||
wire [`NT_BITS-1:0] tid;
|
||||
wire alu_req_ready;
|
||||
wire lsu_req_ready;
|
||||
wire csr_req_ready;
|
||||
wire fpu_req_ready;
|
||||
wire gpu_req_ready;
|
||||
|
||||
VX_priority_encoder #(
|
||||
.N (`NUM_THREADS)
|
||||
@@ -32,20 +36,17 @@ module VX_instr_demux (
|
||||
// ALU unit
|
||||
|
||||
wire alu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_ALU);
|
||||
wire alu_req_ready;
|
||||
|
||||
wire is_br_op = `IS_BR_MOD(execute_if.op_mod);
|
||||
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `ALU_BR_BITS + 1 + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS + (2 * `NUM_THREADS * 32)),
|
||||
.NOBACKPRESSURE (1) // ALU has no back pressure
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `ALU_BITS + `MOD_BITS + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS + (2 * `NUM_THREADS * 32)),
|
||||
.BUFFERED (1)
|
||||
) alu_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (alu_req_valid),
|
||||
.ready_in (alu_req_ready),
|
||||
.data_in ({execute_if.wid, execute_if.tmask, execute_if.PC, next_PC, `ALU_BR_OP(execute_if.op_type), is_br_op, execute_if.imm, execute_if.rs1_is_PC, execute_if.rs2_is_imm, execute_if.rd, execute_if.wb, tid, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
|
||||
.data_out ({alu_req_if.wid, alu_req_if.tmask, alu_req_if.PC, alu_req_if.next_PC, alu_req_if.op_type, alu_req_if.is_br_op, alu_req_if.imm, alu_req_if.rs1_is_PC, alu_req_if.rs2_is_imm, alu_req_if.rd, alu_req_if.wb, alu_req_if.tid, alu_req_if.rs1_data, alu_req_if.rs2_data}),
|
||||
.data_in ({execute_if.wid, execute_if.tmask, execute_if.PC, next_PC, `ALU_OP(execute_if.op_type), execute_if.op_mod, execute_if.imm, execute_if.rs1_is_PC, execute_if.rs2_is_imm, execute_if.rd, execute_if.wb, tid, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
|
||||
.data_out ({alu_req_if.wid, alu_req_if.tmask, alu_req_if.PC, alu_req_if.next_PC, alu_req_if.op_type, alu_req_if.op_mod, alu_req_if.imm, alu_req_if.rs1_is_PC, alu_req_if.rs2_is_imm, alu_req_if.rd, alu_req_if.wb, alu_req_if.tid, alu_req_if.rs1_data, alu_req_if.rs2_data}),
|
||||
.valid_out (alu_req_if.valid),
|
||||
.ready_out (alu_req_if.ready)
|
||||
);
|
||||
@@ -53,7 +54,6 @@ module VX_instr_demux (
|
||||
// lsu unit
|
||||
|
||||
wire lsu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_LSU);
|
||||
wire lsu_req_ready;
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `LSU_BITS + 32 + `NR_BITS + 1 + (2 * `NUM_THREADS * 32)),
|
||||
@@ -72,7 +72,6 @@ module VX_instr_demux (
|
||||
// csr unit
|
||||
|
||||
wire csr_req_valid = execute_if.valid && (execute_if.ex_type == `EX_CSR);
|
||||
wire csr_req_ready;
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NR_BITS + 32),
|
||||
@@ -88,33 +87,11 @@ module VX_instr_demux (
|
||||
.ready_out (csr_req_if.ready)
|
||||
);
|
||||
|
||||
// mul unit
|
||||
|
||||
`ifdef EXT_M_ENABLE
|
||||
wire mul_req_valid = execute_if.valid && (execute_if.ex_type == `EX_MUL);
|
||||
wire mul_req_ready;
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `MUL_BITS + `NR_BITS + 1 + (2 * `NUM_THREADS * 32)),
|
||||
.BUFFERED (1)
|
||||
) mul_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (mul_req_valid),
|
||||
.ready_in (mul_req_ready),
|
||||
.data_in ({execute_if.wid, execute_if.tmask, execute_if.PC, `MUL_OP(execute_if.op_type), execute_if.rd, execute_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
|
||||
.data_out ({mul_req_if.wid, mul_req_if.tmask, mul_req_if.PC, mul_req_if.op_type, mul_req_if.rd, mul_req_if.wb, mul_req_if.rs1_data, mul_req_if.rs2_data}),
|
||||
.valid_out (mul_req_if.valid),
|
||||
.ready_out (mul_req_if.ready)
|
||||
);
|
||||
`endif
|
||||
|
||||
// fpu unit
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
wire fpu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_FPU);
|
||||
wire fpu_req_ready;
|
||||
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `FPU_BITS + `MOD_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32)),
|
||||
.BUFFERED (1)
|
||||
@@ -130,12 +107,12 @@ module VX_instr_demux (
|
||||
);
|
||||
`else
|
||||
`UNUSED_VAR (gpr_rsp_if.rs3_data)
|
||||
assign fpu_req_ready = 0;
|
||||
`endif
|
||||
|
||||
// gpu unit
|
||||
|
||||
wire gpu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_GPU);
|
||||
wire gpu_req_ready;
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `GPU_BITS + `NR_BITS + 1 + (`NUM_THREADS * 32 + 32)),
|
||||
@@ -158,7 +135,6 @@ module VX_instr_demux (
|
||||
`EX_ALU: ready_r = alu_req_ready;
|
||||
`EX_LSU: ready_r = lsu_req_ready;
|
||||
`EX_CSR: ready_r = csr_req_ready;
|
||||
`EX_MUL: ready_r = mul_req_ready;
|
||||
`EX_FPU: ready_r = fpu_req_ready;
|
||||
`EX_GPU: ready_r = gpu_req_ready;
|
||||
default: ready_r = 1'b1; // ignore NOPs
|
||||
|
||||
@@ -18,7 +18,6 @@ module VX_issue #(
|
||||
VX_alu_req_if alu_req_if,
|
||||
VX_lsu_req_if lsu_req_if,
|
||||
VX_csr_req_if csr_req_if,
|
||||
VX_mul_req_if mul_req_if,
|
||||
VX_fpu_req_if fpu_req_if,
|
||||
VX_gpu_req_if gpu_req_if
|
||||
);
|
||||
@@ -86,7 +85,6 @@ module VX_issue #(
|
||||
.alu_req_if (alu_req_if),
|
||||
.lsu_req_if (lsu_req_if),
|
||||
.csr_req_if (csr_req_if),
|
||||
.mul_req_if (mul_req_if),
|
||||
.fpu_req_if (fpu_req_if),
|
||||
.gpu_req_if (gpu_req_if)
|
||||
);
|
||||
@@ -129,9 +127,6 @@ module VX_issue #(
|
||||
reg [63:0] perf_lsu_stalls;
|
||||
reg [63:0] perf_csr_stalls;
|
||||
reg [63:0] perf_gpu_stalls;
|
||||
`ifdef EXT_M_ENABLE
|
||||
reg [63:0] perf_mul_stalls;
|
||||
`endif
|
||||
`ifdef EXT_F_ENABLE
|
||||
reg [63:0] perf_fpu_stalls;
|
||||
`endif
|
||||
@@ -144,9 +139,6 @@ module VX_issue #(
|
||||
perf_lsu_stalls <= 0;
|
||||
perf_csr_stalls <= 0;
|
||||
perf_gpu_stalls <= 0;
|
||||
`ifdef EXT_M_ENABLE
|
||||
perf_mul_stalls <= 0;
|
||||
`endif
|
||||
`ifdef EXT_F_ENABLE
|
||||
perf_fpu_stalls <= 0;
|
||||
`endif
|
||||
@@ -169,11 +161,6 @@ module VX_issue #(
|
||||
if (gpu_req_if.valid & !gpu_req_if.ready) begin
|
||||
perf_gpu_stalls <= perf_gpu_stalls + 64'd1;
|
||||
end
|
||||
`ifdef EXT_M_ENABLE
|
||||
if (mul_req_if.valid & !mul_req_if.ready) begin
|
||||
perf_mul_stalls <= perf_mul_stalls + 64'd1;
|
||||
end
|
||||
`endif
|
||||
`ifdef EXT_F_ENABLE
|
||||
if (fpu_req_if.valid & !fpu_req_if.ready) begin
|
||||
perf_fpu_stalls <= perf_fpu_stalls + 64'd1;
|
||||
@@ -188,9 +175,6 @@ module VX_issue #(
|
||||
assign perf_pipeline_if.lsu_stalls = perf_lsu_stalls;
|
||||
assign perf_pipeline_if.csr_stalls = perf_csr_stalls;
|
||||
assign perf_pipeline_if.gpu_stalls = perf_gpu_stalls;
|
||||
`ifdef EXT_M_ENABLE
|
||||
assign perf_pipeline_if.mul_stalls = perf_mul_stalls;
|
||||
`endif
|
||||
`ifdef EXT_F_ENABLE
|
||||
assign perf_pipeline_if.fpu_stalls = perf_fpu_stalls;
|
||||
`endif
|
||||
@@ -207,9 +191,6 @@ module VX_issue #(
|
||||
if (csr_req_if.valid && csr_req_if.ready) begin
|
||||
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=CSR, tmask=%b, rd=%0d, addr=%0h, rs1_data=%0h", $time, CORE_ID, csr_req_if.wid, csr_req_if.PC, csr_req_if.tmask, csr_req_if.rd, csr_req_if.csr_addr, csr_req_if.rs1_data);
|
||||
end
|
||||
if (mul_req_if.valid && mul_req_if.ready) begin
|
||||
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=MUL, tmask=%b, rd=%0d, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, mul_req_if.wid, mul_req_if.PC, mul_req_if.tmask, mul_req_if.rd, mul_req_if.rs1_data, mul_req_if.rs2_data);
|
||||
end
|
||||
if (fpu_req_if.valid && fpu_req_if.ready) begin
|
||||
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=FPU, tmask=%b, rd=%0d, rs1_data=%0h, rs2_data=%0h, rs3_data=%0h", $time, CORE_ID, fpu_req_if.wid, fpu_req_if.PC, fpu_req_if.tmask, fpu_req_if.rd, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data);
|
||||
end
|
||||
|
||||
@@ -1,26 +1,35 @@
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_mul_unit #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
module VX_muldiv (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// Inputs
|
||||
VX_mul_req_if mul_req_if,
|
||||
// Inputs
|
||||
input wire [`MUL_BITS-1:0] alu_op,
|
||||
input wire [`NW_BITS-1:0] wid_in,
|
||||
input wire [`NUM_THREADS-1:0] tmask_in,
|
||||
input wire [31:0] PC_in,
|
||||
input wire [`NR_BITS-1:0] rd_in,
|
||||
input wire wb_in,
|
||||
input wire [`NUM_THREADS-1:0][31:0] alu_in1,
|
||||
input wire [`NUM_THREADS-1:0][31:0] alu_in2,
|
||||
|
||||
// Outputs
|
||||
VX_commit_if mul_commit_if
|
||||
output wire [`NW_BITS-1:0] wid_out,
|
||||
output wire [`NUM_THREADS-1:0] tmask_out,
|
||||
output wire [31:0] PC_out,
|
||||
output wire [`NR_BITS-1:0] rd_out,
|
||||
output wire wb_out,
|
||||
output wire [`NUM_THREADS-1:0][31:0] data_out,
|
||||
|
||||
// handshake
|
||||
input wire valid_in,
|
||||
output wire ready_in,
|
||||
output wire valid_out,
|
||||
input wire ready_out
|
||||
);
|
||||
|
||||
wire [`MUL_BITS-1:0] alu_op = mul_req_if.op_type;
|
||||
wire is_div_op = `IS_DIV_OP(alu_op);
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_in1 = mul_req_if.rs1_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_in2 = mul_req_if.rs2_data;
|
||||
|
||||
wire ready_out;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
wire is_div_op = `IS_DIV_OP(alu_op);
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] mul_result;
|
||||
wire [`NW_BITS-1:0] mul_wid_out;
|
||||
@@ -29,9 +38,11 @@ module VX_mul_unit #(
|
||||
wire [`NR_BITS-1:0] mul_rd_out;
|
||||
wire mul_wb_out;
|
||||
|
||||
wire stall_out;
|
||||
|
||||
wire mul_valid_out;
|
||||
wire mul_valid_in = mul_req_if.valid && !is_div_op;
|
||||
wire mul_ready_in = ready_out || ~mul_valid_out;
|
||||
wire mul_valid_in = valid_in && !is_div_op;
|
||||
wire mul_ready_in = ~stall_out || ~mul_valid_out;
|
||||
|
||||
wire is_mulh_in = (alu_op != `MUL_MUL);
|
||||
wire is_mulh_out;
|
||||
@@ -68,8 +79,8 @@ module VX_mul_unit #(
|
||||
.clk(clk),
|
||||
.reset (reset),
|
||||
.enable (mul_ready_in),
|
||||
.data_in ({mul_valid_in, mul_req_if.wid, mul_req_if.tmask, mul_req_if.PC, mul_req_if.rd, mul_req_if.wb, is_mulh_in}),
|
||||
.data_out ({mul_valid_out, mul_wid_out, mul_tmask_out, mul_PC_out, mul_rd_out, mul_wb_out, is_mulh_out})
|
||||
.data_in ({mul_valid_in, wid_in, tmask_in, PC_in, rd_in, wb_in, is_mulh_in}),
|
||||
.data_out ({mul_valid_out, mul_wid_out, mul_tmask_out, mul_PC_out, mul_rd_out, mul_wb_out, is_mulh_out})
|
||||
);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
@@ -83,8 +94,8 @@ module VX_mul_unit #(
|
||||
|
||||
wire is_rem_op_in = (alu_op == `MUL_REM) || (alu_op == `MUL_REMU);
|
||||
wire is_signed_div = (alu_op == `MUL_DIV) || (alu_op == `MUL_REM);
|
||||
wire div_valid_in = mul_req_if.valid && is_div_op;
|
||||
wire div_ready_out = ready_out && ~mul_valid_out; // arbitration prioritizes MUL
|
||||
wire div_valid_in = valid_in && is_div_op;
|
||||
wire div_ready_out = ~stall_out && ~mul_valid_out; // arbitration prioritizes MUL
|
||||
wire div_ready_in;
|
||||
wire div_valid_out;
|
||||
wire is_rem_op_out;
|
||||
@@ -102,7 +113,7 @@ module VX_mul_unit #(
|
||||
.valid_in (div_valid_in),
|
||||
.ready_in (div_ready_in),
|
||||
.signed_mode(is_signed_div),
|
||||
.tag_in ({mul_req_if.wid, mul_req_if.tmask, mul_req_if.PC, mul_req_if.rd, mul_req_if.wb, is_rem_op_in}),
|
||||
.tag_in ({wid_in, tmask_in, PC_in, rd_in, wb_in, is_rem_op_in}),
|
||||
.numer (alu_in1),
|
||||
.denom (alu_in2),
|
||||
.quotient (div_result_tmp),
|
||||
@@ -116,9 +127,6 @@ module VX_mul_unit #(
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
wire stall_out = ~mul_commit_if.ready && mul_commit_if.valid;
|
||||
assign ready_out = ~stall_out;
|
||||
|
||||
wire rsp_valid = mul_valid_out || div_valid_out;
|
||||
wire [`NW_BITS-1:0] rsp_wid = mul_valid_out ? mul_wid_out : div_wid_out;
|
||||
wire [`NUM_THREADS-1:0] rsp_tmask = mul_valid_out ? mul_tmask_out : div_tmask_out;
|
||||
@@ -127,20 +135,20 @@ module VX_mul_unit #(
|
||||
wire rsp_wb = mul_valid_out ? mul_wb_out : div_wb_out;
|
||||
wire [`NUM_THREADS-1:0][31:0] rsp_data = mul_valid_out ? mul_result : div_result;
|
||||
|
||||
assign stall_out = ~ready_out && valid_out;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)),
|
||||
.RESETW (1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (!stall_out),
|
||||
.data_in ({rsp_valid, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, rsp_data}),
|
||||
.data_out ({mul_commit_if.valid, mul_commit_if.wid, mul_commit_if.tmask, mul_commit_if.PC, mul_commit_if.rd, mul_commit_if.wb, mul_commit_if.data})
|
||||
.enable (~stall_out),
|
||||
.data_in ({rsp_valid, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, rsp_data}),
|
||||
.data_out ({valid_out, wid_out, tmask_out, PC_out, rd_out, wb_out, data_out})
|
||||
);
|
||||
|
||||
assign mul_commit_if.eop = 1'b1;
|
||||
|
||||
// can accept new request?
|
||||
assign mul_req_if.ready = is_div_op ? div_ready_in : mul_ready_in;
|
||||
assign ready_in = is_div_op ? div_ready_in : mul_ready_in;
|
||||
|
||||
endmodule
|
||||
@@ -150,8 +150,7 @@ module VX_pipeline #(
|
||||
VX_ifetch_rsp_if ifetch_rsp_if();
|
||||
VX_alu_req_if alu_req_if();
|
||||
VX_lsu_req_if lsu_req_if();
|
||||
VX_csr_req_if csr_req_if();
|
||||
VX_mul_req_if mul_req_if();
|
||||
VX_csr_req_if csr_req_if();
|
||||
VX_fpu_req_if fpu_req_if();
|
||||
VX_gpu_req_if gpu_req_if();
|
||||
VX_writeback_if writeback_if();
|
||||
@@ -160,8 +159,7 @@ module VX_pipeline #(
|
||||
VX_commit_if alu_commit_if();
|
||||
VX_commit_if ld_commit_if();
|
||||
VX_commit_if st_commit_if();
|
||||
VX_commit_if csr_commit_if();
|
||||
VX_commit_if mul_commit_if();
|
||||
VX_commit_if csr_commit_if();
|
||||
VX_commit_if fpu_commit_if();
|
||||
VX_commit_if gpu_commit_if();
|
||||
|
||||
@@ -214,7 +212,6 @@ module VX_pipeline #(
|
||||
.alu_req_if (alu_req_if),
|
||||
.lsu_req_if (lsu_req_if),
|
||||
.csr_req_if (csr_req_if),
|
||||
.mul_req_if (mul_req_if),
|
||||
.fpu_req_if (fpu_req_if),
|
||||
.gpu_req_if (gpu_req_if)
|
||||
);
|
||||
@@ -243,7 +240,6 @@ module VX_pipeline #(
|
||||
.alu_req_if (alu_req_if),
|
||||
.lsu_req_if (lsu_req_if),
|
||||
.csr_req_if (csr_req_if),
|
||||
.mul_req_if (mul_req_if),
|
||||
.fpu_req_if (fpu_req_if),
|
||||
.gpu_req_if (gpu_req_if),
|
||||
|
||||
@@ -253,7 +249,6 @@ module VX_pipeline #(
|
||||
.ld_commit_if (ld_commit_if),
|
||||
.st_commit_if (st_commit_if),
|
||||
.csr_commit_if (csr_commit_if),
|
||||
.mul_commit_if (mul_commit_if),
|
||||
.fpu_commit_if (fpu_commit_if),
|
||||
.gpu_commit_if (gpu_commit_if),
|
||||
|
||||
@@ -271,7 +266,6 @@ module VX_pipeline #(
|
||||
.ld_commit_if (ld_commit_if),
|
||||
.st_commit_if (st_commit_if),
|
||||
.csr_commit_if (csr_commit_if),
|
||||
.mul_commit_if (mul_commit_if),
|
||||
.fpu_commit_if (fpu_commit_if),
|
||||
.gpu_commit_if (gpu_commit_if),
|
||||
|
||||
|
||||
@@ -10,7 +10,6 @@ task print_ex_type (
|
||||
`EX_ALU: $write("ALU");
|
||||
`EX_LSU: $write("LSU");
|
||||
`EX_CSR: $write("CSR");
|
||||
`EX_MUL: $write("MUL");
|
||||
`EX_FPU: $write("FPU");
|
||||
`EX_GPU: $write("GPU");
|
||||
default: $write("NOP");
|
||||
@@ -41,6 +40,18 @@ task print_ex_op (
|
||||
`BR_DRET: $write("DRET");
|
||||
default: $write("?");
|
||||
endcase
|
||||
end else if (`IS_MUL_MOD(op_mod)) begin
|
||||
case (`MUL_BITS'(op_type))
|
||||
`MUL_MUL: $write("MUL");
|
||||
`MUL_MULH: $write("MULH");
|
||||
`MUL_MULHSU:$write("MULHSU");
|
||||
`MUL_MULHU: $write("MULHU");
|
||||
`MUL_DIV: $write("DIV");
|
||||
`MUL_DIVU: $write("DIVU");
|
||||
`MUL_REM: $write("REM");
|
||||
`MUL_REMU: $write("REMU");
|
||||
default: $write("?");
|
||||
endcase
|
||||
end else begin
|
||||
case (`ALU_BITS'(op_type))
|
||||
`ALU_ADD: $write("ADD");
|
||||
@@ -77,19 +88,6 @@ task print_ex_op (
|
||||
default: $write("?");
|
||||
endcase
|
||||
end
|
||||
`EX_MUL: begin
|
||||
case (`MUL_BITS'(op_type))
|
||||
`MUL_MUL: $write("MUL");
|
||||
`MUL_MULH: $write("MULH");
|
||||
`MUL_MULHSU:$write("MULHSU");
|
||||
`MUL_MULHU: $write("MULHU");
|
||||
`MUL_DIV: $write("DIV");
|
||||
`MUL_DIVU: $write("DIVU");
|
||||
`MUL_REM: $write("REM");
|
||||
`MUL_REMU: $write("REMU");
|
||||
default: $write("?");
|
||||
endcase
|
||||
end
|
||||
`EX_FPU: begin
|
||||
case (`FPU_BITS'(op_type))
|
||||
`FPU_ADD: $write("ADD");
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
|
||||
`define SCOPE_ASSIGN(d,s) assign scope_``d = s
|
||||
|
||||
`define SCOPE_SIZE 4096
|
||||
`define SCOPE_SIZE 1024
|
||||
|
||||
`else
|
||||
|
||||
|
||||
@@ -10,17 +10,15 @@ module VX_writeback #(
|
||||
VX_commit_if alu_commit_if,
|
||||
VX_commit_if ld_commit_if,
|
||||
VX_commit_if csr_commit_if,
|
||||
VX_commit_if mul_commit_if,
|
||||
VX_commit_if fpu_commit_if,
|
||||
|
||||
// outputs
|
||||
VX_writeback_if writeback_if
|
||||
);
|
||||
wire alu_valid = alu_commit_if.valid && alu_commit_if.wb;
|
||||
wire ld_valid = ld_commit_if.valid && ld_commit_if.wb;
|
||||
wire ld_valid = ld_commit_if.valid && ld_commit_if.wb;
|
||||
wire fpu_valid = fpu_commit_if.valid && fpu_commit_if.wb;
|
||||
wire csr_valid = csr_commit_if.valid && csr_commit_if.wb;
|
||||
wire mul_valid = mul_commit_if.valid && mul_commit_if.wb;
|
||||
/*wire fpu_valid = fpu_commit_if.valid && fpu_commit_if.wb;*/
|
||||
wire alu_valid = alu_commit_if.valid && alu_commit_if.wb;
|
||||
|
||||
wire wb_valid;
|
||||
wire [`NW_BITS-1:0] wb_wid;
|
||||
@@ -30,47 +28,40 @@ module VX_writeback #(
|
||||
wire [`NUM_THREADS-1:0][31:0] wb_data;
|
||||
wire wb_eop;
|
||||
|
||||
assign wb_valid = alu_valid ? alu_commit_if.valid :
|
||||
ld_valid ? ld_commit_if.valid :
|
||||
csr_valid ? csr_commit_if.valid :
|
||||
mul_valid ? mul_commit_if.valid :
|
||||
/*fpu_valid ?*/ fpu_commit_if.valid;
|
||||
assign wb_valid = ld_valid |
|
||||
fpu_valid |
|
||||
csr_valid |
|
||||
alu_valid;
|
||||
|
||||
assign wb_wid = alu_valid ? alu_commit_if.wid :
|
||||
ld_valid ? ld_commit_if.wid :
|
||||
csr_valid ? csr_commit_if.wid :
|
||||
mul_valid ? mul_commit_if.wid :
|
||||
/*fpu_valid ?*/ fpu_commit_if.wid;
|
||||
assign wb_wid = ld_valid ? ld_commit_if.wid :
|
||||
fpu_valid ? fpu_commit_if.wid :
|
||||
csr_valid ? csr_commit_if.wid :
|
||||
/*alu_valid ?*/ alu_commit_if.wid;
|
||||
|
||||
assign wb_PC = alu_valid ? alu_commit_if.PC :
|
||||
ld_valid ? ld_commit_if.PC :
|
||||
csr_valid ? csr_commit_if.PC :
|
||||
mul_valid ? mul_commit_if.PC :
|
||||
/*fpu_valid ?*/ fpu_commit_if.PC;
|
||||
assign wb_PC = ld_valid ? ld_commit_if.PC :
|
||||
fpu_valid ? fpu_commit_if.PC :
|
||||
csr_valid ? csr_commit_if.PC :
|
||||
/*alu_valid ?*/ alu_commit_if.PC;
|
||||
|
||||
assign wb_tmask = alu_valid ? alu_commit_if.tmask :
|
||||
ld_valid ? ld_commit_if.tmask :
|
||||
csr_valid ? csr_commit_if.tmask :
|
||||
mul_valid ? mul_commit_if.tmask :
|
||||
/*fpu_valid ?*/ fpu_commit_if.tmask;
|
||||
assign wb_tmask = ld_valid ? ld_commit_if.tmask :
|
||||
fpu_valid ? fpu_commit_if.tmask :
|
||||
csr_valid ? csr_commit_if.tmask :
|
||||
/*alu_valid ?*/ alu_commit_if.tmask;
|
||||
|
||||
assign wb_rd = alu_valid ? alu_commit_if.rd :
|
||||
ld_valid ? ld_commit_if.rd :
|
||||
csr_valid ? csr_commit_if.rd :
|
||||
mul_valid ? mul_commit_if.rd :
|
||||
/*fpu_valid ?*/ fpu_commit_if.rd;
|
||||
assign wb_rd = ld_valid ? ld_commit_if.rd :
|
||||
fpu_valid ? fpu_commit_if.rd :
|
||||
csr_valid ? csr_commit_if.rd :
|
||||
/*alu_valid ?*/ alu_commit_if.rd;
|
||||
|
||||
assign wb_data = alu_valid ? alu_commit_if.data :
|
||||
ld_valid ? ld_commit_if.data :
|
||||
csr_valid ? csr_commit_if.data :
|
||||
mul_valid ? mul_commit_if.data :
|
||||
/*fpu_valid ?*/ fpu_commit_if.data;
|
||||
assign wb_data = ld_valid ? ld_commit_if.data :
|
||||
fpu_valid ? fpu_commit_if.data :
|
||||
csr_valid ? csr_commit_if.data :
|
||||
/*alu_valid ?*/ alu_commit_if.data;
|
||||
|
||||
assign wb_eop = alu_valid ? alu_commit_if.eop :
|
||||
ld_valid ? ld_commit_if.eop :
|
||||
csr_valid ? csr_commit_if.eop :
|
||||
mul_valid ? mul_commit_if.eop :
|
||||
/*fpu_valid ?*/ fpu_commit_if.eop;
|
||||
assign wb_eop = ld_valid ? ld_commit_if.eop :
|
||||
fpu_valid ? fpu_commit_if.eop :
|
||||
csr_valid ? csr_commit_if.eop :
|
||||
/*alu_valid ?*/ alu_commit_if.eop;
|
||||
|
||||
wire stall = ~writeback_if.ready && writeback_if.valid;
|
||||
|
||||
@@ -85,11 +76,10 @@ module VX_writeback #(
|
||||
.data_out ({writeback_if.valid, writeback_if.wid, writeback_if.PC, writeback_if.tmask, writeback_if.rd, writeback_if.data, writeback_if.eop})
|
||||
);
|
||||
|
||||
assign alu_commit_if.ready = !stall;
|
||||
assign ld_commit_if.ready = !stall && !alu_valid;
|
||||
assign csr_commit_if.ready = !stall && !alu_valid && !ld_valid;
|
||||
assign mul_commit_if.ready = !stall && !alu_valid && !ld_valid && !csr_valid;
|
||||
assign fpu_commit_if.ready = !stall && !alu_valid && !ld_valid && !csr_valid && !mul_valid;
|
||||
assign ld_commit_if.ready = !stall;
|
||||
assign fpu_commit_if.ready = !stall && !ld_valid;
|
||||
assign csr_commit_if.ready = !stall && !ld_valid && !fpu_valid;
|
||||
assign alu_commit_if.ready = !stall && !ld_valid && !fpu_valid && !csr_valid;
|
||||
|
||||
// special workaround to get RISC-V tests Pass/Fail status
|
||||
reg [31:0] last_wb_value [`NUM_REGS-1:0] /* verilator public */;
|
||||
|
||||
2
hw/rtl/cache/VX_bank.v
vendored
2
hw/rtl/cache/VX_bank.v
vendored
@@ -301,7 +301,7 @@ module VX_bank #(
|
||||
// read/Fill
|
||||
.lookup (valid_st0 && !is_fill_st0),
|
||||
.addr (addr_st0),
|
||||
.fill (valid_st0 && is_fill_st0),
|
||||
.fill (valid_st0 && is_fill_st0 && !crsq_in_stall),
|
||||
.is_flush (is_flush_st0),
|
||||
.tag_match (tag_match_st0)
|
||||
);
|
||||
|
||||
2
hw/rtl/cache/VX_cache.v
vendored
2
hw/rtl/cache/VX_cache.v
vendored
@@ -20,7 +20,7 @@ module VX_cache #(
|
||||
// Core Request Queue Size
|
||||
parameter CREQ_SIZE = 4,
|
||||
// Miss Reserv Queue Knob
|
||||
parameter MSHR_SIZE = 16,
|
||||
parameter MSHR_SIZE = 8,
|
||||
// DRAM Response Queue Size
|
||||
parameter DRSQ_SIZE = 4,
|
||||
// DRAM Request Queue Size
|
||||
|
||||
2
hw/rtl/cache/VX_shared_mem.v
vendored
2
hw/rtl/cache/VX_shared_mem.v
vendored
@@ -164,7 +164,7 @@ module VX_shared_mem #(
|
||||
) data (
|
||||
.clk (clk),
|
||||
.addr (per_bank_core_req_addr[i]),
|
||||
.wren (per_bank_core_req_valid[i] && per_bank_core_req_rw[i] && crsq_in_ready),
|
||||
.wren (per_bank_core_req_valid[i] && per_bank_core_req_rw[i]),
|
||||
.byteen (per_bank_core_req_byteen[i]),
|
||||
.rden (1'b1),
|
||||
.din (per_bank_core_req_data[i]),
|
||||
|
||||
@@ -10,8 +10,8 @@ interface VX_alu_req_if ();
|
||||
wire [`NUM_THREADS-1:0] tmask;
|
||||
wire [31:0] PC;
|
||||
wire [31:0] next_PC;
|
||||
wire [`ALU_BR_BITS-1:0] op_type;
|
||||
wire is_br_op;
|
||||
wire [`ALU_BITS-1:0] op_type;
|
||||
wire [`MOD_BITS-1:0] op_mod;
|
||||
wire rs1_is_PC;
|
||||
wire rs2_is_imm;
|
||||
wire [31:0] imm;
|
||||
|
||||
@@ -1,25 +0,0 @@
|
||||
`ifndef VX_MUL_REQ_IF
|
||||
`define VX_MUL_REQ_IF
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
`ifndef EXT_M_ENABLE
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
`endif
|
||||
|
||||
interface VX_mul_req_if ();
|
||||
|
||||
wire valid;
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
wire [`NUM_THREADS-1:0] tmask;
|
||||
wire [31:0] PC;
|
||||
wire [`MUL_BITS-1:0] op_type;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs2_data;
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire wb;
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
||||
@@ -10,9 +10,6 @@ interface VX_perf_pipeline_if ();
|
||||
wire [63:0] csr_stalls;
|
||||
wire [63:0] alu_stalls;
|
||||
wire [63:0] gpu_stalls;
|
||||
`ifdef EXT_M_ENABLE
|
||||
wire [63:0] mul_stalls;
|
||||
`endif
|
||||
`ifdef EXT_F_ENABLE
|
||||
wire [63:0] fpu_stalls;
|
||||
`endif
|
||||
|
||||
@@ -105,33 +105,16 @@ module VX_fifo_queue #(
|
||||
|
||||
if (0 == BUFFERED) begin
|
||||
|
||||
if (FASTRAM) begin
|
||||
|
||||
`USE_FAST_BRAM reg [DATAW-1:0] shift_reg [SIZE];
|
||||
reg [1:0][DATAW-1:0] shift_reg;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (push) begin
|
||||
shift_reg[1] <= shift_reg[0];
|
||||
shift_reg[0] <= data_in;
|
||||
end
|
||||
always @(posedge clk) begin
|
||||
if (push) begin
|
||||
shift_reg[1] <= shift_reg[0];
|
||||
shift_reg[0] <= data_in;
|
||||
end
|
||||
|
||||
assign data_out = shift_reg[~used_r[0]];
|
||||
|
||||
end else begin
|
||||
|
||||
reg [DATAW-1:0] shift_reg [SIZE];
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (push) begin
|
||||
shift_reg[1] <= shift_reg[0];
|
||||
shift_reg[0] <= data_in;
|
||||
end
|
||||
end
|
||||
|
||||
assign data_out = shift_reg[~used_r[0]];
|
||||
|
||||
end
|
||||
|
||||
assign data_out = shift_reg[!used_r[0]];
|
||||
|
||||
end else begin
|
||||
|
||||
@@ -142,7 +125,7 @@ module VX_fifo_queue #(
|
||||
if (push) begin
|
||||
buffer <= data_in;
|
||||
end
|
||||
if (push && (empty_r || ((used_r == ADDRW'(1)) && pop))) begin
|
||||
if (push && (empty_r || (used_r && pop))) begin
|
||||
data_out_r <= data_in;
|
||||
end else if (pop) begin
|
||||
data_out_r <= buffer;
|
||||
|
||||
@@ -59,6 +59,7 @@ module VX_skid_buffer #(
|
||||
reg use_buffer;
|
||||
|
||||
wire push = valid_in && ready_in;
|
||||
wire pop = !valid_out_r || ready_out;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
@@ -68,11 +69,11 @@ module VX_skid_buffer #(
|
||||
if (ready_out) begin
|
||||
use_buffer <= 0;
|
||||
end
|
||||
if (push && valid_out_r && !ready_out) begin
|
||||
if (push && !pop) begin
|
||||
assert(!use_buffer);
|
||||
use_buffer <= 1;
|
||||
end
|
||||
if (!valid_out_r || ready_out) begin
|
||||
if (pop) begin
|
||||
valid_out_r <= valid_in || use_buffer;
|
||||
end
|
||||
end
|
||||
@@ -82,7 +83,7 @@ module VX_skid_buffer #(
|
||||
if (push) begin
|
||||
buffer <= data_in;
|
||||
end
|
||||
if (!valid_out_r || ready_out) begin
|
||||
if (pop) begin
|
||||
data_out_r <= use_buffer ? buffer : data_in;
|
||||
end
|
||||
end
|
||||
@@ -118,8 +119,8 @@ module VX_skid_buffer #(
|
||||
);
|
||||
|
||||
assign ready_in = !q_full;
|
||||
assign valid_out = !q_empty;
|
||||
|
||||
assign valid_out = !q_empty;
|
||||
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
@@ -27,6 +27,9 @@ SINGLECORE += -DNUM_CLUSTERS=1 -DNUM_CORES=1 -DL2_ENABLE=0
|
||||
#MULTICORE ?= -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1
|
||||
MULTICORE ?= -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
|
||||
|
||||
SINGLECORE += $(CONFIGS)
|
||||
MULTICORE += $(CONFIGS)
|
||||
|
||||
TOP = Vortex
|
||||
|
||||
RTL_DIR=../rtl
|
||||
@@ -49,7 +52,7 @@ VL_FLAGS += --cc Vortex.v --top-module $(TOP)
|
||||
# Use FPNEW PFU core
|
||||
VL_FLAGS += -DFPU_FPNEW
|
||||
|
||||
DBG_FLAGS += -DVCD_OUTPUT $(DBG_FLAGS)
|
||||
DBG_FLAGS += -DVCD_OUTPUT
|
||||
|
||||
THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu_count() // 2))')
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@ CP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objcopy
|
||||
CFLAGS += -march=rv32imf -mabi=ilp32f -O3 -Wstack-usage=1024 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections
|
||||
CFLAGS += -I$(VORTEX_RT_PATH)/include -I$(VORTEX_RT_PATH)/../hw
|
||||
|
||||
LDFLAGS += -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/linker/vx_link.ld -Wl,--gc-sections $(VORTEX_RT_PATH)/libvortexrt.a
|
||||
LDFLAGS += -lm -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/linker/vx_link.ld -Wl,--gc-sections $(VORTEX_RT_PATH)/libvortexrt.a
|
||||
|
||||
PROJECT = vx_nl_main
|
||||
|
||||
|
||||
@@ -1,16 +1,22 @@
|
||||
#include <vx_intrinsics.h>
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include <vx_print.h>
|
||||
|
||||
int main()
|
||||
{
|
||||
// Main is called with all threads active of warp 0
|
||||
vx_tmc(1);
|
||||
const int Num = 9;
|
||||
const float fNum = 9.0f;
|
||||
|
||||
vx_prints("Newlib Main ");
|
||||
vx_printx(456);
|
||||
vx_prints(" \n");
|
||||
int fibonacci(int n) {
|
||||
if (n <= 1)
|
||||
return n;
|
||||
return fibonacci(n-1) + fibonacci(n-2);
|
||||
}
|
||||
|
||||
int main() {
|
||||
int fib = fibonacci(Num);
|
||||
float isq = 1.0f / sqrt(fNum);
|
||||
vx_printf("fibonacci(%d) = %d\n", Num, fib);
|
||||
vx_printf("invAqrt(%f) = %f\n", fNum, isq);
|
||||
vx_prints("Passed!\n");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Binary file not shown.
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user