moving MUL unit into ALU unit

This commit is contained in:
Blaise Tine
2021-02-23 13:49:02 -08:00
parent 1792571e1b
commit 700f9eea19
30 changed files with 112978 additions and 9680 deletions

View File

@@ -1,9 +1,19 @@
#!/bin/bash
# Regression entry point. With `set -e` the script aborts as soon as any
# command below returns a non-zero status.
set -e

# Dogfood tests, executed in a fixed order.
for suite in runtime riscv_isa opencl driver; do
    "./ci/test_${suite}.sh"
done

# Build the simulator once per disabled extension (M, then F).
for ext in M F; do
    CONFIGS="-DEXT_${ext}_DISABLE" make -C hw/simulate
done

# Blackbox runs of the demo app on vlsim, one run per mode flag.
for mode in perf debug scope; do
    ./ci/travis_run.py ./ci/blackbox.sh --driver=vlsim --cores=1 "--${mode}" --app=demo --args="-n1"
done

View File

@@ -93,7 +93,6 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
uint64_t scoreboard_stalls = 0;
uint64_t lsu_stalls = 0;
uint64_t fpu_stalls = 0;
uint64_t mul_stalls = 0;
uint64_t csr_stalls = 0;
uint64_t alu_stalls = 0;
uint64_t gpu_stalls = 0;
@@ -158,12 +157,7 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
uint64_t csr_stalls_per_core;
ret |= vx_csr_get_l(device, core_id, CSR_MPM_CSR_ST, CSR_MPM_CSR_ST_H, &csr_stalls_per_core);
if (num_cores > 1) fprintf(stream, "PERF: core%d: csr unit stalls=%ld\n", core_id, csr_stalls_per_core);
csr_stalls += csr_stalls_per_core;
// mul_stall
uint64_t mul_stalls_per_core;
ret |= vx_csr_get_l(device, core_id, CSR_MPM_MUL_ST, CSR_MPM_MUL_ST_H, &mul_stalls_per_core);
if (num_cores > 1) fprintf(stream, "PERF: core%d: mul unit stalls=%ld\n", core_id, mul_stalls_per_core);
mul_stalls += mul_stalls_per_core;
csr_stalls += csr_stalls_per_core;
// fpu_stall
uint64_t fpu_stalls_per_core;
ret |= vx_csr_get_l(device, core_id, CSR_MPM_FPU_ST, CSR_MPM_FPU_ST_H, &fpu_stalls_per_core);
@@ -295,7 +289,6 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
fprintf(stream, "PERF: alu unit stalls=%ld\n", alu_stalls);
fprintf(stream, "PERF: lsu unit stalls=%ld\n", lsu_stalls);
fprintf(stream, "PERF: csr unit stalls=%ld\n", csr_stalls);
fprintf(stream, "PERF: mul unit stalls=%ld\n", mul_stalls);
fprintf(stream, "PERF: fpu unit stalls=%ld\n", fpu_stalls);
fprintf(stream, "PERF: gpu unit stalls=%ld\n", gpu_stalls);
fprintf(stream, "PERF: icache reads=%ld\n", icache_reads);

View File

@@ -13,13 +13,16 @@ module VX_alu_unit #(
VX_branch_ctl_if branch_ctl_if,
VX_commit_if alu_commit_if
);
reg [`NUM_THREADS-1:0][31:0] alu_result;
reg [`NUM_THREADS-1:0][31:0] add_result;
reg [`NUM_THREADS-1:0][32:0] sub_result;
reg [`NUM_THREADS-1:0][31:0] shr_result;
reg [`NUM_THREADS-1:0][31:0] msc_result;
reg [`NUM_THREADS-1:0][31:0] alu_result;
wire [`NUM_THREADS-1:0][31:0] add_result;
wire [`NUM_THREADS-1:0][32:0] sub_result;
wire [`NUM_THREADS-1:0][31:0] shr_result;
reg [`NUM_THREADS-1:0][31:0] msc_result;
wire is_br_op = alu_req_if.is_br_op;
wire stall_in, stall_out;
`UNUSED_VAR (alu_req_if.op_mod)
wire is_br_op = `IS_BR_MOD(alu_req_if.op_mod);
wire [`ALU_BITS-1:0] alu_op = `ALU_OP(alu_req_if.op_type);
wire [`BR_BITS-1:0] br_op = `BR_OP(alu_req_if.op_type);
wire alu_signed = `ALU_SIGNED(alu_op);
@@ -34,17 +37,13 @@ module VX_alu_unit #(
wire [`NUM_THREADS-1:0][31:0] alu_in2_less = (alu_req_if.rs2_is_imm && !is_br_op) ? {`NUM_THREADS{alu_req_if.imm}} : alu_in2;
for (genvar i = 0; i < `NUM_THREADS; i++) begin
always @(*) begin
add_result[i] = alu_in1_PC[i] + alu_in2_imm[i];
end
assign add_result[i] = alu_in1_PC[i] + alu_in2_imm[i];
end
for (genvar i = 0; i < `NUM_THREADS; i++) begin
wire [32:0] sub_in1 = {alu_signed & alu_in1[i][31], alu_in1[i]};
wire [32:0] sub_in2 = {alu_signed & alu_in2_less[i][31], alu_in2_less[i]};
always @(*) begin
sub_result[i] = $signed(sub_in1) - $signed(sub_in2);
end
assign sub_result[i] = $signed(sub_in1) - $signed(sub_in2);
end
for (genvar i = 0; i < `NUM_THREADS; i++) begin
@@ -52,9 +51,7 @@ module VX_alu_unit #(
`IGNORE_WARNINGS_BEGIN
wire [32:0] shr_value = $signed(shr_in1) >>> alu_in2_imm[i][4:0];
`IGNORE_WARNINGS_END
always @(*) begin
shr_result[i] = shr_value[31:0];
end
assign shr_result[i] = shr_value[31:0];
end
for (genvar i = 0; i < `NUM_THREADS; i++) begin
@@ -92,13 +89,94 @@ module VX_alu_unit #(
wire br_neg = `BR_NEG(br_op);
wire br_less = `BR_LESS(br_op);
wire br_static = `BR_STATIC(br_op);
wire br_taken = ((br_less ? is_less : is_equal) ^ br_neg) | br_static;
wire br_taken = ((br_less ? is_less : is_equal) ^ br_neg) | br_static;
// output
wire result_valid;
wire [`NW_BITS-1:0] result_wid;
wire [`NUM_THREADS-1:0] result_tmask;
wire [31:0] result_PC;
wire [`NR_BITS-1:0] result_rd;
wire result_wb;
wire [`NUM_THREADS-1:0][31:0] result_data;
wire result_is_br;
`ifdef EXT_M_ENABLE
wire mul_ready_in;
wire mul_valid_out;
wire mul_ready_out;
wire [`NW_BITS-1:0] mul_wid;
wire [`NUM_THREADS-1:0] mul_tmask;
wire [31:0] mul_PC;
wire [`NR_BITS-1:0] mul_rd;
wire mul_wb;
wire [`NUM_THREADS-1:0][31:0] mul_data;
wire is_mul_op = `IS_MUL_MOD(alu_req_if.op_mod);
VX_muldiv muldiv (
.clk (clk),
.reset (reset),
// Inputs
.alu_op (`MUL_OP(alu_req_if.op_type)),
.wid_in (alu_req_if.wid),
.tmask_in (alu_req_if.tmask),
.PC_in (alu_req_if.PC),
.rd_in (alu_req_if.rd),
.wb_in (alu_req_if.wb),
.alu_in1 (alu_req_if.rs1_data),
.alu_in2 (alu_req_if.rs2_data),
// Outputs
.wid_out (mul_wid),
.tmask_out (mul_tmask),
.PC_out (mul_PC),
.rd_out (mul_rd),
.wb_out (mul_wb),
.data_out (mul_data),
// handshake
.valid_in (alu_req_if.valid && is_mul_op),
.ready_in (mul_ready_in),
.valid_out (mul_valid_out),
.ready_out (mul_ready_out)
);
assign stall_in = (is_mul_op && ~mul_ready_in)
|| (~is_mul_op && (mul_valid_out || stall_out));
assign mul_ready_out = !stall_out;
assign result_valid = mul_valid_out | (alu_req_if.valid && ~is_mul_op);
assign result_wid = mul_valid_out ? mul_wid : alu_req_if.wid;
assign result_tmask = mul_valid_out ? mul_tmask : alu_req_if.tmask;
assign result_PC = mul_valid_out ? mul_PC : alu_req_if.PC;
assign result_rd = mul_valid_out ? mul_rd : alu_req_if.rd;
assign result_wb = mul_valid_out ? mul_wb : alu_req_if.wb;
assign result_data = mul_valid_out ? mul_data : alu_jal_result;
assign result_is_br = !mul_valid_out && is_br_op;
`else
assign stall_in = 0;
assign result_valid = alu_req_if.valid;
assign result_wid = alu_req_if.wid;
assign result_tmask = alu_req_if.tmask;
assign result_PC = alu_req_if.PC;
assign result_rd = alu_req_if.rd;
assign result_wb = alu_req_if.wb;
assign result_data = alu_jal_result;
assign result_is_br = is_br_op;
`endif
wire is_br_op_r;
wire stall_out = ~alu_commit_if.ready && alu_commit_if.valid;
assign stall_out = ~alu_commit_if.ready && alu_commit_if.valid;
VX_pipe_register #(
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + 1 + 32),
@@ -107,8 +185,8 @@ module VX_alu_unit #(
.clk (clk),
.reset (reset),
.enable (!stall_out),
.data_in ({alu_req_if.valid, alu_req_if.wid, alu_req_if.tmask, alu_req_if.PC, alu_req_if.rd, alu_req_if.wb, alu_jal_result, is_br_op, br_taken, br_dest}),
.data_out ({alu_commit_if.valid, alu_commit_if.wid, alu_commit_if.tmask, alu_commit_if.PC, alu_commit_if.rd, alu_commit_if.wb, alu_commit_if.data, is_br_op_r, branch_ctl_if.taken, branch_ctl_if.dest})
.data_in ({result_valid, result_wid, result_tmask, result_PC, result_rd, result_wb, result_data, result_is_br, br_taken, br_dest}),
.data_out ({alu_commit_if.valid, alu_commit_if.wid, alu_commit_if.tmask, alu_commit_if.PC, alu_commit_if.rd, alu_commit_if.wb, alu_commit_if.data, is_br_op_r, branch_ctl_if.taken, branch_ctl_if.dest})
);
assign alu_commit_if.eop = 1'b1;
@@ -117,6 +195,6 @@ module VX_alu_unit #(
assign branch_ctl_if.wid = alu_commit_if.wid;
// can accept new request?
assign alu_req_if.ready = ~stall_out;
assign alu_req_if.ready = ~stall_in;
endmodule

View File

@@ -9,8 +9,7 @@ module VX_commit #(
// inputs
VX_commit_if alu_commit_if,
VX_commit_if ld_commit_if,
VX_commit_if st_commit_if,
VX_commit_if mul_commit_if,
VX_commit_if st_commit_if,
VX_commit_if csr_commit_if,
VX_commit_if fpu_commit_if,
VX_commit_if gpu_commit_if,
@@ -27,7 +26,6 @@ module VX_commit #(
wire ld_commit_fire = ld_commit_if.valid && ld_commit_if.ready;
wire st_commit_fire = st_commit_if.valid && st_commit_if.ready;
wire csr_commit_fire = csr_commit_if.valid && csr_commit_if.ready;
wire mul_commit_fire = mul_commit_if.valid && mul_commit_if.ready;
wire fpu_commit_fire = fpu_commit_if.valid && fpu_commit_if.ready;
wire gpu_commit_fire = gpu_commit_if.valid && gpu_commit_if.ready;
@@ -35,7 +33,6 @@ module VX_commit #(
|| ld_commit_fire
|| st_commit_fire
|| csr_commit_fire
|| mul_commit_fire
|| fpu_commit_fire
|| gpu_commit_fire;
@@ -44,7 +41,6 @@ module VX_commit #(
assign commit_tmask1 = alu_commit_fire ? alu_commit_if.tmask:
ld_commit_fire ? ld_commit_if.tmask:
csr_commit_fire ? csr_commit_if.tmask:
mul_commit_fire ? mul_commit_if.tmask:
fpu_commit_fire ? fpu_commit_if.tmask:
0;
@@ -76,7 +72,6 @@ module VX_commit #(
.alu_commit_if (alu_commit_if),
.ld_commit_if (ld_commit_if),
.csr_commit_if (csr_commit_if),
.mul_commit_if (mul_commit_if),
.fpu_commit_if (fpu_commit_if),
.writeback_if (writeback_if)
@@ -99,10 +94,7 @@ module VX_commit #(
end
if (csr_commit_if.valid && csr_commit_if.ready) begin
$display("%t: core%0d-commit: wid=%0d, PC=%0h, ex=CSR, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, csr_commit_if.wid, csr_commit_if.PC, csr_commit_if.tmask, csr_commit_if.wb, csr_commit_if.rd, csr_commit_if.data);
end
if (mul_commit_if.valid && mul_commit_if.ready) begin
$display("%t: core%0d-commit: wid=%0d, PC=%0h, ex=MUL, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, mul_commit_if.wid, mul_commit_if.PC, mul_commit_if.tmask, mul_commit_if.wb, mul_commit_if.rd, mul_commit_if.data);
end
end
if (fpu_commit_if.valid && fpu_commit_if.ready) begin
$display("%t: core%0d-commit: wid=%0d, PC=%0h, ex=FPU, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, fpu_commit_if.wid, fpu_commit_if.PC, fpu_commit_if.tmask, fpu_commit_if.wb, fpu_commit_if.rd, fpu_commit_if.data);
end

View File

@@ -167,54 +167,52 @@
`define CSR_MPM_LSU_ST_H 12'hB86
`define CSR_MPM_CSR_ST 12'hB07
`define CSR_MPM_CSR_ST_H 12'hB87
`define CSR_MPM_MUL_ST 12'hB08
`define CSR_MPM_MUL_ST_H 12'hB88
`define CSR_MPM_FPU_ST 12'hB09
`define CSR_MPM_FPU_ST_H 12'hB89
`define CSR_MPM_GPU_ST 12'hB0A
`define CSR_MPM_GPU_ST_H 12'hB8A
`define CSR_MPM_FPU_ST 12'hB08
`define CSR_MPM_FPU_ST_H 12'hB88
`define CSR_MPM_GPU_ST 12'hB09
`define CSR_MPM_GPU_ST_H 12'hB89
// PERF: icache
`define CSR_MPM_ICACHE_READS 12'hB0B // total reads
`define CSR_MPM_ICACHE_READS_H 12'hB8B
`define CSR_MPM_ICACHE_MISS_R 12'hB0C // total misses
`define CSR_MPM_ICACHE_MISS_R_H 12'hB8C
`define CSR_MPM_ICACHE_PIPE_ST 12'hB0D // pipeline stalls
`define CSR_MPM_ICACHE_PIPE_ST_H 12'hB8D
`define CSR_MPM_ICACHE_CRSP_ST 12'hB0E // core response stalls
`define CSR_MPM_ICACHE_CRSP_ST_H 12'hB8E
`define CSR_MPM_ICACHE_READS 12'hB0A // total reads
`define CSR_MPM_ICACHE_READS_H 12'hB8A
`define CSR_MPM_ICACHE_MISS_R 12'hB0B // total misses
`define CSR_MPM_ICACHE_MISS_R_H 12'hB8B
`define CSR_MPM_ICACHE_PIPE_ST 12'hB0C // pipeline stalls
`define CSR_MPM_ICACHE_PIPE_ST_H 12'hB8C
`define CSR_MPM_ICACHE_CRSP_ST 12'hB0D // core response stalls
`define CSR_MPM_ICACHE_CRSP_ST_H 12'hB8D
// PERF: dcache
`define CSR_MPM_DCACHE_READS 12'hB0F // total reads
`define CSR_MPM_DCACHE_READS_H 12'hB8F
`define CSR_MPM_DCACHE_WRITES 12'hB10 // total writes
`define CSR_MPM_DCACHE_WRITES_H 12'hB90
`define CSR_MPM_DCACHE_MISS_R 12'hB11 // read misses
`define CSR_MPM_DCACHE_MISS_R_H 12'hB91
`define CSR_MPM_DCACHE_MISS_W 12'hB12 // write misses
`define CSR_MPM_DCACHE_MISS_W_H 12'hB92
`define CSR_MPM_DCACHE_BANK_ST 12'hB13 // bank conflicts stalls
`define CSR_MPM_DCACHE_BANK_ST_H 12'hB93
`define CSR_MPM_DCACHE_MSHR_ST 12'hB14 // MSHR stalls
`define CSR_MPM_DCACHE_MSHR_ST_H 12'hB94
`define CSR_MPM_DCACHE_PIPE_ST 12'hB15 // pipeline stalls
`define CSR_MPM_DCACHE_PIPE_ST_H 12'hB95
`define CSR_MPM_DCACHE_CRSP_ST 12'hB16 // core response stalls
`define CSR_MPM_DCACHE_CRSP_ST_H 12'hB96
`define CSR_MPM_DCACHE_READS 12'hB0E // total reads
`define CSR_MPM_DCACHE_READS_H 12'hB8E
`define CSR_MPM_DCACHE_WRITES 12'hB0F // total writes
`define CSR_MPM_DCACHE_WRITES_H 12'hB8F
`define CSR_MPM_DCACHE_MISS_R 12'hB10 // read misses
`define CSR_MPM_DCACHE_MISS_R_H 12'hB90
`define CSR_MPM_DCACHE_MISS_W 12'hB11 // write misses
`define CSR_MPM_DCACHE_MISS_W_H 12'hB91
`define CSR_MPM_DCACHE_BANK_ST 12'hB12 // bank conflicts stalls
`define CSR_MPM_DCACHE_BANK_ST_H 12'hB92
`define CSR_MPM_DCACHE_MSHR_ST 12'hB13 // MSHR stalls
`define CSR_MPM_DCACHE_MSHR_ST_H 12'hB93
`define CSR_MPM_DCACHE_PIPE_ST 12'hB14 // pipeline stalls
`define CSR_MPM_DCACHE_PIPE_ST_H 12'hB94
`define CSR_MPM_DCACHE_CRSP_ST 12'hB15 // core response stalls
`define CSR_MPM_DCACHE_CRSP_ST_H 12'hB95
// PERF: smem
`define CSR_MPM_SMEM_READS 12'hB17 // total reads
`define CSR_MPM_SMEM_READS_H 12'hB97
`define CSR_MPM_SMEM_WRITES 12'hB18 // total writes
`define CSR_MPM_SMEM_WRITES_H 12'hB98
`define CSR_MPM_SMEM_BANK_ST 12'hB19 // bank conflicts stalls
`define CSR_MPM_SMEM_BANK_ST_H 12'hB99
`define CSR_MPM_SMEM_READS 12'hB16 // total reads
`define CSR_MPM_SMEM_READS_H 12'hB96
`define CSR_MPM_SMEM_WRITES 12'hB17 // total writes
`define CSR_MPM_SMEM_WRITES_H 12'hB97
`define CSR_MPM_SMEM_BANK_ST 12'hB18 // bank conflicts stalls
`define CSR_MPM_SMEM_BANK_ST_H 12'hB98
// PERF: memory
`define CSR_MPM_DRAM_READS 12'hB1A // dram reads
`define CSR_MPM_DRAM_READS_H 12'hB9A
`define CSR_MPM_DRAM_WRITES 12'hB1B // dram writes
`define CSR_MPM_DRAM_WRITES_H 12'hB9B
`define CSR_MPM_DRAM_ST 12'hB1C // dram request stalls
`define CSR_MPM_DRAM_ST_H 12'hB9C
`define CSR_MPM_DRAM_LAT 12'hB1D // dram latency (total)
`define CSR_MPM_DRAM_LAT_H 12'hB9D
`define CSR_MPM_DRAM_READS 12'hB19 // dram reads
`define CSR_MPM_DRAM_READS_H 12'hB99
`define CSR_MPM_DRAM_WRITES 12'hB1A // dram writes
`define CSR_MPM_DRAM_WRITES_H 12'hB9A
`define CSR_MPM_DRAM_ST 12'hB1B // dram request stalls
`define CSR_MPM_DRAM_ST_H 12'hB9B
`define CSR_MPM_DRAM_LAT 12'hB1C // dram latency (total)
`define CSR_MPM_DRAM_LAT_H 12'hB9C
// Machine Information Registers
`define CSR_MVENDORID 12'hF11

View File

@@ -132,8 +132,6 @@ module VX_csr_data #(
`CSR_MPM_LSU_ST_H : read_data_r = perf_pipeline_if.lsu_stalls[63:32];
`CSR_MPM_CSR_ST : read_data_r = perf_pipeline_if.csr_stalls[31:0];
`CSR_MPM_CSR_ST_H : read_data_r = perf_pipeline_if.csr_stalls[63:32];
`CSR_MPM_MUL_ST : read_data_r = perf_pipeline_if.mul_stalls[31:0];
`CSR_MPM_MUL_ST_H : read_data_r = perf_pipeline_if.mul_stalls[63:32];
`CSR_MPM_FPU_ST : read_data_r = perf_pipeline_if.fpu_stalls[31:0];
`CSR_MPM_FPU_ST_H : read_data_r = perf_pipeline_if.fpu_stalls[63:32];
`CSR_MPM_GPU_ST : read_data_r = perf_pipeline_if.gpu_stalls[31:0];

View File

@@ -21,10 +21,10 @@ module VX_decode #(
wire [31:0] instr = ifetch_rsp_if.instr;
reg [`ALU_BITS-1:0] alu_op;
reg [`BR_BITS-1:0] br_op;
reg [`BR_BITS-1:0] br_op;
reg [`MUL_BITS-1:0] mul_op;
reg [`LSU_BITS-1:0] lsu_op;
reg [`CSR_BITS-1:0] csr_op;
reg [`MUL_BITS-1:0] mul_op;
reg [`FPU_BITS-1:0] fpu_op;
reg [`GPU_BITS-1:0] gpu_op;
@@ -120,16 +120,11 @@ module VX_decode #(
`INST_JAL: br_op = `BR_JAL;
`INST_JALR: br_op = `BR_JALR;
`INST_SYS: begin
if (is_jals) begin
case (u_12)
12'h000: br_op = `BR_ECALL;
12'h001: br_op = `BR_EBREAK;
12'h302: br_op = `BR_MRET;
12'h102: br_op = `BR_SRET;
12'h7B2: br_op = `BR_DRET;
default:;
endcase
end
if (is_jals && u_12 == 12'h000) br_op = `BR_ECALL;
if (is_jals && u_12 == 12'h001) br_op = `BR_EBREAK;
if (is_jals && u_12 == 12'h302) br_op = `BR_MRET;
if (is_jals && u_12 == 12'h102) br_op = `BR_SRET;
if (is_jals && u_12 == 12'h7B2) br_op = `BR_DRET;
end
default:;
endcase
@@ -174,7 +169,7 @@ module VX_decode #(
// MUL
`ifdef EXT_M_ENABLE
wire is_mul = is_rtype && (func7 == 7'h1);
wire is_mul = is_rtype && (func7 == 7'h1);
always @(*) begin
mul_op = `MUL_MUL;
case (func3)
@@ -238,11 +233,11 @@ module VX_decode #(
7'h0C: fpu_op = `FPU_DIV;
7'h10: begin
fpu_op = `FPU_MISC;
frm = func3[1] ? 2 : (func3[0] ? 1 : 0);
frm = func3[1] ? 3'b010 : {2'b0, func3[0]};
end
7'h14: begin
fpu_op = `FPU_MISC;
frm = (func3 == 3'h0) ? 3 : 4;
frm = (func3 == 3'h0) ? 3'b011 : 3'b100;
end
7'h2C: begin
fpu_op = `FPU_SQRT;
@@ -272,6 +267,7 @@ module VX_decode #(
wire is_fpu = 0;
wire is_fpu_no_mem= 0;
wire [2:0] frm = 0;
wire is_fsqrt = 0;
always @(*) begin
fpu_op = `FPU_MISC;
@@ -334,25 +330,23 @@ module VX_decode #(
assign decode_if.tmask = ifetch_rsp_if.tmask;
assign decode_if.PC = ifetch_rsp_if.PC;
assign decode_if.ex_type = is_lsu ? `EX_LSU :
assign decode_if.ex_type = is_gpu ? `EX_GPU :
is_csr ? `EX_CSR :
is_mul ? `EX_MUL :
is_fpu_no_mem ? `EX_FPU :
is_gpu ? `EX_GPU :
is_br ? `EX_ALU :
(is_rtype || is_itype || is_lui || is_auipc) ? `EX_ALU :
`EX_NOP;
is_fpu_no_mem ? `EX_FPU :
is_lsu ? `EX_LSU :
(is_br || is_rtype || is_itype || is_lui || is_auipc) ? `EX_ALU :
`EX_NOP;
assign decode_if.op_type = is_lsu ? `OP_BITS'(lsu_op) :
assign decode_if.op_type = is_gpu ? `OP_BITS'(gpu_op) :
is_csr ? `OP_BITS'(csr_op) :
is_mul ? `OP_BITS'(mul_op) :
is_fpu_no_mem ? `OP_BITS'(fpu_op) :
is_gpu ? `OP_BITS'(gpu_op) :
is_fpu_no_mem ? `OP_BITS'(fpu_op) :
is_lsu ? `OP_BITS'(lsu_op) :
is_br ? `OP_BITS'(br_op) :
(is_rtype || is_itype || is_lui || is_auipc) ? `OP_BITS'(alu_op) :
0;
assign decode_if.wb = use_rd;
assign decode_if.wb = use_rd && (decode_if.ex_type != `EX_NOP);
`ifdef EXT_F_ENABLE
wire rd_is_fp = is_fpu && ~(is_fcmp || is_fcvti || is_fmvw_clss);
@@ -370,13 +364,10 @@ module VX_decode #(
assign decode_if.rs3 = rs3;
`endif
wire is_nop = (decode_if.ex_type == `EX_NOP);
assign decode_if.used_regs = is_nop ? `NUM_REGS'(0) :
((`NUM_REGS'(use_rd) << decode_if.rd)
| (`NUM_REGS'(use_rs1) << decode_if.rs1)
| (`NUM_REGS'(use_rs2) << decode_if.rs2)
| (`NUM_REGS'(use_rs3) << decode_if.rs3));
assign decode_if.used_regs = (`NUM_REGS'(use_rd) << decode_if.rd)
| (`NUM_REGS'(use_rs1) << decode_if.rs1)
| (`NUM_REGS'(use_rs2) << decode_if.rs2)
| (`NUM_REGS'(use_rs3) << decode_if.rs3);
assign decode_if.imm = (is_lui || is_auipc) ? {upper_imm, 12'(0)} :
(is_jal || is_jalr || is_jals) ? jalx_offset :
@@ -386,7 +377,7 @@ module VX_decode #(
assign decode_if.rs1_is_PC = is_auipc || is_btype || is_jal || is_jals;
assign decode_if.rs2_is_imm = is_itype || is_lui || is_auipc || is_csr_imm || is_br;
wire [`MOD_BITS-1:0] alu_mod = is_br ? 1 : 0;
wire [`MOD_BITS-1:0] alu_mod = {1'b0, is_mul, is_br};
assign decode_if.op_mod = is_fpu_no_mem ? frm : alu_mod;
///////////////////////////////////////////////////////////////////////////

View File

@@ -68,9 +68,8 @@
// Execution-unit selector codes compared against ex_type.
// This span is rendered from a diff hunk, so both the earlier MUL/FPU/GPU
// values and the renumbered FPU/GPU values appear in sequence below.
`define EX_ALU 3'h1
`define EX_LSU 3'h2
`define EX_CSR 3'h3
`define EX_MUL 3'h4
`define EX_FPU 3'h5
`define EX_GPU 3'h6
`define EX_FPU 3'h4
`define EX_GPU 3'h5
// Presumably the bit width of the codes above (all are written as 3'h..).
`define EX_BITS 3
// NOTE(review): the count stays at 6 although EX_MUL is dropped and FPU/GPU
// move down to 4/5 — confirm whether NUM_EXS is meant to include the NOP
// code (not visible here) or should shrink along with the removed unit.
`define NUM_EXS 6
@@ -118,10 +117,21 @@
`define BR_NEG(x) x[1]
`define BR_LESS(x) x[2]
`define BR_STATIC(x) x[3]
`define ALU_BR_BITS 4
`define ALU_BR_OP(x) x[`ALU_BR_BITS-1:0]
`define IS_BR_MOD(x) x[0]
`define MUL_MUL 3'h0
`define MUL_MULH 3'h1
`define MUL_MULHSU 3'h2
`define MUL_MULHU 3'h3
`define MUL_DIV 3'h4
`define MUL_DIVU 3'h5
`define MUL_REM 3'h6
`define MUL_REMU 3'h7
`define MUL_BITS 3
`define MUL_OP(x) x[`MUL_BITS-1:0]
`define IS_DIV_OP(x) x[2]
`define IS_MUL_MOD(x) x[1]
`define LSU_SB 3'h0
`define LSU_SH 3'h1
`define LSU_SW 3'h2
@@ -138,18 +148,6 @@
`define CSR_BITS 2
`define CSR_OP(x) x[`CSR_BITS-1:0]
`define MUL_MUL 3'h0
`define MUL_MULH 3'h1
`define MUL_MULHSU 3'h2
`define MUL_MULHU 3'h3
`define MUL_DIV 3'h4
`define MUL_DIVU 3'h5
`define MUL_REM 3'h6
`define MUL_REMU 3'h7
`define MUL_BITS 3
`define MUL_OP(x) x[`MUL_BITS-1:0]
`define IS_DIV_OP(x) x[2]
`define FPU_ADD 4'h0
`define FPU_SUB 4'h1
`define FPU_MUL 4'h2

View File

@@ -27,8 +27,7 @@ module VX_execute #(
// inputs
VX_alu_req_if alu_req_if,
VX_lsu_req_if lsu_req_if,
VX_csr_req_if csr_req_if,
VX_mul_req_if mul_req_if,
VX_csr_req_if csr_req_if,
VX_fpu_req_if fpu_req_if,
VX_gpu_req_if gpu_req_if,
@@ -39,7 +38,6 @@ module VX_execute #(
VX_commit_if ld_commit_if,
VX_commit_if st_commit_if,
VX_commit_if csr_commit_if,
VX_commit_if mul_commit_if,
VX_commit_if fpu_commit_if,
VX_commit_if gpu_commit_if,
@@ -93,26 +91,6 @@ module VX_execute #(
.busy (busy)
);
`ifdef EXT_M_ENABLE
VX_mul_unit #(
.CORE_ID(CORE_ID)
) mul_unit (
.clk (clk),
.reset (reset),
.mul_req_if (mul_req_if),
.mul_commit_if (mul_commit_if)
);
`else
assign mul_req_if.ready = 0;
assign mul_commit_if.valid = 0;
assign mul_commit_if.wid = 0;
assign mul_commit_if.PC = 0;
assign mul_commit_if.tmask = 0;
assign mul_commit_if.wb = 0;
assign mul_commit_if.rd = 0;
assign mul_commit_if.data = 0;
`endif
`ifdef EXT_F_ENABLE
VX_fpu_unit #(
.CORE_ID(CORE_ID)
@@ -155,7 +133,7 @@ module VX_execute #(
);
assign ebreak = alu_req_if.valid
&& alu_req_if.is_br_op
&& `IS_BR_MOD(alu_req_if.op_mod)
&& (`BR_OP(alu_req_if.op_type) == `BR_EBREAK
|| `BR_OP(alu_req_if.op_type) == `BR_ECALL);

View File

@@ -12,11 +12,15 @@ module VX_instr_demux (
VX_alu_req_if alu_req_if,
VX_lsu_req_if lsu_req_if,
VX_csr_req_if csr_req_if,
VX_mul_req_if mul_req_if,
VX_fpu_req_if fpu_req_if,
VX_gpu_req_if gpu_req_if
);
wire [`NT_BITS-1:0] tid;
wire alu_req_ready;
wire lsu_req_ready;
wire csr_req_ready;
wire fpu_req_ready;
wire gpu_req_ready;
VX_priority_encoder #(
.N (`NUM_THREADS)
@@ -32,20 +36,17 @@ module VX_instr_demux (
// ALU unit
wire alu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_ALU);
wire alu_req_ready;
wire is_br_op = `IS_BR_MOD(execute_if.op_mod);
VX_skid_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `ALU_BR_BITS + 1 + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS + (2 * `NUM_THREADS * 32)),
.NOBACKPRESSURE (1) // ALU has no back pressure
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `ALU_BITS + `MOD_BITS + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS + (2 * `NUM_THREADS * 32)),
.BUFFERED (1)
) alu_buffer (
.clk (clk),
.reset (reset),
.valid_in (alu_req_valid),
.ready_in (alu_req_ready),
.data_in ({execute_if.wid, execute_if.tmask, execute_if.PC, next_PC, `ALU_BR_OP(execute_if.op_type), is_br_op, execute_if.imm, execute_if.rs1_is_PC, execute_if.rs2_is_imm, execute_if.rd, execute_if.wb, tid, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
.data_out ({alu_req_if.wid, alu_req_if.tmask, alu_req_if.PC, alu_req_if.next_PC, alu_req_if.op_type, alu_req_if.is_br_op, alu_req_if.imm, alu_req_if.rs1_is_PC, alu_req_if.rs2_is_imm, alu_req_if.rd, alu_req_if.wb, alu_req_if.tid, alu_req_if.rs1_data, alu_req_if.rs2_data}),
.data_in ({execute_if.wid, execute_if.tmask, execute_if.PC, next_PC, `ALU_OP(execute_if.op_type), execute_if.op_mod, execute_if.imm, execute_if.rs1_is_PC, execute_if.rs2_is_imm, execute_if.rd, execute_if.wb, tid, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
.data_out ({alu_req_if.wid, alu_req_if.tmask, alu_req_if.PC, alu_req_if.next_PC, alu_req_if.op_type, alu_req_if.op_mod, alu_req_if.imm, alu_req_if.rs1_is_PC, alu_req_if.rs2_is_imm, alu_req_if.rd, alu_req_if.wb, alu_req_if.tid, alu_req_if.rs1_data, alu_req_if.rs2_data}),
.valid_out (alu_req_if.valid),
.ready_out (alu_req_if.ready)
);
@@ -53,7 +54,6 @@ module VX_instr_demux (
// lsu unit
wire lsu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_LSU);
wire lsu_req_ready;
VX_skid_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `LSU_BITS + 32 + `NR_BITS + 1 + (2 * `NUM_THREADS * 32)),
@@ -72,7 +72,6 @@ module VX_instr_demux (
// csr unit
wire csr_req_valid = execute_if.valid && (execute_if.ex_type == `EX_CSR);
wire csr_req_ready;
VX_skid_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NR_BITS + 32),
@@ -88,33 +87,11 @@ module VX_instr_demux (
.ready_out (csr_req_if.ready)
);
// mul unit
`ifdef EXT_M_ENABLE
wire mul_req_valid = execute_if.valid && (execute_if.ex_type == `EX_MUL);
wire mul_req_ready;
VX_skid_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `MUL_BITS + `NR_BITS + 1 + (2 * `NUM_THREADS * 32)),
.BUFFERED (1)
) mul_buffer (
.clk (clk),
.reset (reset),
.valid_in (mul_req_valid),
.ready_in (mul_req_ready),
.data_in ({execute_if.wid, execute_if.tmask, execute_if.PC, `MUL_OP(execute_if.op_type), execute_if.rd, execute_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
.data_out ({mul_req_if.wid, mul_req_if.tmask, mul_req_if.PC, mul_req_if.op_type, mul_req_if.rd, mul_req_if.wb, mul_req_if.rs1_data, mul_req_if.rs2_data}),
.valid_out (mul_req_if.valid),
.ready_out (mul_req_if.ready)
);
`endif
// fpu unit
`ifdef EXT_F_ENABLE
wire fpu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_FPU);
wire fpu_req_ready;
VX_skid_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `FPU_BITS + `MOD_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32)),
.BUFFERED (1)
@@ -130,12 +107,12 @@ module VX_instr_demux (
);
`else
`UNUSED_VAR (gpr_rsp_if.rs3_data)
assign fpu_req_ready = 0;
`endif
// gpu unit
wire gpu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_GPU);
wire gpu_req_ready;
VX_skid_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `GPU_BITS + `NR_BITS + 1 + (`NUM_THREADS * 32 + 32)),
@@ -158,7 +135,6 @@ module VX_instr_demux (
`EX_ALU: ready_r = alu_req_ready;
`EX_LSU: ready_r = lsu_req_ready;
`EX_CSR: ready_r = csr_req_ready;
`EX_MUL: ready_r = mul_req_ready;
`EX_FPU: ready_r = fpu_req_ready;
`EX_GPU: ready_r = gpu_req_ready;
default: ready_r = 1'b1; // ignore NOPs

View File

@@ -18,7 +18,6 @@ module VX_issue #(
VX_alu_req_if alu_req_if,
VX_lsu_req_if lsu_req_if,
VX_csr_req_if csr_req_if,
VX_mul_req_if mul_req_if,
VX_fpu_req_if fpu_req_if,
VX_gpu_req_if gpu_req_if
);
@@ -86,7 +85,6 @@ module VX_issue #(
.alu_req_if (alu_req_if),
.lsu_req_if (lsu_req_if),
.csr_req_if (csr_req_if),
.mul_req_if (mul_req_if),
.fpu_req_if (fpu_req_if),
.gpu_req_if (gpu_req_if)
);
@@ -129,9 +127,6 @@ module VX_issue #(
reg [63:0] perf_lsu_stalls;
reg [63:0] perf_csr_stalls;
reg [63:0] perf_gpu_stalls;
`ifdef EXT_M_ENABLE
reg [63:0] perf_mul_stalls;
`endif
`ifdef EXT_F_ENABLE
reg [63:0] perf_fpu_stalls;
`endif
@@ -144,9 +139,6 @@ module VX_issue #(
perf_lsu_stalls <= 0;
perf_csr_stalls <= 0;
perf_gpu_stalls <= 0;
`ifdef EXT_M_ENABLE
perf_mul_stalls <= 0;
`endif
`ifdef EXT_F_ENABLE
perf_fpu_stalls <= 0;
`endif
@@ -169,11 +161,6 @@ module VX_issue #(
if (gpu_req_if.valid & !gpu_req_if.ready) begin
perf_gpu_stalls <= perf_gpu_stalls + 64'd1;
end
`ifdef EXT_M_ENABLE
if (mul_req_if.valid & !mul_req_if.ready) begin
perf_mul_stalls <= perf_mul_stalls + 64'd1;
end
`endif
`ifdef EXT_F_ENABLE
if (fpu_req_if.valid & !fpu_req_if.ready) begin
perf_fpu_stalls <= perf_fpu_stalls + 64'd1;
@@ -188,9 +175,6 @@ module VX_issue #(
assign perf_pipeline_if.lsu_stalls = perf_lsu_stalls;
assign perf_pipeline_if.csr_stalls = perf_csr_stalls;
assign perf_pipeline_if.gpu_stalls = perf_gpu_stalls;
`ifdef EXT_M_ENABLE
assign perf_pipeline_if.mul_stalls = perf_mul_stalls;
`endif
`ifdef EXT_F_ENABLE
assign perf_pipeline_if.fpu_stalls = perf_fpu_stalls;
`endif
@@ -207,9 +191,6 @@ module VX_issue #(
if (csr_req_if.valid && csr_req_if.ready) begin
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=CSR, tmask=%b, rd=%0d, addr=%0h, rs1_data=%0h", $time, CORE_ID, csr_req_if.wid, csr_req_if.PC, csr_req_if.tmask, csr_req_if.rd, csr_req_if.csr_addr, csr_req_if.rs1_data);
end
if (mul_req_if.valid && mul_req_if.ready) begin
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=MUL, tmask=%b, rd=%0d, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, mul_req_if.wid, mul_req_if.PC, mul_req_if.tmask, mul_req_if.rd, mul_req_if.rs1_data, mul_req_if.rs2_data);
end
if (fpu_req_if.valid && fpu_req_if.ready) begin
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=FPU, tmask=%b, rd=%0d, rs1_data=%0h, rs2_data=%0h, rs3_data=%0h", $time, CORE_ID, fpu_req_if.wid, fpu_req_if.PC, fpu_req_if.tmask, fpu_req_if.rd, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data);
end

View File

@@ -1,26 +1,35 @@
`include "VX_define.vh"
module VX_mul_unit #(
parameter CORE_ID = 0
) (
module VX_muldiv (
input wire clk,
input wire reset,
// Inputs
VX_mul_req_if mul_req_if,
// Inputs
input wire [`MUL_BITS-1:0] alu_op,
input wire [`NW_BITS-1:0] wid_in,
input wire [`NUM_THREADS-1:0] tmask_in,
input wire [31:0] PC_in,
input wire [`NR_BITS-1:0] rd_in,
input wire wb_in,
input wire [`NUM_THREADS-1:0][31:0] alu_in1,
input wire [`NUM_THREADS-1:0][31:0] alu_in2,
// Outputs
VX_commit_if mul_commit_if
output wire [`NW_BITS-1:0] wid_out,
output wire [`NUM_THREADS-1:0] tmask_out,
output wire [31:0] PC_out,
output wire [`NR_BITS-1:0] rd_out,
output wire wb_out,
output wire [`NUM_THREADS-1:0][31:0] data_out,
// handshake
input wire valid_in,
output wire ready_in,
output wire valid_out,
input wire ready_out
);
wire [`MUL_BITS-1:0] alu_op = mul_req_if.op_type;
wire is_div_op = `IS_DIV_OP(alu_op);
wire [`NUM_THREADS-1:0][31:0] alu_in1 = mul_req_if.rs1_data;
wire [`NUM_THREADS-1:0][31:0] alu_in2 = mul_req_if.rs2_data;
wire ready_out;
///////////////////////////////////////////////////////////////////////////
wire is_div_op = `IS_DIV_OP(alu_op);
wire [`NUM_THREADS-1:0][31:0] mul_result;
wire [`NW_BITS-1:0] mul_wid_out;
@@ -29,9 +38,11 @@ module VX_mul_unit #(
wire [`NR_BITS-1:0] mul_rd_out;
wire mul_wb_out;
wire stall_out;
wire mul_valid_out;
wire mul_valid_in = mul_req_if.valid && !is_div_op;
wire mul_ready_in = ready_out || ~mul_valid_out;
wire mul_valid_in = valid_in && !is_div_op;
wire mul_ready_in = ~stall_out || ~mul_valid_out;
wire is_mulh_in = (alu_op != `MUL_MUL);
wire is_mulh_out;
@@ -68,8 +79,8 @@ module VX_mul_unit #(
.clk(clk),
.reset (reset),
.enable (mul_ready_in),
.data_in ({mul_valid_in, mul_req_if.wid, mul_req_if.tmask, mul_req_if.PC, mul_req_if.rd, mul_req_if.wb, is_mulh_in}),
.data_out ({mul_valid_out, mul_wid_out, mul_tmask_out, mul_PC_out, mul_rd_out, mul_wb_out, is_mulh_out})
.data_in ({mul_valid_in, wid_in, tmask_in, PC_in, rd_in, wb_in, is_mulh_in}),
.data_out ({mul_valid_out, mul_wid_out, mul_tmask_out, mul_PC_out, mul_rd_out, mul_wb_out, is_mulh_out})
);
///////////////////////////////////////////////////////////////////////////
@@ -83,8 +94,8 @@ module VX_mul_unit #(
wire is_rem_op_in = (alu_op == `MUL_REM) || (alu_op == `MUL_REMU);
wire is_signed_div = (alu_op == `MUL_DIV) || (alu_op == `MUL_REM);
wire div_valid_in = mul_req_if.valid && is_div_op;
wire div_ready_out = ready_out && ~mul_valid_out; // arbitration prioritizes MUL
wire div_valid_in = valid_in && is_div_op;
wire div_ready_out = ~stall_out && ~mul_valid_out; // arbitration prioritizes MUL
wire div_ready_in;
wire div_valid_out;
wire is_rem_op_out;
@@ -102,7 +113,7 @@ module VX_mul_unit #(
.valid_in (div_valid_in),
.ready_in (div_ready_in),
.signed_mode(is_signed_div),
.tag_in ({mul_req_if.wid, mul_req_if.tmask, mul_req_if.PC, mul_req_if.rd, mul_req_if.wb, is_rem_op_in}),
.tag_in ({wid_in, tmask_in, PC_in, rd_in, wb_in, is_rem_op_in}),
.numer (alu_in1),
.denom (alu_in2),
.quotient (div_result_tmp),
@@ -116,9 +127,6 @@ module VX_mul_unit #(
///////////////////////////////////////////////////////////////////////////
wire stall_out = ~mul_commit_if.ready && mul_commit_if.valid;
assign ready_out = ~stall_out;
wire rsp_valid = mul_valid_out || div_valid_out;
wire [`NW_BITS-1:0] rsp_wid = mul_valid_out ? mul_wid_out : div_wid_out;
wire [`NUM_THREADS-1:0] rsp_tmask = mul_valid_out ? mul_tmask_out : div_tmask_out;
@@ -127,20 +135,20 @@ module VX_mul_unit #(
wire rsp_wb = mul_valid_out ? mul_wb_out : div_wb_out;
wire [`NUM_THREADS-1:0][31:0] rsp_data = mul_valid_out ? mul_result : div_result;
assign stall_out = ~ready_out && valid_out;
VX_pipe_register #(
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)),
.RESETW (1)
) pipe_reg (
.clk (clk),
.reset (reset),
.enable (!stall_out),
.data_in ({rsp_valid, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, rsp_data}),
.data_out ({mul_commit_if.valid, mul_commit_if.wid, mul_commit_if.tmask, mul_commit_if.PC, mul_commit_if.rd, mul_commit_if.wb, mul_commit_if.data})
.enable (~stall_out),
.data_in ({rsp_valid, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, rsp_data}),
.data_out ({valid_out, wid_out, tmask_out, PC_out, rd_out, wb_out, data_out})
);
assign mul_commit_if.eop = 1'b1;
// can accept new request?
assign mul_req_if.ready = is_div_op ? div_ready_in : mul_ready_in;
assign ready_in = is_div_op ? div_ready_in : mul_ready_in;
endmodule

View File

@@ -150,8 +150,7 @@ module VX_pipeline #(
VX_ifetch_rsp_if ifetch_rsp_if();
VX_alu_req_if alu_req_if();
VX_lsu_req_if lsu_req_if();
VX_csr_req_if csr_req_if();
VX_mul_req_if mul_req_if();
VX_csr_req_if csr_req_if();
VX_fpu_req_if fpu_req_if();
VX_gpu_req_if gpu_req_if();
VX_writeback_if writeback_if();
@@ -160,8 +159,7 @@ module VX_pipeline #(
VX_commit_if alu_commit_if();
VX_commit_if ld_commit_if();
VX_commit_if st_commit_if();
VX_commit_if csr_commit_if();
VX_commit_if mul_commit_if();
VX_commit_if csr_commit_if();
VX_commit_if fpu_commit_if();
VX_commit_if gpu_commit_if();
@@ -214,7 +212,6 @@ module VX_pipeline #(
.alu_req_if (alu_req_if),
.lsu_req_if (lsu_req_if),
.csr_req_if (csr_req_if),
.mul_req_if (mul_req_if),
.fpu_req_if (fpu_req_if),
.gpu_req_if (gpu_req_if)
);
@@ -243,7 +240,6 @@ module VX_pipeline #(
.alu_req_if (alu_req_if),
.lsu_req_if (lsu_req_if),
.csr_req_if (csr_req_if),
.mul_req_if (mul_req_if),
.fpu_req_if (fpu_req_if),
.gpu_req_if (gpu_req_if),
@@ -253,7 +249,6 @@ module VX_pipeline #(
.ld_commit_if (ld_commit_if),
.st_commit_if (st_commit_if),
.csr_commit_if (csr_commit_if),
.mul_commit_if (mul_commit_if),
.fpu_commit_if (fpu_commit_if),
.gpu_commit_if (gpu_commit_if),
@@ -271,7 +266,6 @@ module VX_pipeline #(
.ld_commit_if (ld_commit_if),
.st_commit_if (st_commit_if),
.csr_commit_if (csr_commit_if),
.mul_commit_if (mul_commit_if),
.fpu_commit_if (fpu_commit_if),
.gpu_commit_if (gpu_commit_if),

View File

@@ -10,7 +10,6 @@ task print_ex_type (
`EX_ALU: $write("ALU");
`EX_LSU: $write("LSU");
`EX_CSR: $write("CSR");
`EX_MUL: $write("MUL");
`EX_FPU: $write("FPU");
`EX_GPU: $write("GPU");
default: $write("NOP");
@@ -41,6 +40,18 @@ task print_ex_op (
`BR_DRET: $write("DRET");
default: $write("?");
endcase
end else if (`IS_MUL_MOD(op_mod)) begin
case (`MUL_BITS'(op_type))
`MUL_MUL: $write("MUL");
`MUL_MULH: $write("MULH");
`MUL_MULHSU:$write("MULHSU");
`MUL_MULHU: $write("MULHU");
`MUL_DIV: $write("DIV");
`MUL_DIVU: $write("DIVU");
`MUL_REM: $write("REM");
`MUL_REMU: $write("REMU");
default: $write("?");
endcase
end else begin
case (`ALU_BITS'(op_type))
`ALU_ADD: $write("ADD");
@@ -77,19 +88,6 @@ task print_ex_op (
default: $write("?");
endcase
end
`EX_MUL: begin
case (`MUL_BITS'(op_type))
`MUL_MUL: $write("MUL");
`MUL_MULH: $write("MULH");
`MUL_MULHSU:$write("MULHSU");
`MUL_MULHU: $write("MULHU");
`MUL_DIV: $write("DIV");
`MUL_DIVU: $write("DIVU");
`MUL_REM: $write("REM");
`MUL_REMU: $write("REMU");
default: $write("?");
endcase
end
`EX_FPU: begin
case (`FPU_BITS'(op_type))
`FPU_ADD: $write("ADD");

View File

@@ -7,7 +7,7 @@
`define SCOPE_ASSIGN(d,s) assign scope_``d = s
`define SCOPE_SIZE 4096
`define SCOPE_SIZE 1024
`else

View File

@@ -10,17 +10,15 @@ module VX_writeback #(
VX_commit_if alu_commit_if,
VX_commit_if ld_commit_if,
VX_commit_if csr_commit_if,
VX_commit_if mul_commit_if,
VX_commit_if fpu_commit_if,
// outputs
VX_writeback_if writeback_if
);
wire alu_valid = alu_commit_if.valid && alu_commit_if.wb;
wire ld_valid = ld_commit_if.valid && ld_commit_if.wb;
wire ld_valid = ld_commit_if.valid && ld_commit_if.wb;
wire fpu_valid = fpu_commit_if.valid && fpu_commit_if.wb;
wire csr_valid = csr_commit_if.valid && csr_commit_if.wb;
wire mul_valid = mul_commit_if.valid && mul_commit_if.wb;
/*wire fpu_valid = fpu_commit_if.valid && fpu_commit_if.wb;*/
wire alu_valid = alu_commit_if.valid && alu_commit_if.wb;
wire wb_valid;
wire [`NW_BITS-1:0] wb_wid;
@@ -30,47 +28,40 @@ module VX_writeback #(
wire [`NUM_THREADS-1:0][31:0] wb_data;
wire wb_eop;
assign wb_valid = alu_valid ? alu_commit_if.valid :
ld_valid ? ld_commit_if.valid :
csr_valid ? csr_commit_if.valid :
mul_valid ? mul_commit_if.valid :
/*fpu_valid ?*/ fpu_commit_if.valid;
assign wb_valid = ld_valid |
fpu_valid |
csr_valid |
alu_valid;
assign wb_wid = alu_valid ? alu_commit_if.wid :
ld_valid ? ld_commit_if.wid :
csr_valid ? csr_commit_if.wid :
mul_valid ? mul_commit_if.wid :
/*fpu_valid ?*/ fpu_commit_if.wid;
assign wb_wid = ld_valid ? ld_commit_if.wid :
fpu_valid ? fpu_commit_if.wid :
csr_valid ? csr_commit_if.wid :
/*alu_valid ?*/ alu_commit_if.wid;
assign wb_PC = alu_valid ? alu_commit_if.PC :
ld_valid ? ld_commit_if.PC :
csr_valid ? csr_commit_if.PC :
mul_valid ? mul_commit_if.PC :
/*fpu_valid ?*/ fpu_commit_if.PC;
assign wb_PC = ld_valid ? ld_commit_if.PC :
fpu_valid ? fpu_commit_if.PC :
csr_valid ? csr_commit_if.PC :
/*alu_valid ?*/ alu_commit_if.PC;
assign wb_tmask = alu_valid ? alu_commit_if.tmask :
ld_valid ? ld_commit_if.tmask :
csr_valid ? csr_commit_if.tmask :
mul_valid ? mul_commit_if.tmask :
/*fpu_valid ?*/ fpu_commit_if.tmask;
assign wb_tmask = ld_valid ? ld_commit_if.tmask :
fpu_valid ? fpu_commit_if.tmask :
csr_valid ? csr_commit_if.tmask :
/*alu_valid ?*/ alu_commit_if.tmask;
assign wb_rd = alu_valid ? alu_commit_if.rd :
ld_valid ? ld_commit_if.rd :
csr_valid ? csr_commit_if.rd :
mul_valid ? mul_commit_if.rd :
/*fpu_valid ?*/ fpu_commit_if.rd;
assign wb_rd = ld_valid ? ld_commit_if.rd :
fpu_valid ? fpu_commit_if.rd :
csr_valid ? csr_commit_if.rd :
/*alu_valid ?*/ alu_commit_if.rd;
assign wb_data = alu_valid ? alu_commit_if.data :
ld_valid ? ld_commit_if.data :
csr_valid ? csr_commit_if.data :
mul_valid ? mul_commit_if.data :
/*fpu_valid ?*/ fpu_commit_if.data;
assign wb_data = ld_valid ? ld_commit_if.data :
fpu_valid ? fpu_commit_if.data :
csr_valid ? csr_commit_if.data :
/*alu_valid ?*/ alu_commit_if.data;
assign wb_eop = alu_valid ? alu_commit_if.eop :
ld_valid ? ld_commit_if.eop :
csr_valid ? csr_commit_if.eop :
mul_valid ? mul_commit_if.eop :
/*fpu_valid ?*/ fpu_commit_if.eop;
assign wb_eop = ld_valid ? ld_commit_if.eop :
fpu_valid ? fpu_commit_if.eop :
csr_valid ? csr_commit_if.eop :
/*alu_valid ?*/ alu_commit_if.eop;
wire stall = ~writeback_if.ready && writeback_if.valid;
@@ -85,11 +76,10 @@ module VX_writeback #(
.data_out ({writeback_if.valid, writeback_if.wid, writeback_if.PC, writeback_if.tmask, writeback_if.rd, writeback_if.data, writeback_if.eop})
);
assign alu_commit_if.ready = !stall;
assign ld_commit_if.ready = !stall && !alu_valid;
assign csr_commit_if.ready = !stall && !alu_valid && !ld_valid;
assign mul_commit_if.ready = !stall && !alu_valid && !ld_valid && !csr_valid;
assign fpu_commit_if.ready = !stall && !alu_valid && !ld_valid && !csr_valid && !mul_valid;
assign ld_commit_if.ready = !stall;
assign fpu_commit_if.ready = !stall && !ld_valid;
assign csr_commit_if.ready = !stall && !ld_valid && !fpu_valid;
assign alu_commit_if.ready = !stall && !ld_valid && !fpu_valid && !csr_valid;
// special workaround to get RISC-V tests Pass/Fail status
reg [31:0] last_wb_value [`NUM_REGS-1:0] /* verilator public */;

View File

@@ -301,7 +301,7 @@ module VX_bank #(
// read/Fill
.lookup (valid_st0 && !is_fill_st0),
.addr (addr_st0),
.fill (valid_st0 && is_fill_st0),
.fill (valid_st0 && is_fill_st0 && !crsq_in_stall),
.is_flush (is_flush_st0),
.tag_match (tag_match_st0)
);

View File

@@ -20,7 +20,7 @@ module VX_cache #(
// Core Request Queue Size
parameter CREQ_SIZE = 4,
// Miss Reservation Queue Knob
parameter MSHR_SIZE = 16,
parameter MSHR_SIZE = 8,
// DRAM Response Queue Size
parameter DRSQ_SIZE = 4,
// DRAM Request Queue Size

View File

@@ -164,7 +164,7 @@ module VX_shared_mem #(
) data (
.clk (clk),
.addr (per_bank_core_req_addr[i]),
.wren (per_bank_core_req_valid[i] && per_bank_core_req_rw[i] && crsq_in_ready),
.wren (per_bank_core_req_valid[i] && per_bank_core_req_rw[i]),
.byteen (per_bank_core_req_byteen[i]),
.rden (1'b1),
.din (per_bank_core_req_data[i]),

View File

@@ -10,8 +10,8 @@ interface VX_alu_req_if ();
wire [`NUM_THREADS-1:0] tmask;
wire [31:0] PC;
wire [31:0] next_PC;
wire [`ALU_BR_BITS-1:0] op_type;
wire is_br_op;
wire [`ALU_BITS-1:0] op_type;
wire [`MOD_BITS-1:0] op_mod;
wire rs1_is_PC;
wire rs2_is_imm;
wire [31:0] imm;

View File

@@ -1,25 +0,0 @@
`ifndef VX_MUL_REQ_IF
`define VX_MUL_REQ_IF
`include "VX_define.vh"
`ifndef EXT_M_ENABLE
`IGNORE_WARNINGS_BEGIN
`endif
// Request bundle for the multiply/divide unit: one instruction's worth of
// control fields plus per-thread source operands.
// NOTE(review): valid/ready look like a standard request handshake -
// confirm against the producer and consumer modules.
interface VX_mul_req_if ();
// Request qualifier (presumably asserted by the issuing stage).
wire valid;
// `NW_BITS-wide identifier (presumably the warp id - TODO confirm).
wire [`NW_BITS-1:0] wid;
// One bit per thread (presumably the active-thread mask).
wire [`NUM_THREADS-1:0] tmask;
// 32-bit program counter carried along with the request.
wire [31:0] PC;
// Operation selector, `MUL_BITS wide (presumably one of the `MUL_* opcodes).
wire [`MUL_BITS-1:0] op_type;
// Source operands: one 32-bit word per thread for each of the two sources.
wire [`NUM_THREADS-1:0][31:0] rs1_data;
wire [`NUM_THREADS-1:0][31:0] rs2_data;
// `NR_BITS-wide register index (presumably the destination register).
wire [`NR_BITS-1:0] rd;
// Single-bit flag (presumably: the result must be written back to rd).
wire wb;
// Back-pressure signal paired with valid (presumably driven by the consumer).
wire ready;
endinterface
`endif

View File

@@ -10,9 +10,6 @@ interface VX_perf_pipeline_if ();
wire [63:0] csr_stalls;
wire [63:0] alu_stalls;
wire [63:0] gpu_stalls;
`ifdef EXT_M_ENABLE
wire [63:0] mul_stalls;
`endif
`ifdef EXT_F_ENABLE
wire [63:0] fpu_stalls;
`endif

View File

@@ -105,33 +105,16 @@ module VX_fifo_queue #(
if (0 == BUFFERED) begin
if (FASTRAM) begin
`USE_FAST_BRAM reg [DATAW-1:0] shift_reg [SIZE];
reg [1:0][DATAW-1:0] shift_reg;
always @(posedge clk) begin
if (push) begin
shift_reg[1] <= shift_reg[0];
shift_reg[0] <= data_in;
end
always @(posedge clk) begin
if (push) begin
shift_reg[1] <= shift_reg[0];
shift_reg[0] <= data_in;
end
assign data_out = shift_reg[~used_r[0]];
end else begin
reg [DATAW-1:0] shift_reg [SIZE];
always @(posedge clk) begin
if (push) begin
shift_reg[1] <= shift_reg[0];
shift_reg[0] <= data_in;
end
end
assign data_out = shift_reg[~used_r[0]];
end
assign data_out = shift_reg[!used_r[0]];
end else begin
@@ -142,7 +125,7 @@ module VX_fifo_queue #(
if (push) begin
buffer <= data_in;
end
if (push && (empty_r || ((used_r == ADDRW'(1)) && pop))) begin
if (push && (empty_r || (used_r && pop))) begin
data_out_r <= data_in;
end else if (pop) begin
data_out_r <= buffer;

View File

@@ -59,6 +59,7 @@ module VX_skid_buffer #(
reg use_buffer;
wire push = valid_in && ready_in;
wire pop = !valid_out_r || ready_out;
always @(posedge clk) begin
if (reset) begin
@@ -68,11 +69,11 @@ module VX_skid_buffer #(
if (ready_out) begin
use_buffer <= 0;
end
if (push && valid_out_r && !ready_out) begin
if (push && !pop) begin
assert(!use_buffer);
use_buffer <= 1;
end
if (!valid_out_r || ready_out) begin
if (pop) begin
valid_out_r <= valid_in || use_buffer;
end
end
@@ -82,7 +83,7 @@ module VX_skid_buffer #(
if (push) begin
buffer <= data_in;
end
if (!valid_out_r || ready_out) begin
if (pop) begin
data_out_r <= use_buffer ? buffer : data_in;
end
end
@@ -118,8 +119,8 @@ module VX_skid_buffer #(
);
assign ready_in = !q_full;
assign valid_out = !q_empty;
assign valid_out = !q_empty;
end
end

View File

@@ -27,6 +27,9 @@ SINGLECORE += -DNUM_CLUSTERS=1 -DNUM_CORES=1 -DL2_ENABLE=0
#MULTICORE ?= -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1
MULTICORE ?= -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
SINGLECORE += $(CONFIGS)
MULTICORE += $(CONFIGS)
TOP = Vortex
RTL_DIR=../rtl
@@ -49,7 +52,7 @@ VL_FLAGS += --cc Vortex.v --top-module $(TOP)
# Use FPNEW FPU core
VL_FLAGS += -DFPU_FPNEW
DBG_FLAGS += -DVCD_OUTPUT $(DBG_FLAGS)
DBG_FLAGS += -DVCD_OUTPUT
THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu_count() // 2))')

View File

@@ -9,7 +9,7 @@ CP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objcopy
CFLAGS += -march=rv32imf -mabi=ilp32f -O3 -Wstack-usage=1024 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections
CFLAGS += -I$(VORTEX_RT_PATH)/include -I$(VORTEX_RT_PATH)/../hw
LDFLAGS += -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/linker/vx_link.ld -Wl,--gc-sections $(VORTEX_RT_PATH)/libvortexrt.a
LDFLAGS += -lm -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/linker/vx_link.ld -Wl,--gc-sections $(VORTEX_RT_PATH)/libvortexrt.a
PROJECT = vx_nl_main

View File

@@ -1,16 +1,22 @@
#include <vx_intrinsics.h>
#include <stdio.h>
#include <math.h>
#include <vx_print.h>
int main()
{
// Main is called with all threads of warp 0 active
vx_tmc(1);
const int Num = 9;
const float fNum = 9.0f;
vx_prints("Newlib Main ");
vx_printx(456);
vx_prints(" \n");
/*
 * Naive recursive Fibonacci.
 *
 * Returns n unchanged when n <= 1 (so 0, 1 and any negative value map to
 * themselves); otherwise returns the sum of the two preceding terms.
 */
int fibonacci(int n) {
    return (n <= 1) ? n
                    : fibonacci(n - 1) + fibonacci(n - 2);
}
/*
 * Test entry point.
 *
 * Computes fibonacci(Num) and the inverse square root of fNum, prints both
 * results, then prints the "Passed!" marker and returns 0.
 */
int main() {
    /* Integer path: recursive Fibonacci of Num. */
    int fib = fibonacci(Num);

    /* Floating-point path: sqrt() from <math.h> operates on double, and the
       quotient is narrowed back to float on assignment. */
    float isq = 1.0f / sqrt(fNum);

    vx_printf("fibonacci(%d) = %d\n", Num, fib);
    /* Label fixed: the value printed is the inverse square root. */
    vx_printf("invSqrt(%f) = %f\n", fNum, isq);

    vx_prints("Passed!\n");
    return 0;
}

File diff suppressed because it is too large Load Diff

Binary file not shown.

File diff suppressed because it is too large Load Diff