Pipeline refactoring - Fmax >= 222 MHz
This commit is contained in:
@@ -18,7 +18,7 @@ VX_SRCS = kernel.c
|
||||
|
||||
CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors
|
||||
|
||||
CXXFLAGS += -I../../include
|
||||
CXXFLAGS += -I../../include -I../../../hw
|
||||
|
||||
PROJECT = dogfood
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
#include <unistd.h>
|
||||
#include <string.h>
|
||||
#include <vortex.h>
|
||||
#include <VX_config.h>
|
||||
#include "testcases.h"
|
||||
#include "common.h"
|
||||
|
||||
@@ -25,6 +26,7 @@ public:
|
||||
this->add_test("imul", new Test_IMUL());
|
||||
this->add_test("idiv", new Test_IDIV());
|
||||
this->add_test("idiv-mul", new Test_IDIV_MUL());
|
||||
#ifdef EXT_F_ENABLE
|
||||
this->add_test("fadd", new Test_FADD());
|
||||
this->add_test("fsub", new Test_FSUB());
|
||||
this->add_test("fmul", new Test_FMUL());
|
||||
@@ -40,6 +42,7 @@ public:
|
||||
this->add_test("ftou", new Test_FTOU());
|
||||
this->add_test("tof", new Test_ITOF());
|
||||
this->add_test("utof", new Test_UTOF());
|
||||
#endif
|
||||
}
|
||||
|
||||
~TestMngr() {
|
||||
|
||||
@@ -15,8 +15,8 @@ union Float_t {
|
||||
};
|
||||
|
||||
inline bool almost_equal_eps(float a, float b, float eps = std::numeric_limits<float>::epsilon()) {
|
||||
auto tolerance = std::max(std::fabs(a), std::fabs(b)) * eps;
|
||||
return std::fabs(a - b) <= tolerance;
|
||||
auto tolerance = std::max(fabs(a), fabs(b)) * eps;
|
||||
return fabs(a - b) <= tolerance;
|
||||
}
|
||||
|
||||
inline bool almost_equal_ulp(float a, float b, int32_t ulp = 4) {
|
||||
|
||||
@@ -62,6 +62,7 @@ make ase
|
||||
# tests
|
||||
./run_ase.sh build_ase_1c ../../driver/tests/basic/basic -n 256
|
||||
./run_ase.sh build_ase_1c ../../driver/tests/demo/demo -n 16
|
||||
./run_ase.sh build_ase_1c ../../driver/tests/dogfood/dogfood -n 16
|
||||
./run_ase.sh build_ase_1c ../../benchmarks/opencl/vecadd/vecadd
|
||||
|
||||
# modify "vsim_run.tcl" to dump VCD trace
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
|
||||
# Analysis & Synthesis Assignments
|
||||
set_global_assignment -name VERILOG_INPUT_VERSION SYSTEMVERILOG_2009
|
||||
set_global_assignment -name ADD_PASS_THROUGH_LOGIC_TO_INFERRED_RAMS ON
|
||||
# set_global_assignment -name ADD_PASS_THROUGH_LOGIC_TO_INFERRED_RAMS ON
|
||||
set_global_assignment -name VERILOG_MACRO QUARTUS
|
||||
set_global_assignment -name VERILOG_MACRO SYNTHESIS
|
||||
set_global_assignment -name VERILOG_MACRO NDEBUG
|
||||
@@ -1035,8 +1035,7 @@ wire [SCOPE_DATAW+1:0] scope_data_in_ste;
|
||||
assign scope_data_in_st[0] = {`SCOPE_SIGNALS_DATA_LIST `SCOPE_SIGNALS_UPD_LIST, scope_changed, scope_start};
|
||||
assign scope_data_in_ste = scope_data_in_st[SCOPE_SR_DEPTH-1];
|
||||
|
||||
genvar i;
|
||||
for (i = 1; i < SCOPE_SR_DEPTH; i++) begin
|
||||
for (genvar i = 1; i < SCOPE_SR_DEPTH; i++) begin
|
||||
VX_generic_register #(
|
||||
.N (SCOPE_DATAW+2)
|
||||
) scope_sr (
|
||||
|
||||
@@ -10,98 +10,83 @@ module VX_alu_unit #(
|
||||
VX_alu_req_if alu_req_if,
|
||||
|
||||
// Outputs
|
||||
VX_branch_ctl_if branch_ctl_if,
|
||||
VX_exu_to_cmt_if alu_commit_if
|
||||
VX_exu_to_cmt_if alu_commit_if
|
||||
);
|
||||
reg [`NUM_THREADS-1:0][31:0] alu_result;
|
||||
wire [`NUM_THREADS-1:0][32:0] sub_result;
|
||||
wire [`NUM_THREADS-1:0][32:0] shift_result;
|
||||
reg [`NUM_THREADS-1:0][31:0] alu_result;
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] addsub_result;
|
||||
wire [`NUM_THREADS-1:0] less_result;
|
||||
wire [`NUM_THREADS-1:0][31:0] shift_result;
|
||||
reg [`NUM_THREADS-1:0][31:0] misc_result;
|
||||
|
||||
wire [`ALU_BITS-1:0] alu_op = alu_req_if.alu_op;
|
||||
wire [`ALU_BITS-1:0] alu_op = `ALU_OP(alu_req_if.op);
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_in1 = alu_req_if.rs1_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_in2 = alu_req_if.rs2_data;
|
||||
|
||||
genvar i;
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_in1_PC = alu_req_if.rs1_is_PC ? {`NUM_THREADS{alu_req_if.curr_PC}} : alu_in1;
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_in2_imm = alu_req_if.rs2_is_imm ? {`NUM_THREADS{alu_req_if.imm}} : alu_in2;
|
||||
|
||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||
wire negate_add = (alu_op == `ALU_SUB);
|
||||
wire signed_less = (alu_op == `ALU_SLT);
|
||||
wire signed_shift = (alu_op == `ALU_SRA);
|
||||
|
||||
wire [32:0] sub_in1 = {(alu_op != `ALU_SLTU) & (alu_op != `ALU_BLTU) & (alu_op != `ALU_BGEU) & alu_in1[i][31], alu_in1[i]};
|
||||
wire [32:0] sub_in2 = {(alu_op != `ALU_SLTU) & (alu_op != `ALU_BLTU) & (alu_op != `ALU_BGEU) & alu_in2[i][31], alu_in2[i]};
|
||||
assign sub_result[i] = $signed(sub_in1) - $signed(sub_in2);
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
wire [32:0] addsub_in1 = {alu_in1_PC[i], 1'b1};
|
||||
wire [32:0] addsub_in2 = {alu_in2_imm[i], 1'b0} ^ {33{negate_add}};
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
wire [32:0] addsub_addd = addsub_in1 + addsub_in2;
|
||||
`IGNORE_WARNINGS_END
|
||||
assign addsub_result[i] = addsub_addd[32:1];
|
||||
end
|
||||
|
||||
wire [32:0] shift_in1 = {(alu_op == `ALU_SRA) & alu_in1[i][31], alu_in1[i]};
|
||||
assign shift_result[i] = $signed(shift_in1) >>> alu_in2[i][4:0];
|
||||
|
||||
always @(*) begin
|
||||
case (alu_op)
|
||||
`ALU_SUB: alu_result[i] = sub_result[i][31:0];
|
||||
`ALU_SLL: alu_result[i] = alu_in1[i] << alu_in2[i][4:0];
|
||||
`ALU_SLT,
|
||||
`ALU_SLTU: alu_result[i] = 32'(sub_result[i][32]);
|
||||
`ALU_XOR: alu_result[i] = alu_in1[i] ^ alu_in2[i];
|
||||
`ALU_SRL,
|
||||
`ALU_SRA: alu_result[i] = shift_result[i][31:0];
|
||||
`ALU_OR: alu_result[i] = alu_in1[i] | alu_in2[i];
|
||||
`ALU_AND: alu_result[i] = alu_in1[i] & alu_in2[i];
|
||||
default: alu_result[i] = alu_in1[i] + alu_in2[i]; // ADD, LUI, AUIPC
|
||||
endcase
|
||||
end
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
wire [32:0] less_in1 = {signed_less & alu_in1[i][31], alu_in1[i]};
|
||||
wire [32:0] less_in2 = {signed_less & alu_in2_imm[i][31], alu_in2_imm[i]};
|
||||
assign less_result[i] = $signed(less_in1) < $signed(less_in2);
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
wire [32:0] shift_in1 = {signed_shift & alu_in1[i][31], alu_in1[i]};
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
wire [32:0] shift_value = $signed(shift_in1) >>> alu_in2_imm[i][4:0];
|
||||
`IGNORE_WARNINGS_END
|
||||
assign shift_result[i] = shift_value[31:0];
|
||||
end
|
||||
|
||||
wire [`NT_BITS-1:0] br_result_index;
|
||||
|
||||
VX_priority_encoder #(
|
||||
.N(`NUM_THREADS)
|
||||
) choose_alu_result (
|
||||
.data_in (alu_req_if.thread_mask),
|
||||
.data_out (br_result_index),
|
||||
`UNUSED_PIN (valid_out)
|
||||
);
|
||||
|
||||
wire [32:0] br_result = sub_result[br_result_index];
|
||||
wire br_sign = br_result[32];
|
||||
wire br_nzero = (| br_result[31:0]);
|
||||
wire br_sign_s1;
|
||||
wire br_nzero_s1;
|
||||
|
||||
wire [`BR_BITS-1:0] br_op = `IS_BR_OP(alu_req_if.alu_op) ? `BR_OP(alu_req_if.alu_op) : `BR_NO;
|
||||
wire [`BR_BITS-1:0] br_op_s1;
|
||||
|
||||
wire [31:0] br_addr = (br_op == `BR_JALR) ? alu_req_if.rs1_data[br_result_index] : alu_req_if.curr_PC;
|
||||
wire [31:0] br_dest = $signed(br_addr) + $signed(alu_req_if.offset);
|
||||
|
||||
wire is_jal = (alu_op == `ALU_JAL || alu_op == `ALU_JALR);
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_jal_result = is_jal ? {`NUM_THREADS{alu_req_if.next_PC}} : alu_result;
|
||||
|
||||
wire stall = ~alu_commit_if.ready && alu_commit_if.valid;
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
always @(*) begin
|
||||
case (alu_op)
|
||||
`ALU_AND: misc_result[i] = alu_in1[i] & alu_in2_imm[i];
|
||||
`ALU_OR: misc_result[i] = alu_in1[i] | alu_in2_imm[i];
|
||||
`ALU_XOR: misc_result[i] = alu_in1[i] ^ alu_in2_imm[i];
|
||||
//`ALU_SLL,
|
||||
default: misc_result[i] = alu_in1[i] << alu_in2_imm[i][4:0];
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
always @(*) begin
|
||||
case (`ALU_OP_CLASS(alu_op))
|
||||
0: alu_result[i] = addsub_result[i];
|
||||
1: alu_result[i] = {31'b0, less_result[i]};
|
||||
2: alu_result[i] = shift_result[i];
|
||||
default: alu_result[i] = misc_result[i];
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
VX_generic_register #(
|
||||
.N(1 + `NW_BITS + `ISTAG_BITS + (`NUM_THREADS * 32) + `BR_BITS + 32 + 1 + 1)
|
||||
.N(1 + `ISTAG_BITS + (`NUM_THREADS * 32))
|
||||
) alu_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
.stall (0),
|
||||
.flush (0),
|
||||
.in ({alu_req_if.valid, alu_req_if.warp_num, alu_req_if.issue_tag, alu_jal_result, br_op, br_dest, br_sign, br_nzero}),
|
||||
.out ({alu_commit_if.valid, branch_ctl_if.warp_num, alu_commit_if.issue_tag, alu_commit_if.data, br_op_s1, branch_ctl_if.dest, br_sign_s1, br_nzero_s1})
|
||||
);
|
||||
|
||||
reg br_taken;
|
||||
always @(*) begin
|
||||
case (br_op_s1)
|
||||
`BR_NE: br_taken = br_nzero_s1;
|
||||
`BR_EQ: br_taken = ~br_nzero_s1;
|
||||
`BR_LT,
|
||||
`BR_LTU: br_taken = br_sign_s1;
|
||||
`BR_GE,
|
||||
`BR_GEU: br_taken = ~br_sign_s1;
|
||||
default: br_taken = 1'b1;
|
||||
endcase
|
||||
end
|
||||
.in ({alu_req_if.valid, alu_req_if.issue_tag, alu_result}),
|
||||
.out ({alu_commit_if.valid, alu_commit_if.issue_tag, alu_commit_if.data})
|
||||
);
|
||||
|
||||
assign branch_ctl_if.valid = alu_commit_if.valid && (br_op_s1 != `BR_NO);
|
||||
assign branch_ctl_if.taken = br_taken;
|
||||
|
||||
assign alu_req_if.ready = ~stall;
|
||||
assign alu_req_if.ready = 1'b1;
|
||||
|
||||
endmodule
|
||||
56
hw/rtl/VX_bru_unit.v
Normal file
56
hw/rtl/VX_bru_unit.v
Normal file
@@ -0,0 +1,56 @@
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Branch unit: evaluates the branch condition and the branch destination for a
// request on bru_req_if, registers the results through VX_generic_register, and
// drives branch_ctl_if (valid/wid/taken/dest) and bru_commit_if
// (valid/issue_tag/data).
// NOTE(review): the `BRU_*, `NW_BITS, `ISTAG_BITS and `NUM_THREADS macros and the
// interface definitions live outside this file (presumably VX_define.vh) — confirm
// field widths there.
module VX_bru_unit #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// Inputs
|
||||
VX_bru_req_if bru_req_if,
|
||||
|
||||
// Outputs
|
||||
VX_branch_ctl_if branch_ctl_if,
|
||||
VX_exu_to_cmt_if bru_commit_if
|
||||
);
|
||||
// Control bits extracted from the requested operation by the `BRU_* macros.
// Their roles below: neg inverts the comparison, less selects "<" over "==",
// signed enables sign extension of the operands, static forces taken.
wire [`BRU_BITS-1:0] bru_op = bru_req_if.op;
|
||||
wire bru_neg = `BRU_NEG(bru_op);
|
||||
wire bru_less = `BRU_LESS(bru_op);
|
||||
wire bru_signed = `BRU_SIGNED(bru_op);
|
||||
wire bru_static = `BRU_STATIC(bru_op);
|
||||
|
||||
// Comparison operands, taken as 32-bit values from the request.
wire [31:0] rs1_data = bru_req_if.rs1_data;
|
||||
wire [31:0] rs2_data = bru_req_if.rs2_data;
|
||||
|
||||
// Widen both operands to 33 bits: the extra MSB copies the sign bit when
// bru_signed is set and is 0 otherwise, so a single signed "<" comparator
// produces either the signed or the unsigned ordering.
wire [32:0] signed_in1 = {bru_signed & rs1_data[31], rs1_data};
|
||||
wire [32:0] signed_in2 = {bru_signed & rs2_data[31], rs2_data};
|
||||
wire is_less = $signed(signed_in1) < $signed(signed_in2);
|
||||
|
||||
wire is_equal = (rs1_data == rs2_data);
|
||||
|
||||
// Pick the less-than or equality result, optionally invert it, and force the
// branch taken whenever bru_static is set.
wire taken = ((bru_less ? is_less : is_equal) ^ bru_neg) | bru_static;
|
||||
|
||||
// Destination = base + offset, where the base is the current PC when
// rs1_is_PC is set and rs1_data otherwise.
wire [31:0] base_addr = bru_req_if.rs1_is_PC ? bru_req_if.curr_PC : rs1_data;
|
||||
wire [31:0] dest = base_addr + bru_req_if.offset;
|
||||
|
||||
// curr_PC + 4, registered below and returned as the commit data
// (presumably the link address written back by JAL/JALR — confirm with decode).
wire [31:0] jal_result = bru_req_if.curr_PC + 4;
|
||||
wire [31:0] jal_result_r;
|
||||
|
||||
// Register carrying {valid, wid, issue_tag, taken, dest, jal_result}; N is the
// sum of those field widths (assuming wid is `NW_BITS and issue_tag is
// `ISTAG_BITS wide). stall and flush are tied to 0.
VX_generic_register #(
|
||||
.N(1 + `NW_BITS + `ISTAG_BITS + 1 + 32 + 32)
|
||||
) bru_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (0),
|
||||
.flush (0),
|
||||
.in ({bru_req_if.valid, bru_req_if.wid, bru_req_if.issue_tag, taken, dest, jal_result}),
|
||||
.out ({bru_commit_if.valid, branch_ctl_if.wid, bru_commit_if.issue_tag, branch_ctl_if.taken, branch_ctl_if.dest, jal_result_r})
|
||||
);
|
||||
|
||||
// The branch-control notification is valid whenever the commit output is valid.
assign branch_ctl_if.valid = bru_commit_if.valid;
|
||||
|
||||
// The same registered curr_PC + 4 value is replicated for every thread lane.
assign bru_commit_if.data = {`NUM_THREADS{jal_result_r}};
|
||||
|
||||
// The unit never back-pressures the requester: ready is constantly asserted.
assign bru_req_if.ready = 1'b1;
|
||||
|
||||
endmodule
|
||||
@@ -135,9 +135,7 @@ module VX_cluster #(
|
||||
wire [`NUM_CORES-1:0] per_core_busy;
|
||||
wire [`NUM_CORES-1:0] per_core_ebreak;
|
||||
|
||||
genvar i;
|
||||
|
||||
for (i = 0; i < `NUM_CORES; i++) begin
|
||||
for (genvar i = 0; i < `NUM_CORES; i++) begin
|
||||
VX_core #(
|
||||
.CORE_ID(i + (CLUSTER_ID * `NUM_CORES))
|
||||
) core (
|
||||
@@ -316,7 +314,7 @@ module VX_cluster #(
|
||||
wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] l2_snp_fwdin_tag;
|
||||
wire[`NUM_CORES-1:0] l2_snp_fwdin_ready;
|
||||
|
||||
for (i = 0; i < `L2NUM_REQUESTS; i = i + 2) begin
|
||||
for (genvar i = 0; i < `L2NUM_REQUESTS; i = i + 2) begin
|
||||
assign l2_core_req_valid [i] = per_core_D_dram_req_valid[(i/2)];
|
||||
assign l2_core_req_valid [i+1] = per_core_I_dram_req_valid[(i/2)];
|
||||
|
||||
@@ -472,7 +470,7 @@ module VX_cluster #(
|
||||
wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] arb_snp_fwdin_tag;
|
||||
wire[`NUM_CORES-1:0] arb_snp_fwdin_ready;
|
||||
|
||||
for (i = 0; i < `L2NUM_REQUESTS; i = i + 2) begin
|
||||
for (genvar i = 0; i < `L2NUM_REQUESTS; i = i + 2) begin
|
||||
assign arb_dram_req_valid [i] = per_core_D_dram_req_valid[(i/2)];
|
||||
assign arb_dram_req_valid [i+1] = per_core_I_dram_req_valid[(i/2)];
|
||||
|
||||
|
||||
@@ -8,6 +8,7 @@ module VX_commit #(
|
||||
|
||||
// inputs
|
||||
VX_exu_to_cmt_if alu_commit_if,
|
||||
VX_exu_to_cmt_if bru_commit_if,
|
||||
VX_exu_to_cmt_if lsu_commit_if,
|
||||
VX_exu_to_cmt_if mul_commit_if,
|
||||
VX_exu_to_cmt_if csr_commit_if,
|
||||
@@ -22,12 +23,13 @@ module VX_commit #(
|
||||
// update CSRs
|
||||
|
||||
wire [`NUM_EXS-1:0] commited_mask;
|
||||
assign commited_mask = {(alu_commit_if.valid && alu_commit_if.ready),
|
||||
(lsu_commit_if.valid && lsu_commit_if.ready),
|
||||
(csr_commit_if.valid && csr_commit_if.ready),
|
||||
(mul_commit_if.valid && mul_commit_if.ready),
|
||||
(fpu_commit_if.valid && fpu_commit_if.ready),
|
||||
(gpu_commit_if.valid && gpu_commit_if.ready)};
|
||||
assign commited_mask = {alu_commit_if.valid,
|
||||
bru_commit_if.valid,
|
||||
lsu_commit_if.valid,
|
||||
csr_commit_if.valid,
|
||||
mul_commit_if.valid,
|
||||
fpu_commit_if.valid,
|
||||
gpu_commit_if.valid};
|
||||
|
||||
wire [`NE_BITS:0] num_commits;
|
||||
|
||||
@@ -38,18 +40,10 @@ module VX_commit #(
|
||||
.count (num_commits)
|
||||
);
|
||||
|
||||
assign cmt_to_csr_if.valid = (| commited_mask);
|
||||
assign cmt_to_csr_if.warp_num = cmt_to_issue_if.fpu_data.warp_num;
|
||||
assign cmt_to_csr_if.num_commits = num_commits;
|
||||
|
||||
assign cmt_to_csr_if.has_fflags = (fpu_commit_if.valid && fpu_commit_if.ready) && fpu_commit_if.has_fflags;
|
||||
|
||||
integer i;
|
||||
|
||||
fflags_t fflags;
|
||||
always @(*) begin
|
||||
fflags = 0;
|
||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||
for (integer i = 0; i < `NUM_THREADS; i++) begin
|
||||
if (cmt_to_issue_if.fpu_data.thread_mask[i]) begin
|
||||
fflags.NX |= fpu_commit_if.fflags[i].NX;
|
||||
fflags.UF |= fpu_commit_if.fflags[i].UF;
|
||||
@@ -59,18 +53,39 @@ module VX_commit #(
|
||||
end
|
||||
end
|
||||
end
|
||||
assign cmt_to_csr_if.fflags = fflags;
|
||||
|
||||
fflags_t fflags_r;
|
||||
reg has_fflags_r;
|
||||
reg [`NW_BITS-1:0] wid_r;
|
||||
reg [`NE_BITS:0] num_commits_r;
|
||||
reg csr_update_r;
|
||||
|
||||
always @(posedge clk) begin
|
||||
csr_update_r <= (| commited_mask);
|
||||
fflags_r <= fflags;
|
||||
has_fflags_r <= fpu_commit_if.valid && fpu_commit_if.has_fflags;
|
||||
wid_r <= cmt_to_issue_if.fpu_data.wid;
|
||||
num_commits_r <= num_commits;
|
||||
end
|
||||
|
||||
assign cmt_to_csr_if.valid = csr_update_r;
|
||||
assign cmt_to_csr_if.wid = wid_r;
|
||||
assign cmt_to_csr_if.num_commits = num_commits_r;
|
||||
assign cmt_to_csr_if.has_fflags = has_fflags_r;
|
||||
assign cmt_to_csr_if.fflags = fflags_r;
|
||||
|
||||
// Notify issue stage
|
||||
|
||||
assign cmt_to_issue_if.alu_valid = alu_commit_if.valid && alu_commit_if.ready;
|
||||
assign cmt_to_issue_if.lsu_valid = lsu_commit_if.valid && lsu_commit_if.ready;
|
||||
assign cmt_to_issue_if.csr_valid = csr_commit_if.valid && csr_commit_if.ready;
|
||||
assign cmt_to_issue_if.mul_valid = mul_commit_if.valid && mul_commit_if.ready;
|
||||
assign cmt_to_issue_if.fpu_valid = fpu_commit_if.valid && fpu_commit_if.ready;
|
||||
assign cmt_to_issue_if.gpu_valid = gpu_commit_if.valid && gpu_commit_if.ready;
|
||||
assign cmt_to_issue_if.alu_valid = alu_commit_if.valid;
|
||||
assign cmt_to_issue_if.bru_valid = bru_commit_if.valid;
|
||||
assign cmt_to_issue_if.lsu_valid = lsu_commit_if.valid;
|
||||
assign cmt_to_issue_if.csr_valid = csr_commit_if.valid;
|
||||
assign cmt_to_issue_if.mul_valid = mul_commit_if.valid;
|
||||
assign cmt_to_issue_if.fpu_valid = fpu_commit_if.valid;
|
||||
assign cmt_to_issue_if.gpu_valid = gpu_commit_if.valid;
|
||||
|
||||
assign cmt_to_issue_if.alu_tag = alu_commit_if.issue_tag;
|
||||
assign cmt_to_issue_if.bru_tag = bru_commit_if.issue_tag;
|
||||
assign cmt_to_issue_if.lsu_tag = lsu_commit_if.issue_tag;
|
||||
assign cmt_to_issue_if.csr_tag = csr_commit_if.issue_tag;
|
||||
assign cmt_to_issue_if.mul_tag = mul_commit_if.issue_tag;
|
||||
@@ -84,6 +99,7 @@ module VX_commit #(
|
||||
.reset (reset),
|
||||
|
||||
.alu_commit_if (alu_commit_if),
|
||||
.bru_commit_if (bru_commit_if),
|
||||
.lsu_commit_if (lsu_commit_if),
|
||||
.csr_commit_if (csr_commit_if),
|
||||
.mul_commit_if (mul_commit_if),
|
||||
@@ -96,23 +112,26 @@ module VX_commit #(
|
||||
|
||||
`ifdef DBG_PRINT_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if (alu_commit_if.valid && alu_commit_if.ready) begin
|
||||
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=ALU, istag=%0d, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, cmt_to_issue_if.alu_data.warp_num, cmt_to_issue_if.alu_data.curr_PC, alu_commit_if.issue_tag, cmt_to_issue_if.alu_data.thread_mask, cmt_to_issue_if.alu_data.wb, cmt_to_issue_if.alu_data.rd, alu_commit_if.data);
|
||||
if (alu_commit_if.valid) begin
|
||||
$display("%t: Core%0d-commit: wid=%0d, PC=%0h, ex=ALU, istag=%0d, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, cmt_to_issue_if.alu_data.wid, cmt_to_issue_if.alu_data.curr_PC, alu_commit_if.issue_tag, cmt_to_issue_if.alu_data.thread_mask, cmt_to_issue_if.alu_data.wb, cmt_to_issue_if.alu_data.rd, alu_commit_if.data);
|
||||
end
|
||||
if (lsu_commit_if.valid && lsu_commit_if.ready) begin
|
||||
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=LSU, istag=%0d, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, cmt_to_issue_if.lsu_data.warp_num, cmt_to_issue_if.lsu_data.curr_PC, lsu_commit_if.issue_tag, cmt_to_issue_if.lsu_data.thread_mask, cmt_to_issue_if.lsu_data.wb, cmt_to_issue_if.lsu_data.rd, lsu_commit_if.data);
|
||||
if (bru_commit_if.valid) begin
|
||||
$display("%t: Core%0d-commit: wid=%0d, PC=%0h, ex=BRU, istag=%0d, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, cmt_to_issue_if.bru_data.wid, cmt_to_issue_if.bru_data.curr_PC, bru_commit_if.issue_tag, cmt_to_issue_if.bru_data.thread_mask, cmt_to_issue_if.bru_data.wb, cmt_to_issue_if.bru_data.rd, bru_commit_if.data);
|
||||
end
|
||||
if (csr_commit_if.valid && csr_commit_if.ready) begin
|
||||
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=CSR, istag=%0d, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, cmt_to_issue_if.csr_data.warp_num, cmt_to_issue_if.csr_data.curr_PC, csr_commit_if.issue_tag, cmt_to_issue_if.csr_data.thread_mask, cmt_to_issue_if.csr_data.wb, cmt_to_issue_if.csr_data.rd, csr_commit_if.data);
|
||||
if (lsu_commit_if.valid) begin
|
||||
$display("%t: Core%0d-commit: wid=%0d, PC=%0h, ex=LSU, istag=%0d, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, cmt_to_issue_if.lsu_data.wid, cmt_to_issue_if.lsu_data.curr_PC, lsu_commit_if.issue_tag, cmt_to_issue_if.lsu_data.thread_mask, cmt_to_issue_if.lsu_data.wb, cmt_to_issue_if.lsu_data.rd, lsu_commit_if.data);
|
||||
end
|
||||
if (csr_commit_if.valid) begin
|
||||
$display("%t: Core%0d-commit: wid=%0d, PC=%0h, ex=CSR, istag=%0d, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, cmt_to_issue_if.csr_data.wid, cmt_to_issue_if.csr_data.curr_PC, csr_commit_if.issue_tag, cmt_to_issue_if.csr_data.thread_mask, cmt_to_issue_if.csr_data.wb, cmt_to_issue_if.csr_data.rd, csr_commit_if.data);
|
||||
end
|
||||
if (mul_commit_if.valid && mul_commit_if.ready) begin
|
||||
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=MUL, istag=%0d, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, cmt_to_issue_if.mul_data.warp_num, cmt_to_issue_if.mul_data.curr_PC, mul_commit_if.issue_tag, cmt_to_issue_if.mul_data.thread_mask, cmt_to_issue_if.mul_data.wb, cmt_to_issue_if.mul_data.rd, mul_commit_if.data);
|
||||
// Trace committed MUL instructions, gated on the commit interface's valid signal.
if (mul_commit_if.valid) begin
|
||||
$display("%t: Core%0d-commit: wid=%0d, PC=%0h, ex=MUL, istag=%0d, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, cmt_to_issue_if.mul_data.wid, cmt_to_issue_if.mul_data.curr_PC, mul_commit_if.issue_tag, cmt_to_issue_if.mul_data.thread_mask, cmt_to_issue_if.mul_data.wb, cmt_to_issue_if.mul_data.rd, mul_commit_if.data);
|
||||
end
|
||||
if (fpu_commit_if.valid && fpu_commit_if.ready) begin
|
||||
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=FPU, istag=%0d, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, cmt_to_issue_if.fpu_data.warp_num, cmt_to_issue_if.fpu_data.curr_PC, fpu_commit_if.issue_tag, cmt_to_issue_if.fpu_data.thread_mask, cmt_to_issue_if.fpu_data.wb, cmt_to_issue_if.fpu_data.rd, fpu_commit_if.data);
|
||||
if (fpu_commit_if.valid) begin
|
||||
$display("%t: Core%0d-commit: wid=%0d, PC=%0h, ex=FPU, istag=%0d, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, cmt_to_issue_if.fpu_data.wid, cmt_to_issue_if.fpu_data.curr_PC, fpu_commit_if.issue_tag, cmt_to_issue_if.fpu_data.thread_mask, cmt_to_issue_if.fpu_data.wb, cmt_to_issue_if.fpu_data.rd, fpu_commit_if.data);
|
||||
end
|
||||
if (gpu_commit_if.valid && gpu_commit_if.ready) begin
|
||||
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=GPU, istag=%0d, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, cmt_to_issue_if.gpu_data.warp_num, cmt_to_issue_if.gpu_data.curr_PC, gpu_commit_if.issue_tag, cmt_to_issue_if.gpu_data.thread_mask, cmt_to_issue_if.gpu_data.wb, cmt_to_issue_if.gpu_data.rd, gpu_commit_if.data);
|
||||
if (gpu_commit_if.valid) begin
|
||||
$display("%t: Core%0d-commit: wid=%0d, PC=%0h, ex=GPU, istag=%0d, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, cmt_to_issue_if.gpu_data.wid, cmt_to_issue_if.gpu_data.curr_PC, gpu_commit_if.issue_tag, cmt_to_issue_if.gpu_data.thread_mask, cmt_to_issue_if.gpu_data.wb, cmt_to_issue_if.gpu_data.rd, gpu_commit_if.data);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
@@ -12,7 +12,7 @@ module VX_csr_arb (
|
||||
VX_csr_req_if csr_req_if,
|
||||
|
||||
// input
|
||||
VX_exu_to_cmt_if csr_rsp_if,
|
||||
VX_csr_rsp_if csr_rsp_if,
|
||||
|
||||
// outputs
|
||||
VX_exu_to_cmt_if csr_commit_if,
|
||||
@@ -28,9 +28,9 @@ module VX_csr_arb (
|
||||
// requests
|
||||
assign csr_req_if.valid = (~select_io_req) ? csr_core_req_if.valid : csr_io_req_if.valid;
|
||||
assign csr_req_if.issue_tag = (~select_io_req) ? csr_core_req_if.issue_tag : 0;
|
||||
assign csr_req_if.warp_num = (~select_io_req) ? csr_core_req_if.warp_num : 0;
|
||||
assign csr_req_if.wid = (~select_io_req) ? csr_core_req_if.wid : 0;
|
||||
assign csr_req_if.curr_PC = (~select_io_req) ? csr_core_req_if.curr_PC : 0;
|
||||
assign csr_req_if.csr_op = (~select_io_req) ? csr_core_req_if.csr_op : (csr_io_req_if.rw ? `CSR_RW : `CSR_RS);
|
||||
assign csr_req_if.op = (~select_io_req) ? csr_core_req_if.op : (csr_io_req_if.rw ? `CSR_RW : `CSR_RS);
|
||||
assign csr_req_if.csr_addr = (~select_io_req) ? csr_core_req_if.csr_addr : csr_io_req_if.addr;
|
||||
assign csr_req_if.csr_mask = (~select_io_req) ? csr_core_req_if.csr_mask : (csr_io_req_if.rw ? csr_io_req_if.data : 32'b0);
|
||||
assign csr_req_if.rd = (~select_io_req) ? csr_core_req_if.rd : 0;
|
||||
@@ -48,6 +48,6 @@ module VX_csr_arb (
|
||||
assign csr_commit_if.issue_tag= csr_rsp_if.issue_tag;
|
||||
assign csr_commit_if.data = csr_rsp_if.data;
|
||||
|
||||
assign csr_rsp_if.ready = select_io_rsp ? csr_io_rsp_if.ready : csr_commit_if.ready;
|
||||
assign csr_rsp_if.ready = select_io_rsp ? csr_io_rsp_if.ready : 1'b1;
|
||||
|
||||
endmodule
|
||||
|
||||
@@ -9,7 +9,7 @@ module VX_csr_data #(
|
||||
VX_cmt_to_csr_if cmt_to_csr_if,
|
||||
VX_csr_to_fpu_if csr_to_fpu_if,
|
||||
|
||||
input wire[`NW_BITS-1:0] warp_num,
|
||||
input wire[`NW_BITS-1:0] wid,
|
||||
|
||||
input wire read_enable,
|
||||
input wire[`CSR_ADDR_BITS-1:0] read_addr,
|
||||
@@ -38,24 +38,24 @@ module VX_csr_data #(
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (cmt_to_csr_if.has_fflags) begin
|
||||
csr_fflags[cmt_to_csr_if.warp_num] <= cmt_to_csr_if.fflags;
|
||||
csr_fcsr[cmt_to_csr_if.warp_num][`FFG_BITS-1:0] <= cmt_to_csr_if.fflags;
|
||||
csr_fflags[cmt_to_csr_if.wid] <= cmt_to_csr_if.fflags;
|
||||
csr_fcsr[cmt_to_csr_if.wid][`FFG_BITS-1:0] <= cmt_to_csr_if.fflags;
|
||||
end
|
||||
|
||||
if (write_enable) begin
|
||||
case (write_addr)
|
||||
`CSR_FFLAGS: begin
|
||||
csr_fcsr[warp_num][`FFG_BITS-1:0] <= write_data[`FFG_BITS-1:0];
|
||||
csr_fflags[warp_num] <= write_data[`FFG_BITS-1:0];
|
||||
csr_fcsr[wid][`FFG_BITS-1:0] <= write_data[`FFG_BITS-1:0];
|
||||
csr_fflags[wid] <= write_data[`FFG_BITS-1:0];
|
||||
end
|
||||
`CSR_FRM: begin
|
||||
csr_fcsr[warp_num][`FFG_BITS+`FRM_BITS-1:`FFG_BITS] <= write_data[`FRM_BITS-1:0];
|
||||
csr_frm[warp_num] <= write_data[`FRM_BITS-1:0];
|
||||
csr_fcsr[wid][`FFG_BITS+`FRM_BITS-1:`FFG_BITS] <= write_data[`FRM_BITS-1:0];
|
||||
csr_frm[wid] <= write_data[`FRM_BITS-1:0];
|
||||
end
|
||||
`CSR_FCSR: begin
|
||||
csr_fcsr[warp_num] <= write_data[`FFG_BITS+`FRM_BITS-1:0];
|
||||
csr_frm[warp_num] <= write_data[`FFG_BITS+`FRM_BITS-1:`FFG_BITS];
|
||||
csr_fflags[warp_num] <= write_data[`FFG_BITS-1:0];
|
||||
csr_fcsr[wid] <= write_data[`FFG_BITS+`FRM_BITS-1:0];
|
||||
csr_frm[wid] <= write_data[`FFG_BITS+`FRM_BITS-1:`FFG_BITS];
|
||||
csr_fflags[wid] <= write_data[`FFG_BITS-1:0];
|
||||
end
|
||||
`CSR_SATP: csr_satp <= write_data;
|
||||
|
||||
@@ -79,7 +79,7 @@ module VX_csr_data #(
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
csr_cycle <= 0;
|
||||
csr_cycle <= 0;
|
||||
csr_instret <= 0;
|
||||
end else begin
|
||||
csr_cycle <= csr_cycle + 1;
|
||||
@@ -91,15 +91,15 @@ module VX_csr_data #(
|
||||
|
||||
always @(*) begin
|
||||
case (read_addr)
|
||||
`CSR_FFLAGS : read_data = 32'(csr_fflags[warp_num]);
|
||||
`CSR_FRM : read_data = 32'(csr_frm[warp_num]);
|
||||
`CSR_FCSR : read_data = 32'(csr_fcsr[warp_num]);
|
||||
`CSR_FFLAGS : read_data = 32'(csr_fflags[wid]);
|
||||
`CSR_FRM : read_data = 32'(csr_frm[wid]);
|
||||
`CSR_FCSR : read_data = 32'(csr_fcsr[wid]);
|
||||
|
||||
`CSR_LWID : read_data = 32'(warp_num);
|
||||
`CSR_LWID : read_data = 32'(wid);
|
||||
`CSR_LTID ,
|
||||
`CSR_GTID ,
|
||||
`CSR_MHARTID ,
|
||||
`CSR_GWID : read_data = CORE_ID * `NUM_WARPS + 32'(warp_num);
|
||||
`CSR_GWID : read_data = CORE_ID * `NUM_WARPS + 32'(wid);
|
||||
`CSR_GCID : read_data = CORE_ID;
|
||||
`CSR_NT : read_data = `NUM_THREADS;
|
||||
`CSR_NW : read_data = `NUM_WARPS;
|
||||
@@ -134,6 +134,6 @@ module VX_csr_data #(
|
||||
endcase
|
||||
end
|
||||
|
||||
assign csr_to_fpu_if.frm = csr_frm[csr_to_fpu_if.warp_num];
|
||||
assign csr_to_fpu_if.frm = csr_frm[csr_to_fpu_if.wid];
|
||||
|
||||
endmodule
|
||||
@@ -51,9 +51,7 @@ module VX_csr_io_arb #(
|
||||
|
||||
end else begin
|
||||
|
||||
genvar i;
|
||||
|
||||
for (i = 0; i < NUM_REQUESTS; i++) begin
|
||||
for (genvar i = 0; i < NUM_REQUESTS; i++) begin
|
||||
assign out_csr_io_req_valid[i] = in_csr_io_req_valid && (request_id == `REQS_BITS'(i));
|
||||
assign out_csr_io_req_rw[i] = in_csr_io_req_rw;
|
||||
assign out_csr_io_req_addr[i] = in_csr_io_req_addr;
|
||||
@@ -78,7 +76,7 @@ module VX_csr_io_arb #(
|
||||
assign out_csr_io_rsp_valid = in_csr_io_rsp_valid [bus_rsp_sel];
|
||||
assign out_csr_io_rsp_data = in_csr_io_rsp_data [bus_rsp_sel];
|
||||
|
||||
for (i = 0; i < NUM_REQUESTS; i++) begin
|
||||
for (genvar i = 0; i < NUM_REQUESTS; i++) begin
|
||||
assign in_csr_io_rsp_ready[i] = out_csr_io_rsp_ready && (bus_rsp_sel == `REQS_BITS'(i));
|
||||
end
|
||||
|
||||
|
||||
@@ -15,11 +15,11 @@ module VX_csr_unit #(
|
||||
VX_csr_req_if csr_req_if,
|
||||
VX_exu_to_cmt_if csr_commit_if
|
||||
);
|
||||
VX_csr_req_if csr_pipe_req_if();
|
||||
VX_exu_to_cmt_if csr_pipe_commit_if();
|
||||
VX_csr_req_if csr_pipe_req_if();
|
||||
VX_csr_rsp_if csr_pipe_rsp_if();
|
||||
|
||||
wire select_io_req = csr_io_req_if.valid;
|
||||
wire select_io_rsp;
|
||||
wire select_io_req = csr_io_req_if.valid;
|
||||
wire select_io_rsp;
|
||||
|
||||
VX_csr_arb csr_arb (
|
||||
.clk (clk),
|
||||
@@ -29,7 +29,7 @@ module VX_csr_unit #(
|
||||
.csr_io_req_if (csr_io_req_if),
|
||||
.csr_req_if (csr_pipe_req_if),
|
||||
|
||||
.csr_rsp_if (csr_pipe_commit_if),
|
||||
.csr_rsp_if (csr_pipe_rsp_if),
|
||||
.csr_io_rsp_if (csr_io_rsp_if),
|
||||
.csr_commit_if (csr_commit_if),
|
||||
|
||||
@@ -41,7 +41,7 @@ module VX_csr_unit #(
|
||||
wire [`CSR_ADDR_BITS-1:0] csr_addr_s1;
|
||||
wire [31:0] csr_read_data, csr_read_data_s1;
|
||||
wire [31:0] csr_updated_data_s1;
|
||||
wire [`NW_BITS-1:0] warp_num_s1;
|
||||
wire [`NW_BITS-1:0] wid_s1;
|
||||
|
||||
VX_csr_data #(
|
||||
.CORE_ID(CORE_ID)
|
||||
@@ -56,12 +56,12 @@ module VX_csr_unit #(
|
||||
.write_enable (csr_we_s1),
|
||||
.write_data (csr_updated_data_s1[`CSR_WIDTH-1:0]),
|
||||
.write_addr (csr_addr_s1),
|
||||
.warp_num (csr_pipe_req_if.warp_num)
|
||||
.wid (csr_pipe_req_if.wid)
|
||||
);
|
||||
|
||||
wire csr_hazard = (csr_addr_s1 == csr_pipe_req_if.csr_addr)
|
||||
&& (warp_num_s1 == csr_pipe_req_if.warp_num)
|
||||
&& csr_pipe_commit_if.valid;
|
||||
&& (wid_s1 == csr_pipe_req_if.wid)
|
||||
&& csr_pipe_rsp_if.valid;
|
||||
|
||||
wire [31:0] csr_read_data_qual = csr_hazard ? csr_updated_data_s1 : csr_read_data;
|
||||
|
||||
@@ -71,7 +71,7 @@ module VX_csr_unit #(
|
||||
|
||||
always @(*) begin
|
||||
csr_we_s0_unqual = 0;
|
||||
case (csr_pipe_req_if.csr_op)
|
||||
case (csr_pipe_req_if.op)
|
||||
`CSR_RW: begin
|
||||
csr_updated_data = csr_pipe_req_if.csr_mask;
|
||||
csr_we_s0_unqual = 1;
|
||||
@@ -90,7 +90,7 @@ module VX_csr_unit #(
|
||||
|
||||
wire csr_we_s0 = csr_we_s0_unqual && csr_pipe_req_if.valid;
|
||||
|
||||
wire stall = ~csr_pipe_commit_if.ready && csr_pipe_commit_if.valid;
|
||||
wire stall = ~csr_pipe_rsp_if.ready && csr_pipe_rsp_if.valid;
|
||||
|
||||
VX_generic_register #(
|
||||
.N(1 + `ISTAG_BITS + `NW_BITS + 1 + `CSR_ADDR_BITS + 1 + 32 + 32)
|
||||
@@ -99,13 +99,12 @@ module VX_csr_unit #(
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (0),
|
||||
.in ({csr_pipe_req_if.valid, csr_pipe_req_if.issue_tag, csr_pipe_req_if.warp_num, csr_we_s0, csr_pipe_req_if.csr_addr, csr_pipe_req_if.is_io, csr_read_data_qual, csr_updated_data}),
|
||||
.out ({csr_pipe_commit_if.valid, csr_pipe_commit_if.issue_tag, warp_num_s1, csr_we_s1, csr_addr_s1, select_io_rsp, csr_read_data_s1, csr_updated_data_s1})
|
||||
.in ({csr_pipe_req_if.valid, csr_pipe_req_if.issue_tag, csr_pipe_req_if.wid, csr_we_s0, csr_pipe_req_if.csr_addr, csr_pipe_req_if.is_io, csr_read_data_qual, csr_updated_data}),
|
||||
.out ({csr_pipe_rsp_if.valid, csr_pipe_rsp_if.issue_tag, wid_s1, csr_we_s1, csr_addr_s1, select_io_rsp, csr_read_data_s1, csr_updated_data_s1})
|
||||
);
|
||||
|
||||
genvar i;
|
||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||
assign csr_pipe_commit_if.data[i] = (csr_addr_s1 == `CSR_LTID) ? i :
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
assign csr_pipe_rsp_if.data[i] = (csr_addr_s1 == `CSR_LTID) ? i :
|
||||
(csr_addr_s1 == `CSR_GTID) ? (csr_read_data_s1 * `NUM_THREADS + i) :
|
||||
csr_read_data_s1;
|
||||
end
|
||||
|
||||
@@ -19,7 +19,7 @@ module VX_decode #(
|
||||
wire [31:0] instr = ifetch_rsp_if.instr;
|
||||
|
||||
reg [`ALU_BITS-1:0] alu_op;
|
||||
reg [`BR_BITS-1:0] br_op;
|
||||
reg [`BRU_BITS-1:0] br_op;
|
||||
reg [`LSU_BITS-1:0] lsu_op;
|
||||
reg [`CSR_BITS-1:0] csr_op;
|
||||
reg [`MUL_BITS-1:0] mul_op;
|
||||
@@ -100,27 +100,27 @@ module VX_decode #(
|
||||
wire is_br = (is_btype || is_jal || is_jalr || is_jals);
|
||||
|
||||
always @(*) begin
|
||||
br_op = `BR_EQ;
|
||||
br_op = `BRU_OTHER;
|
||||
case (opcode)
|
||||
`INST_B: begin
|
||||
case (func3)
|
||||
3'h0: br_op = `BR_EQ;
|
||||
3'h1: br_op = `BR_NE;
|
||||
3'h4: br_op = `BR_LT;
|
||||
3'h5: br_op = `BR_GE;
|
||||
3'h6: br_op = `BR_LTU;
|
||||
3'h7: br_op = `BR_GEU;
|
||||
3'h0: br_op = `BRU_EQ;
|
||||
3'h1: br_op = `BRU_NE;
|
||||
3'h4: br_op = `BRU_LT;
|
||||
3'h5: br_op = `BRU_GE;
|
||||
3'h6: br_op = `BRU_LTU;
|
||||
3'h7: br_op = `BRU_GEU;
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
`INST_JAL: br_op = `BR_JAL;
|
||||
`INST_JALR: br_op = `BR_JALR;
|
||||
`INST_JAL: br_op = `BRU_JAL;
|
||||
`INST_JALR: br_op = `BRU_JALR;
|
||||
`INST_SYS: begin
|
||||
if (is_jals && u_12 == 12'h000) br_op = `BR_ECALL;
|
||||
if (is_jals && u_12 == 12'h001) br_op = `BR_EBREAK;
|
||||
if (is_jals && u_12 == 12'h302) br_op = `BR_MRET;
|
||||
if (is_jals && u_12 == 12'h102) br_op = `BR_SRET;
|
||||
if (is_jals && u_12 == 12'h7B2) br_op = `BR_DRET;
|
||||
if (is_jals && u_12 == 12'h000) br_op = `BRU_ECALL;
|
||||
if (is_jals && u_12 == 12'h001) br_op = `BRU_EBREAK;
|
||||
if (is_jals && u_12 == 12'h302) br_op = `BRU_MRET;
|
||||
if (is_jals && u_12 == 12'h102) br_op = `BRU_SRET;
|
||||
if (is_jals && u_12 == 12'h7B2) br_op = `BRU_DRET;
|
||||
end
|
||||
default:;
|
||||
endcase
|
||||
@@ -292,18 +292,17 @@ module VX_decode #(
|
||||
|
||||
VX_decode_if decode_tmp_if();
|
||||
|
||||
assign decode_tmp_if.valid = ifetch_rsp_if.valid;
|
||||
assign decode_tmp_if.warp_num = ifetch_rsp_if.warp_num;
|
||||
assign decode_tmp_if.thread_mask= ifetch_rsp_if.thread_mask;
|
||||
assign decode_tmp_if.curr_PC = ifetch_rsp_if.curr_PC;
|
||||
assign decode_tmp_if.next_PC = ifetch_rsp_if.curr_PC + 32'h4;
|
||||
assign decode_tmp_if.valid = ifetch_rsp_if.valid;
|
||||
assign decode_tmp_if.wid = ifetch_rsp_if.wid;
|
||||
assign decode_tmp_if.thread_mask = ifetch_rsp_if.thread_mask;
|
||||
assign decode_tmp_if.curr_PC = ifetch_rsp_if.curr_PC;
|
||||
|
||||
assign decode_tmp_if.ex_type = is_lsu ? `EX_LSU :
|
||||
is_csr ? `EX_CSR :
|
||||
is_mul ? `EX_MUL :
|
||||
is_fpu ? `EX_FPU :
|
||||
is_gpu ? `EX_GPU :
|
||||
is_br ? `EX_ALU :
|
||||
is_br ? `EX_BRU :
|
||||
(is_rtype || is_itype || is_lui || is_auipc) ? `EX_ALU :
|
||||
`EX_NOP;
|
||||
|
||||
@@ -312,7 +311,7 @@ module VX_decode #(
|
||||
is_mul ? `OP_BITS'(mul_op) :
|
||||
is_fpu ? `OP_BITS'(fpu_op) :
|
||||
is_gpu ? `OP_BITS'(gpu_op) :
|
||||
is_br ? `OP_BITS'({1'b1, br_op}) :
|
||||
is_br ? `OP_BITS'(br_op) :
|
||||
(is_rtype || is_itype || is_lui || is_auipc) ? `OP_BITS'(alu_op) :
|
||||
0;
|
||||
|
||||
@@ -347,28 +346,28 @@ module VX_decode #(
|
||||
is_csr ? 32'(u_12) :
|
||||
src2_imm;
|
||||
|
||||
assign decode_tmp_if.rs1_is_PC = is_auipc;
|
||||
assign decode_tmp_if.rs1_is_PC = is_auipc || is_btype || is_jal || is_jals;
|
||||
assign decode_tmp_if.rs2_is_imm = is_itype || is_lui || is_auipc || is_csr_imm;
|
||||
|
||||
assign decode_tmp_if.frm = func3;
|
||||
|
||||
assign join_if.is_join = valid_in && is_gpu && (gpu_op == `GPU_JOIN);
|
||||
assign join_if.warp_num = ifetch_rsp_if.warp_num;
|
||||
assign join_if.wid = ifetch_rsp_if.wid;
|
||||
|
||||
assign wstall_if.wstall = valid_in && (is_btype || is_jal || is_jalr || (is_gpu && (gpu_op == `GPU_TMC || gpu_op == `GPU_SPLIT || gpu_op == `GPU_BAR)));
|
||||
assign wstall_if.warp_num = ifetch_rsp_if.warp_num;
|
||||
assign wstall_if.wid = ifetch_rsp_if.wid;
|
||||
|
||||
wire stall = ~decode_if.ready && decode_if.valid;
|
||||
|
||||
VX_generic_register #(
|
||||
.N(1 + `NW_BITS + `NUM_THREADS + 32 + 32 + `NR_BITS + `NR_BITS + `NR_BITS + 32 + 1 + `EX_BITS + `OP_BITS + 1 + `NR_BITS + 1 + 1 + `FRM_BITS + `NUM_REGS)
|
||||
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + `NR_BITS + `NR_BITS + 32 + 1 + `EX_BITS + `OP_BITS + 1 + `NR_BITS + 1 + 1 + `FRM_BITS + `NUM_REGS)
|
||||
) decode_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (0),
|
||||
.in ({decode_tmp_if.valid, decode_tmp_if.warp_num, decode_tmp_if.thread_mask, decode_tmp_if.curr_PC, decode_tmp_if.next_PC, decode_tmp_if.rd, decode_tmp_if.rs1, decode_tmp_if.rs2, decode_tmp_if.imm, decode_tmp_if.rs1_is_PC, decode_tmp_if.rs2_is_imm, decode_tmp_if.ex_type, decode_tmp_if.ex_op, decode_tmp_if.wb, decode_tmp_if.rs3, decode_tmp_if.use_rs3, decode_tmp_if.frm, decode_tmp_if.reg_use_mask}),
|
||||
.out ({decode_if.valid, decode_if.warp_num, decode_if.thread_mask, decode_if.curr_PC, decode_if.next_PC, decode_if.rd, decode_if.rs1, decode_if.rs2, decode_if.imm, decode_if.rs1_is_PC, decode_if.rs2_is_imm, decode_if.ex_type, decode_if.ex_op, decode_if.wb, decode_if.rs3, decode_if.use_rs3, decode_if.frm, decode_if.reg_use_mask})
|
||||
.in ({decode_tmp_if.valid, decode_tmp_if.wid, decode_tmp_if.thread_mask, decode_tmp_if.curr_PC, decode_tmp_if.rd, decode_tmp_if.rs1, decode_tmp_if.rs2, decode_tmp_if.imm, decode_tmp_if.rs1_is_PC, decode_tmp_if.rs2_is_imm, decode_tmp_if.ex_type, decode_tmp_if.ex_op, decode_tmp_if.wb, decode_tmp_if.rs3, decode_tmp_if.use_rs3, decode_tmp_if.frm, decode_tmp_if.reg_use_mask}),
|
||||
.out ({decode_if.valid, decode_if.wid, decode_if.thread_mask, decode_if.curr_PC, decode_if.rd, decode_if.rs1, decode_if.rs2, decode_if.imm, decode_if.rs1_is_PC, decode_if.rs2_is_imm, decode_if.ex_type, decode_if.ex_op, decode_if.wb, decode_if.rs3, decode_if.use_rs3, decode_if.frm, decode_if.reg_use_mask})
|
||||
);
|
||||
|
||||
assign ifetch_rsp_if.ready = ~stall;
|
||||
@@ -376,7 +375,7 @@ module VX_decode #(
|
||||
`ifdef DBG_PRINT_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if (decode_tmp_if.valid && ~stall) begin
|
||||
$write("%t: Core%0d-Decode: warp=%0d, PC=%0h, ex=", $time, CORE_ID, decode_tmp_if.warp_num, decode_tmp_if.curr_PC);
|
||||
$write("%t: Core%0d-Decode: wid=%0d, PC=%0h, ex=", $time, CORE_ID, decode_tmp_if.wid, decode_tmp_if.curr_PC);
|
||||
print_ex_type(decode_tmp_if.ex_type);
|
||||
$write(", op=");
|
||||
print_ex_op(decode_tmp_if.ex_type, decode_tmp_if.ex_op);
|
||||
@@ -386,6 +385,7 @@ module VX_decode #(
|
||||
|
||||
// trap unsupported instructions
|
||||
assert(~(~stall && (decode_tmp_if.ex_type == `EX_ALU) && `ALU_OP(decode_tmp_if.ex_op) == `ALU_OTHER));
|
||||
assert(~(~stall && (decode_tmp_if.ex_type == `EX_BRU) && `BRU_OP(decode_tmp_if.ex_op) == `BRU_OTHER));
|
||||
assert(~(~stall && (decode_tmp_if.ex_type == `EX_CSR) && `CSR_OP(decode_tmp_if.ex_op) == `CSR_OTHER));
|
||||
assert(~(~stall && (decode_tmp_if.ex_type == `EX_GPU) && `GPU_OP(decode_tmp_if.ex_op) == `GPU_OTHER));
|
||||
end
|
||||
|
||||
@@ -3,7 +3,6 @@
|
||||
|
||||
`include "VX_platform.vh"
|
||||
`include "VX_config.vh"
|
||||
`include "VX_scope.vh"
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
@@ -38,8 +37,8 @@
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define LATENCY_IDIV 23
|
||||
`define LATENCY_IMUL 2
|
||||
`define LATENCY_IDIV 33
|
||||
`define LATENCY_IMUL 3
|
||||
|
||||
`define LATENCY_FDIV 16
|
||||
`define LATENCY_FSQRT 10
|
||||
@@ -87,72 +86,70 @@
|
||||
`define BYTEEN_BITS 3
|
||||
`define BYTEEN_TYPE(x) x[1:0]
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define BR_EQ 4'h0
|
||||
`define BR_NE 4'h1
|
||||
`define BR_LT 4'h2
|
||||
`define BR_GE 4'h3
|
||||
`define BR_LTU 4'h4
|
||||
`define BR_GEU 4'h5
|
||||
`define BR_JAL 4'h6
|
||||
`define BR_JALR 4'h7
|
||||
`define BR_ECALL 4'h8
|
||||
`define BR_EBREAK 4'h9
|
||||
`define BR_MRET 4'hA
|
||||
`define BR_SRET 4'hB
|
||||
`define BR_DRET 4'hC
|
||||
`define BR_NO 4'hF
|
||||
`define BR_BITS 4
|
||||
`define FRM_RNE 3'b000 // round to nearest even
|
||||
`define FRM_RTZ 3'b001 // round to zero
|
||||
`define FRM_RDN 3'b010 // round to -inf
|
||||
`define FRM_RUP 3'b011 // round to +inf
|
||||
`define FRM_RMM 3'b100 // round to nearest max magnitude
|
||||
`define FRM_DYN 3'b111 // dynamic mode
|
||||
`define FRM_BITS 3
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define EX_NOP 3'h0
|
||||
`define EX_ALU 3'h1
|
||||
`define EX_LSU 3'h2
|
||||
`define EX_CSR 3'h3
|
||||
`define EX_MUL 3'h4
|
||||
`define EX_FPU 3'h5
|
||||
`define EX_GPU 3'h6
|
||||
`define EX_BRU 3'h2
|
||||
`define EX_LSU 3'h3
|
||||
`define EX_CSR 3'h4
|
||||
`define EX_MUL 3'h5
|
||||
`define EX_FPU 3'h6
|
||||
`define EX_GPU 3'h7
|
||||
`define EX_BITS 3
|
||||
|
||||
`define NUM_EXS 6
|
||||
`define NUM_EXS 7
|
||||
`define NE_BITS `LOG2UP(`NUM_EXS)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define OP_BITS 5
|
||||
|
||||
`define ALU_ADD 5'h00
|
||||
`define ALU_SUB 5'h01
|
||||
`define ALU_SLL 5'h02
|
||||
`define ALU_SRL 5'h03
|
||||
`define ALU_SRA 5'h04
|
||||
`define ALU_SLT 5'h05
|
||||
`define ALU_SLTU 5'h06
|
||||
`define ALU_XOR 5'h07
|
||||
`define ALU_OR 5'h08
|
||||
`define ALU_AND 5'h09
|
||||
`define ALU_LUI 5'h0A
|
||||
`define ALU_AUIPC 5'h0B
|
||||
`define ALU_BEQ {1'b1, `BR_EQ}
|
||||
`define ALU_BNE {1'b1, `BR_NE}
|
||||
`define ALU_BLT {1'b1, `BR_LT}
|
||||
`define ALU_BGE {1'b1, `BR_GE}
|
||||
`define ALU_BLTU {1'b1, `BR_LTU}
|
||||
`define ALU_BGEU {1'b1, `BR_GEU}
|
||||
`define ALU_JAL {1'b1, `BR_JAL}
|
||||
`define ALU_JALR {1'b1, `BR_JALR}
|
||||
`define ALU_ECALL {1'b1, `BR_ECALL}
|
||||
`define ALU_EBREAK {1'b1, `BR_EBREAK}
|
||||
`define ALU_MRET {1'b1, `BR_MRET}
|
||||
`define ALU_SRET {1'b1, `BR_SRET}
|
||||
`define ALU_DRET {1'b1, `BR_DRET}
|
||||
`define ALU_OTHER 5'h1F
|
||||
`define ALU_BITS 5
|
||||
`define ALU_ADD 4'b0000
|
||||
`define ALU_SUB 4'b0001
|
||||
`define ALU_LUI 4'b0010
|
||||
`define ALU_AUIPC 4'b0011
|
||||
`define ALU_SLT 4'b0100
|
||||
`define ALU_SLTU 4'b0101
|
||||
`define ALU_SRL 4'b1000
|
||||
`define ALU_SRA 4'b1001
|
||||
`define ALU_AND 4'b1100
|
||||
`define ALU_OR 4'b1101
|
||||
`define ALU_XOR 4'b1110
|
||||
`define ALU_SLL 4'b1111
|
||||
`define ALU_OTHER 4'b0111
|
||||
`define ALU_BITS 4
|
||||
`define ALU_OP(x) x[`ALU_BITS-1:0]
|
||||
`define BR_OP(x) x[`BR_BITS-1:0]
|
||||
`define IS_BR_OP(x) x[4]
|
||||
`define ALU_OP_CLASS(x) x[3:2]
|
||||
|
||||
`define BRU_EQ 4'b0000
|
||||
`define BRU_NE 4'b0001
|
||||
`define BRU_LTU 4'b0010
|
||||
`define BRU_GEU 4'b0011
|
||||
`define BRU_LT 4'b0110
|
||||
`define BRU_GE 4'b0111
|
||||
`define BRU_JAL 4'b1000
|
||||
`define BRU_JALR 4'b1001
|
||||
`define BRU_ECALL 4'b1010
|
||||
`define BRU_EBREAK 4'b1011
|
||||
`define BRU_MRET 4'b1100
|
||||
`define BRU_SRET 4'b1101
|
||||
`define BRU_DRET 4'b1110
|
||||
`define BRU_OTHER 4'b1111
|
||||
`define BRU_BITS 4
|
||||
`define BRU_OP(x) x[`BRU_BITS-1:0]
|
||||
`define BRU_NEG(x) x[0]
|
||||
`define BRU_LESS(x) x[1]
|
||||
`define BRU_SIGNED(x) x[2]
|
||||
`define BRU_STATIC(x) x[3]
|
||||
|
||||
`define LSU_LB {1'b0, `BYTEEN_SB}
|
||||
`define LSU_LH {1'b0, `BYTEEN_SH}
|
||||
@@ -213,14 +210,6 @@
|
||||
`define FPU_BITS 5
|
||||
`define FPU_OP(x) x[`FPU_BITS-1:0]
|
||||
|
||||
`define FRM_RNE 3'b000 // round to nearest even
|
||||
`define FRM_RTZ 3'b001 // round to zero
|
||||
`define FRM_RDN 3'b010 // round to -inf
|
||||
`define FRM_RUP 3'b011 // round to +inf
|
||||
`define FRM_RMM 3'b100 // round to nearest max magnitude
|
||||
`define FRM_DYN 3'b111 // dynamic mode
|
||||
`define FRM_BITS 3
|
||||
|
||||
`define GPU_TMC 3'h0
|
||||
`define GPU_WSPAWN 3'h1
|
||||
`define GPU_SPLIT 3'h2
|
||||
@@ -273,7 +262,7 @@
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`ifdef DBG_CORE_REQ_INFO // pc, wb, rd, warp_num
|
||||
`ifdef DBG_CORE_REQ_INFO // pc, wb, rd, wid
|
||||
`define DEBUG_CORE_REQ_MDATA_WIDTH (32 + 1 + `NR_BITS + `NW_BITS)
|
||||
`else
|
||||
`define DEBUG_CORE_REQ_MDATA_WIDTH 0
|
||||
@@ -421,34 +410,6 @@
|
||||
|
||||
`define DRAM_TO_BYTE_ADDR(x) {x, (32-$bits(x))'(0)}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
typedef struct packed {
|
||||
logic [`NW_BITS-1:0] warp_num;
|
||||
logic [`NUM_THREADS-1:0] thread_mask;
|
||||
logic [31:0] curr_PC;
|
||||
logic [`NR_BITS-1:0] rd;
|
||||
logic wb;
|
||||
} issue_data_t;
|
||||
|
||||
typedef struct packed {
|
||||
logic is_normal;
|
||||
logic is_zero;
|
||||
logic is_subnormal;
|
||||
logic is_inf;
|
||||
logic is_nan;
|
||||
logic is_signaling;
|
||||
logic is_quiet;
|
||||
} fp_type_t;
|
||||
|
||||
typedef struct packed {
|
||||
logic NV; // Invalid
|
||||
logic DZ; // Divide by zero
|
||||
logic OF; // Overflow
|
||||
logic UF; // Underflow
|
||||
logic NX; // Inexact
|
||||
} fflags_t;
|
||||
|
||||
`define FFG_BITS $bits(fflags_t)
|
||||
`include "VX_types.vh"
|
||||
|
||||
`endif
|
||||
|
||||
@@ -22,6 +22,7 @@ module VX_execute #(
|
||||
|
||||
// inputs
|
||||
VX_alu_req_if alu_req_if,
|
||||
VX_bru_req_if bru_req_if,
|
||||
VX_lsu_req_if lsu_req_if,
|
||||
VX_csr_req_if csr_req_if,
|
||||
VX_mul_req_if mul_req_if,
|
||||
@@ -32,6 +33,7 @@ module VX_execute #(
|
||||
VX_branch_ctl_if branch_ctl_if,
|
||||
VX_warp_ctl_if warp_ctl_if,
|
||||
VX_exu_to_cmt_if alu_commit_if,
|
||||
VX_exu_to_cmt_if bru_commit_if,
|
||||
VX_exu_to_cmt_if lsu_commit_if,
|
||||
VX_exu_to_cmt_if csr_commit_if,
|
||||
VX_exu_to_cmt_if mul_commit_if,
|
||||
@@ -49,10 +51,19 @@ module VX_execute #(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.alu_req_if (alu_req_if),
|
||||
.branch_ctl_if (branch_ctl_if),
|
||||
.alu_commit_if (alu_commit_if)
|
||||
);
|
||||
|
||||
VX_bru_unit #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) bru_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.bru_req_if (bru_req_if),
|
||||
.branch_ctl_if (branch_ctl_if),
|
||||
.bru_commit_if (bru_commit_if)
|
||||
);
|
||||
|
||||
VX_lsu_unit #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) lsu_unit (
|
||||
@@ -116,29 +127,33 @@ module VX_execute #(
|
||||
VX_gpu_unit #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) gpu_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.gpu_req_if (gpu_req_if),
|
||||
.warp_ctl_if (warp_ctl_if),
|
||||
.gpu_commit_if (gpu_commit_if)
|
||||
);
|
||||
|
||||
assign ebreak = alu_req_if.valid && (alu_req_if.alu_op == `ALU_EBREAK || alu_req_if.alu_op == `ALU_ECALL);
|
||||
assign ebreak = bru_req_if.valid
|
||||
&& (bru_req_if.op == `BRU_EBREAK
|
||||
|| bru_req_if.op == `BRU_ECALL);
|
||||
|
||||
`SCOPE_ASSIGN (scope_decode_valid, decode_if.valid);
|
||||
`SCOPE_ASSIGN (scope_decode_warp_num, decode_if.warp_num);
|
||||
`SCOPE_ASSIGN (scope_decode_wid, decode_if.wid);
|
||||
`SCOPE_ASSIGN (scope_decode_curr_PC, decode_if.curr_PC);
|
||||
`SCOPE_ASSIGN (scope_decode_is_jal, decode_if.is_jal);
|
||||
`SCOPE_ASSIGN (scope_decode_rs1, decode_if.rs1);
|
||||
`SCOPE_ASSIGN (scope_decode_rs2, decode_if.rs2);
|
||||
|
||||
`SCOPE_ASSIGN (scope_execute_valid, alu_req_if.valid);
|
||||
`SCOPE_ASSIGN (scope_execute_warp_num, alu_req_if.warp_num);
|
||||
`SCOPE_ASSIGN (scope_execute_wid, alu_req_if.wid);
|
||||
`SCOPE_ASSIGN (scope_execute_curr_PC, alu_req_if.curr_PC);
|
||||
`SCOPE_ASSIGN (scope_execute_rd, alu_req_if.rd);
|
||||
`SCOPE_ASSIGN (scope_execute_a, alu_req_if.rs1_data);
|
||||
`SCOPE_ASSIGN (scope_execute_b, alu_req_if.rs2_data);
|
||||
|
||||
`SCOPE_ASSIGN (scope_writeback_valid, writeback_if.valid);
|
||||
`SCOPE_ASSIGN (scope_writeback_warp_num, writeback_if.warp_num);
|
||||
`SCOPE_ASSIGN (scope_writeback_wid, writeback_if.wid);
|
||||
`SCOPE_ASSIGN (scope_writeback_curr_PC, writeback_if.curr_PC);
|
||||
`SCOPE_ASSIGN (scope_writeback_wb, writeback_if.wb);
|
||||
`SCOPE_ASSIGN (scope_writeback_rd, writeback_if.rd);
|
||||
|
||||
@@ -14,9 +14,27 @@ module VX_fpu_unit #(
|
||||
// outputs
|
||||
VX_fpu_to_cmt_if fpu_commit_if
|
||||
);
|
||||
|
||||
assign csr_to_fpu_if.warp_num = fpu_req_if.warp_num;
|
||||
wire [`FRM_BITS-1:0] frm = (fpu_req_if.frm == `FRM_DYN) ? csr_to_fpu_if.frm : fpu_req_if.frm;
|
||||
VX_fpu_req_if fpu_req_tmp_if();
|
||||
|
||||
// resolve dynamic FRM
|
||||
wire [`FRM_BITS-1:0] frm, frm_tmp;
|
||||
assign csr_to_fpu_if.wid = fpu_req_if.wid;
|
||||
assign frm = (fpu_req_if.frm == `FRM_DYN) ? csr_to_fpu_if.frm : fpu_req_if.frm;
|
||||
|
||||
// use a skid buffer since fpcore has realtime backpressure
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (`ISTAG_BITS + `NW_BITS + 32 + `FPU_BITS + `FRM_BITS + (3 * `NUM_THREADS * 32)),
|
||||
.SIZE (0)
|
||||
) input_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (fpu_req_if.valid),
|
||||
.ready_in (fpu_req_if.ready),
|
||||
.data_in ({fpu_req_if.issue_tag, fpu_req_if.wid, fpu_req_if.curr_PC, fpu_req_if.op, frm, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data}),
|
||||
.data_out ({fpu_req_tmp_if.issue_tag, fpu_req_tmp_if.wid, fpu_req_tmp_if.curr_PC, fpu_req_tmp_if.op, frm_tmp, fpu_req_tmp_if.rs1_data, fpu_req_tmp_if.rs2_data, fpu_req_tmp_if.rs3_data}),
|
||||
.ready_out (fpu_req_tmp_if.ready),
|
||||
.valid_out (fpu_req_tmp_if.valid)
|
||||
);
|
||||
|
||||
`ifdef SYNTHESIS
|
||||
|
||||
@@ -24,17 +42,17 @@ module VX_fpu_unit #(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.valid_in (fpu_req_if.valid),
|
||||
.ready_in (fpu_req_if.ready),
|
||||
.valid_in (fpu_req_tmp_if.valid),
|
||||
.ready_in (fpu_req_tmp_if.ready),
|
||||
|
||||
.tag_in (fpu_req_if.issue_tag),
|
||||
.tag_in (fpu_req_tmp_if.issue_tag),
|
||||
|
||||
.op (fpu_req_if.fpu_op),
|
||||
.frm (frm),
|
||||
.op (fpu_req_tmp_if.op),
|
||||
.frm (frm_tmp),
|
||||
|
||||
.dataa (fpu_req_if.rs1_data),
|
||||
.datab (fpu_req_if.rs2_data),
|
||||
.datac (fpu_req_if.rs3_data),
|
||||
.dataa (fpu_req_tmp_if.rs1_data),
|
||||
.datab (fpu_req_tmp_if.rs2_data),
|
||||
.datac (fpu_req_tmp_if.rs3_data),
|
||||
.result (fpu_commit_if.data),
|
||||
|
||||
.has_fflags (fpu_commit_if.has_fflags),
|
||||
@@ -42,7 +60,7 @@ module VX_fpu_unit #(
|
||||
|
||||
.tag_out (fpu_commit_if.issue_tag),
|
||||
|
||||
.ready_out (fpu_commit_if.ready),
|
||||
.ready_out (1'b1),
|
||||
.valid_out (fpu_commit_if.valid)
|
||||
);
|
||||
|
||||
@@ -57,17 +75,17 @@ module VX_fpu_unit #(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.valid_in (fpu_req_if.valid),
|
||||
.ready_in (fpu_req_if.ready),
|
||||
.valid_in (fpu_req_tmp_if.valid),
|
||||
.ready_in (fpu_req_tmp_if.ready),
|
||||
|
||||
.tag_in (fpu_req_if.issue_tag),
|
||||
.tag_in (fpu_req_tmp_if.issue_tag),
|
||||
|
||||
.op (fpu_req_if.fpu_op),
|
||||
.frm (frm),
|
||||
.op (fpu_req_tmp_if.op),
|
||||
.frm (frm_tmp),
|
||||
|
||||
.dataa (fpu_req_if.rs1_data),
|
||||
.datab (fpu_req_if.rs2_data),
|
||||
.datac (fpu_req_if.rs3_data),
|
||||
.dataa (fpu_req_tmp_if.rs1_data),
|
||||
.datab (fpu_req_tmp_if.rs2_data),
|
||||
.datac (fpu_req_tmp_if.rs3_data),
|
||||
.result (fpu_commit_if.data),
|
||||
|
||||
.has_fflags (fpu_commit_if.has_fflags),
|
||||
@@ -75,7 +93,7 @@ module VX_fpu_unit #(
|
||||
|
||||
.tag_out (fpu_commit_if.issue_tag),
|
||||
|
||||
.ready_out (fpu_commit_if.ready),
|
||||
.ready_out (1'b1),
|
||||
.valid_out (fpu_commit_if.valid)
|
||||
);
|
||||
|
||||
|
||||
@@ -10,40 +10,43 @@ module VX_gpr_fp_ctrl (
|
||||
input wire [`NUM_THREADS-1:0][31:0] rs2_data,
|
||||
|
||||
// outputs
|
||||
output wire [`NW_BITS+`NR_BITS-1:0] raddr1,
|
||||
|
||||
output wire [`NW_BITS+`NR_BITS-1:0] raddr1,
|
||||
VX_gpr_read_if gpr_read_if
|
||||
);
|
||||
|
||||
reg [`NUM_THREADS-1:0][31:0] tmp_rs1_data;
|
||||
reg [`NUM_THREADS-1:0][31:0] rs1_tmp_data, rs2_tmp_data, rs3_tmp_data;
|
||||
reg read_rs3;
|
||||
|
||||
wire rs3_delay = gpr_read_if.valid && gpr_read_if.use_rs3 && ~read_rs3;
|
||||
|
||||
wire read_fire = gpr_read_if.valid && read_rs3;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
read_rs3 <= 0;
|
||||
end else if (rs3_delay) begin
|
||||
read_rs3 <= 1;
|
||||
end else if (read_fire) begin
|
||||
read_rs3 <= 0;
|
||||
end
|
||||
end else begin
|
||||
if (rs3_delay) begin
|
||||
read_rs3 <= 1;
|
||||
end else if (read_fire) begin
|
||||
read_rs3 <= 0;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// backup original rs1 data
|
||||
always @(posedge clk) begin
|
||||
if (rs3_delay) begin
|
||||
tmp_rs1_data <= rs1_data;
|
||||
if (~gpr_read_if.use_rs3 || rs3_delay) begin
|
||||
rs1_tmp_data <= rs1_data;
|
||||
end
|
||||
rs2_tmp_data <= rs2_data;
|
||||
rs3_tmp_data <= rs1_data;
|
||||
end
|
||||
|
||||
// outputs
|
||||
assign raddr1 = {gpr_read_if.warp_num, (read_rs3 ? gpr_read_if.rs3 : gpr_read_if.rs1)};
|
||||
wire [`NR_BITS-1:0] rs1 = read_rs3 ? gpr_read_if.rs3 : gpr_read_if.rs1;
|
||||
assign raddr1 = {gpr_read_if.wid, rs1};
|
||||
assign gpr_read_if.ready = ~rs3_delay;
|
||||
assign gpr_read_if.rs1_data = gpr_read_if.use_rs3 ? tmp_rs1_data : rs1_data;
|
||||
assign gpr_read_if.rs2_data = rs2_data;
|
||||
assign gpr_read_if.rs3_data = rs1_data;
|
||||
assign gpr_read_if.rs1_data = rs1_tmp_data;
|
||||
assign gpr_read_if.rs2_data = rs2_tmp_data;
|
||||
assign gpr_read_if.rs3_data = rs3_tmp_data;
|
||||
|
||||
endmodule
|
||||
@@ -20,12 +20,7 @@ module VX_gpr_ram (
|
||||
for (integer i = 0; i < `NUM_REGS; i++) begin
|
||||
if (i == 0) begin
|
||||
ram[j * `NUM_REGS + i] = {`NUM_THREADS{32'h00000000}}; // set r0 = 0
|
||||
end
|
||||
`ifndef SYNTHESIS
|
||||
else begin
|
||||
ram[j * `NUM_REGS + i] = {`NUM_THREADS{32'hdeadbeef}};
|
||||
end
|
||||
`endif
|
||||
end
|
||||
end
|
||||
end
|
||||
@@ -48,8 +43,7 @@ module VX_gpr_ram (
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] write_bit_mask;
|
||||
|
||||
integer i;
|
||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||
for (integer i = 0; i < `NUM_THREADS; i++) begin
|
||||
assign write_bit_mask[i] = {32{~we[i]}};
|
||||
end
|
||||
|
||||
@@ -61,9 +55,8 @@ module VX_gpr_ram (
|
||||
wire [`NUM_THREADS-1:0][31:0] tmp_b;
|
||||
|
||||
`ifndef SYNTHESIS
|
||||
integer j;
|
||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||
for (j = 0; j < 32; j++) begin
|
||||
for (integer i = 0; i < `NUM_THREADS; i++) begin
|
||||
for (integer j = 0; j < 32; j++) begin
|
||||
assign rs1_data[i][j] = ((tmp_a[i][j] === 1'dx) || cena_1) ? 1'b0 : tmp_a[i][j];
|
||||
assign rs2_data[i][j] = ((tmp_b[i][j] === 1'dx) || cena_2) ? 1'b0 : tmp_b[i][j];
|
||||
end
|
||||
@@ -72,7 +65,7 @@ module VX_gpr_ram (
|
||||
assign rs1_data = tmp_a;
|
||||
assign rs2_data = tmp_b;
|
||||
`endif
|
||||
for (i = 0; i < 'NT; i=i+4) begin
|
||||
for (integer i = 0; i < 'NT; i=i+4) begin
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
rf2_32x128_wm1 first_ram (
|
||||
.CENYA(),
|
||||
|
||||
@@ -21,10 +21,10 @@ module VX_gpr_stage #(
|
||||
VX_gpr_ram gpr_ram (
|
||||
.clk (clk),
|
||||
.we ({`NUM_THREADS{writeback_if.valid}} & writeback_if.thread_mask),
|
||||
.waddr ({writeback_if.warp_num, writeback_if.rd}),
|
||||
.waddr ({writeback_if.wid, writeback_if.rd}),
|
||||
.wdata (writeback_if.data),
|
||||
.rs1 (raddr1),
|
||||
.rs2 ({gpr_read_if.warp_num, gpr_read_if.rs2}),
|
||||
.rs2 ({gpr_read_if.wid, gpr_read_if.rs2}),
|
||||
.rs1_data (rs1_data),
|
||||
.rs2_data (rs2_data)
|
||||
);
|
||||
@@ -39,9 +39,16 @@ module VX_gpr_stage #(
|
||||
.gpr_read_if(gpr_read_if)
|
||||
);
|
||||
`else
|
||||
assign raddr1 = {gpr_read_if.warp_num, gpr_read_if.rs1};
|
||||
assign gpr_read_if.rs1_data = rs1_data;
|
||||
assign gpr_read_if.rs2_data = rs2_data;
|
||||
reg [`NUM_THREADS-1:0][31:0] rs1_tmp_data, rs2_tmp_data;
|
||||
|
||||
always @(posedge clk) begin
|
||||
rs1_tmp_data <= rs1_data;
|
||||
rs2_tmp_data <= rs2_data;
|
||||
end
|
||||
|
||||
assign raddr1 = {gpr_read_if.wid, gpr_read_if.rs1};
|
||||
assign gpr_read_if.rs1_data = rs1_tmp_data;
|
||||
assign gpr_read_if.rs2_data = rs2_tmp_data;
|
||||
assign gpr_read_if.rs3_data = 0;
|
||||
assign gpr_read_if.ready = 1;
|
||||
|
||||
@@ -53,6 +60,4 @@ module VX_gpr_stage #(
|
||||
`UNUSED_VAR (rs3);
|
||||
`endif
|
||||
|
||||
assign writeback_if.ready = 1'b1; // writes are stall-free
|
||||
|
||||
endmodule
|
||||
|
||||
@@ -3,6 +3,9 @@
|
||||
module VX_gpu_unit #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// Input
|
||||
VX_gpu_req_if gpu_req_if,
|
||||
|
||||
@@ -10,74 +13,74 @@ module VX_gpu_unit #(
|
||||
VX_warp_ctl_if warp_ctl_if,
|
||||
VX_exu_to_cmt_if gpu_commit_if
|
||||
);
|
||||
gpu_tmc_t tmc;
|
||||
gpu_wspawn_t wspawn;
|
||||
gpu_barrier_t barrier;
|
||||
gpu_split_t split;
|
||||
|
||||
wire is_wspawn = (gpu_req_if.gpu_op == `GPU_WSPAWN);
|
||||
wire is_tmc = (gpu_req_if.gpu_op == `GPU_TMC);
|
||||
wire is_split = (gpu_req_if.gpu_op == `GPU_SPLIT);
|
||||
wire is_bar = (gpu_req_if.gpu_op == `GPU_BAR);
|
||||
wire is_wspawn = (gpu_req_if.op == `GPU_WSPAWN);
|
||||
wire is_tmc = (gpu_req_if.op == `GPU_TMC);
|
||||
wire is_split = (gpu_req_if.op == `GPU_SPLIT);
|
||||
wire is_bar = (gpu_req_if.op == `GPU_BAR);
|
||||
|
||||
wire gpu_req_fire = gpu_req_if.valid && gpu_commit_if.ready;
|
||||
|
||||
assign warp_ctl_if.warp_num = gpu_req_if.warp_num;
|
||||
wire gpu_req_fire = gpu_req_if.valid;
|
||||
|
||||
// tmc
|
||||
|
||||
genvar i;
|
||||
|
||||
wire [`NUM_THREADS-1:0] tmc_new_mask;
|
||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
assign tmc_new_mask[i] = (i < gpu_req_if.rs1_data[0]);
|
||||
end
|
||||
assign warp_ctl_if.change_mask = is_tmc && gpu_req_fire;
|
||||
assign warp_ctl_if.thread_mask = tmc_new_mask;
|
||||
|
||||
// barrier
|
||||
|
||||
assign warp_ctl_if.is_barrier = is_bar && gpu_req_fire;
|
||||
assign warp_ctl_if.barrier_id = gpu_req_if.rs1_data[0][`NB_BITS-1:0];
|
||||
assign warp_ctl_if.barrier_num_warps = (`NW_BITS+1)'(gpu_req_if.rs2_data - 1);
|
||||
assign tmc.valid = gpu_req_fire && is_tmc;
|
||||
assign tmc.thread_mask = tmc_new_mask;
|
||||
|
||||
// wspawn
|
||||
|
||||
wire [31:0] wspawn_pc = gpu_req_if.rs2_data;
|
||||
wire [`NUM_WARPS-1:0] wspawn_wmask;
|
||||
for (i = 0; i < `NUM_WARPS; i++) begin
|
||||
for (genvar i = 0; i < `NUM_WARPS; i++) begin
|
||||
assign wspawn_wmask[i] = (i < gpu_req_if.rs1_data[0]);
|
||||
end
|
||||
assign warp_ctl_if.wspawn = is_wspawn && gpu_req_fire;
|
||||
assign warp_ctl_if.wspawn_pc = wspawn_pc;
|
||||
assign warp_ctl_if.wspawn_wmask = wspawn_wmask;
|
||||
assign wspawn.valid = gpu_req_fire && is_wspawn;
|
||||
assign wspawn.wmask = wspawn_wmask;
|
||||
assign wspawn.pc = wspawn_pc;
|
||||
|
||||
// split
|
||||
|
||||
wire[`NUM_THREADS-1:0] split_new_use_mask;
|
||||
wire[`NUM_THREADS-1:0] split_new_later_mask;
|
||||
wire [`NUM_THREADS-1:0] split_then_mask;
|
||||
wire [`NUM_THREADS-1:0] split_else_mask;
|
||||
|
||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||
wire curr_bool = (gpu_req_if.rs1_data[i] == 32'b1);
|
||||
assign split_new_use_mask[i] = gpu_req_if.thread_mask[i] & (curr_bool);
|
||||
assign split_new_later_mask[i] = gpu_req_if.thread_mask[i] & (!curr_bool);
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
wire taken = gpu_req_if.rs1_data[i][0];
|
||||
assign split_then_mask[i] = gpu_req_if.thread_mask[i] & taken;
|
||||
assign split_else_mask[i] = gpu_req_if.thread_mask[i] & ~taken;
|
||||
end
|
||||
|
||||
wire [`NT_BITS:0] num_valids;
|
||||
assign split.valid = gpu_req_fire && is_split;
|
||||
assign split.diverged = (| split_then_mask) && (| split_else_mask);
|
||||
assign split.then_mask = split_then_mask;
|
||||
assign split.else_mask = split_else_mask;
|
||||
assign split.pc = gpu_req_if.curr_PC + 4;
|
||||
|
||||
VX_countones #(
|
||||
.N(`NUM_THREADS)
|
||||
) valids_counter (
|
||||
.valids(gpu_req_if.thread_mask),
|
||||
.count (num_valids)
|
||||
);
|
||||
// barrier
|
||||
|
||||
assign warp_ctl_if.is_split = is_split && (num_valids > 1) && gpu_req_fire;
|
||||
assign warp_ctl_if.do_split = (split_new_use_mask != 0) && (split_new_use_mask != {`NUM_THREADS{1'b1}});
|
||||
assign warp_ctl_if.split_new_mask = split_new_use_mask;
|
||||
assign warp_ctl_if.split_later_mask = split_new_later_mask;
|
||||
assign warp_ctl_if.split_save_pc = gpu_req_if.next_PC;
|
||||
assign barrier.valid = is_bar && gpu_req_fire;
|
||||
assign barrier.id = gpu_req_if.rs1_data[0][`NB_BITS-1:0];
|
||||
assign barrier.num_warps = (`NW_BITS+1)'(gpu_req_if.rs2_data - 1);
|
||||
|
||||
// commit
|
||||
assign gpu_commit_if.valid = gpu_req_if.valid;
|
||||
assign gpu_commit_if.issue_tag = gpu_req_if.issue_tag;
|
||||
assign gpu_commit_if.data = 0;
|
||||
assign gpu_req_if.ready = gpu_commit_if.ready;
|
||||
// output
|
||||
|
||||
VX_generic_register #(
|
||||
.N(1 + `ISTAG_BITS + `NW_BITS + $bits(gpu_tmc_t) + $bits(gpu_wspawn_t) + $bits(gpu_split_t) + $bits(gpu_barrier_t))
|
||||
) gpu_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (0),
|
||||
.flush (0),
|
||||
.in ({gpu_req_if.valid, gpu_req_if.issue_tag, gpu_req_if.wid, tmc, wspawn, split, barrier}),
|
||||
.out ({gpu_commit_if.valid, gpu_commit_if.issue_tag, warp_ctl_if.wid, warp_ctl_if.tmc, warp_ctl_if.wspawn, warp_ctl_if.split, warp_ctl_if.barrier})
|
||||
);
|
||||
|
||||
assign gpu_req_if.ready = 1'b1;
|
||||
|
||||
endmodule
|
||||
@@ -25,7 +25,7 @@ module VX_icache_stage #(
|
||||
|
||||
wire icache_req_fire = icache_req_if.valid && icache_req_if.ready;
|
||||
|
||||
wire [`NW_BITS-1:0] req_tag = ifetch_req_if.warp_num;
|
||||
wire [`NW_BITS-1:0] req_tag = ifetch_req_if.wid;
|
||||
wire [`NW_BITS-1:0] rsp_tag = icache_rsp_if.tag[0][`NW_BITS-1:0];
|
||||
|
||||
always @(posedge clk) begin
|
||||
@@ -46,13 +46,13 @@ module VX_icache_stage #(
|
||||
assign ifetch_req_if.ready = icache_req_if.ready;
|
||||
|
||||
`ifdef DBG_CORE_REQ_INFO
|
||||
assign icache_req_if.tag = {ifetch_req_if.curr_PC, 1'b0, `NR_BITS'(0), ifetch_req_if.warp_num, req_tag};
|
||||
assign icache_req_if.tag = {ifetch_req_if.curr_PC, 1'b0, `NR_BITS'(0), ifetch_req_if.wid, req_tag};
|
||||
`else
|
||||
assign icache_req_if.tag = req_tag;
|
||||
`endif
|
||||
|
||||
assign ifetch_rsp_if.valid = icache_rsp_if.valid;
|
||||
assign ifetch_rsp_if.warp_num = rsp_tag;
|
||||
assign ifetch_rsp_if.wid = rsp_tag;
|
||||
assign ifetch_rsp_if.thread_mask = rsp_thread_mask_buf[rsp_tag];
|
||||
assign ifetch_rsp_if.curr_PC = rsp_curr_PC_buf[rsp_tag];
|
||||
assign ifetch_rsp_if.instr = icache_rsp_if.data[0];
|
||||
@@ -61,7 +61,7 @@ module VX_icache_stage #(
|
||||
assign icache_rsp_if.ready = ifetch_rsp_if.ready;
|
||||
|
||||
`SCOPE_ASSIGN (scope_icache_req_valid, icache_req_if.valid);
|
||||
`SCOPE_ASSIGN (scope_icache_req_warp_num, ifetch_req_if.warp_num);
|
||||
`SCOPE_ASSIGN (scope_icache_req_wid, ifetch_req_if.wid);
|
||||
`SCOPE_ASSIGN (scope_icache_req_addr, {icache_req_if.addr, 2'b0});
|
||||
`SCOPE_ASSIGN (scope_icache_req_tag, icache_req_if.tag);
|
||||
`SCOPE_ASSIGN (scope_icache_req_ready, icache_req_if.ready);
|
||||
@@ -74,10 +74,10 @@ module VX_icache_stage #(
|
||||
`ifdef DBG_PRINT_CORE_ICACHE
|
||||
always @(posedge clk) begin
|
||||
if (icache_req_if.valid && icache_req_if.ready) begin
|
||||
$display("%t: I$%0d req: warp=%0d, PC=%0h", $time, CORE_ID, ifetch_req_if.warp_num, ifetch_req_if.curr_PC);
|
||||
$display("%t: I$%0d req: wid=%0d, PC=%0h", $time, CORE_ID, ifetch_req_if.wid, ifetch_req_if.curr_PC);
|
||||
end
|
||||
if (icache_rsp_if.valid && icache_rsp_if.ready) begin
|
||||
$display("%t: I$%0d rsp: warp=%0d, PC=%0h, instr=%0h", $time, CORE_ID, ifetch_rsp_if.warp_num, ifetch_rsp_if.curr_PC, ifetch_rsp_if.instr);
|
||||
$display("%t: I$%0d rsp: wid=%0d, PC=%0h, instr=%0h", $time, CORE_ID, ifetch_rsp_if.wid, ifetch_rsp_if.curr_PC, ifetch_rsp_if.instr);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
@@ -26,16 +26,18 @@ module VX_ipdom_stack #(
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
wr_ptr <= 0;
|
||||
end else if (push) begin
|
||||
stack_1[wr_ptr] <= q1;
|
||||
stack_2[wr_ptr] <= q2;
|
||||
is_part[wr_ptr] <= 0;
|
||||
rd_ptr <= wr_ptr;
|
||||
wr_ptr <= wr_ptr + 1;
|
||||
end else if (pop) begin
|
||||
wr_ptr <= wr_ptr - DEPTH'(is_part[rd_ptr]);
|
||||
rd_ptr <= rd_ptr - DEPTH'(is_part[rd_ptr]);
|
||||
is_part[rd_ptr] <= 1;
|
||||
end else begin
|
||||
if (push) begin
|
||||
stack_1[wr_ptr] <= q1;
|
||||
stack_2[wr_ptr] <= q2;
|
||||
is_part[wr_ptr] <= 0;
|
||||
rd_ptr <= wr_ptr;
|
||||
wr_ptr <= wr_ptr + 1;
|
||||
end else if (pop) begin
|
||||
wr_ptr <= wr_ptr - DEPTH'(is_part[rd_ptr]);
|
||||
rd_ptr <= rd_ptr - DEPTH'(is_part[rd_ptr]);
|
||||
is_part[rd_ptr] <= 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
@@ -11,6 +11,7 @@ module VX_issue #(
|
||||
VX_cmt_to_issue_if cmt_to_issue_if,
|
||||
|
||||
VX_alu_req_if alu_req_if,
|
||||
VX_bru_req_if bru_req_if,
|
||||
VX_lsu_req_if lsu_req_if,
|
||||
VX_csr_req_if csr_req_if,
|
||||
VX_mul_req_if mul_req_if,
|
||||
@@ -23,13 +24,14 @@ module VX_issue #(
|
||||
|
||||
VX_gpr_read_if gpr_read_if();
|
||||
assign gpr_read_if.valid = decode_if.valid && ~schedule_delay;
|
||||
assign gpr_read_if.warp_num = decode_if.warp_num;
|
||||
assign gpr_read_if.wid = decode_if.wid;
|
||||
assign gpr_read_if.rs1 = decode_if.rs1;
|
||||
assign gpr_read_if.rs2 = decode_if.rs2;
|
||||
assign gpr_read_if.rs3 = decode_if.rs3;
|
||||
assign gpr_read_if.use_rs3 = decode_if.use_rs3;
|
||||
|
||||
wire ex_busy = (~alu_req_if.ready && (decode_if.ex_type == `EX_ALU))
|
||||
|| (~bru_req_if.ready && (decode_if.ex_type == `EX_BRU))
|
||||
|| (~lsu_req_if.ready && (decode_if.ex_type == `EX_LSU))
|
||||
|| (~csr_req_if.ready && (decode_if.ex_type == `EX_CSR))
|
||||
`ifdef EXT_M_ENABLE
|
||||
@@ -40,9 +42,9 @@ module VX_issue #(
|
||||
`endif
|
||||
|| (~gpu_req_if.ready && (decode_if.ex_type == `EX_GPU));
|
||||
|
||||
VX_scheduler #(
|
||||
VX_scoreboard #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) scheduler (
|
||||
) scoreboard (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.decode_if (decode_if),
|
||||
@@ -62,117 +64,71 @@ module VX_issue #(
|
||||
.gpr_read_if (gpr_read_if)
|
||||
);
|
||||
|
||||
VX_alu_req_if alu_req_tmp_if();
|
||||
VX_lsu_req_if lsu_req_tmp_if();
|
||||
VX_csr_req_if csr_req_tmp_if();
|
||||
VX_mul_req_if mul_req_tmp_if();
|
||||
VX_fpu_req_if fpu_req_tmp_if();
|
||||
VX_gpu_req_if gpu_req_tmp_if();
|
||||
VX_issue_if issue_if();
|
||||
|
||||
VX_issue_demux issue_demux (
|
||||
.decode_if (decode_if),
|
||||
.gpr_read_if(gpr_read_if),
|
||||
.issue_tag (issue_tag),
|
||||
.alu_req_if (alu_req_tmp_if),
|
||||
.lsu_req_if (lsu_req_tmp_if),
|
||||
.csr_req_if (csr_req_tmp_if),
|
||||
.mul_req_if (mul_req_tmp_if),
|
||||
.fpu_req_if (fpu_req_tmp_if),
|
||||
.gpu_req_if (gpu_req_tmp_if)
|
||||
);
|
||||
assign issue_if.rs1_data = gpr_read_if.rs1_data;
|
||||
assign issue_if.rs2_data = gpr_read_if.rs2_data;
|
||||
assign issue_if.rs3_data = gpr_read_if.rs3_data;
|
||||
|
||||
wire stall = schedule_delay || ~gpr_read_if.ready;
|
||||
assign decode_if.ready = ~stall;
|
||||
wire [`NT_BITS-1:0] tid;
|
||||
VX_priority_encoder #(
|
||||
.N(`NUM_THREADS)
|
||||
) sel_src (
|
||||
.data_in (decode_if.thread_mask),
|
||||
.data_out (tid),
|
||||
`UNUSED_PIN (valid_out)
|
||||
);
|
||||
|
||||
wire stall = schedule_delay || ~gpr_read_if.ready;
|
||||
wire flush = stall; // clear output on stall
|
||||
|
||||
VX_generic_register #(
|
||||
.N(1 + `ISTAG_BITS + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + `NR_BITS + 32 + 1 + 1 + `EX_BITS + `OP_BITS + 1 + `FRM_BITS + `NT_BITS)
|
||||
) issue_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (flush),
|
||||
.in ({decode_if.valid, issue_tag, decode_if.wid, decode_if.thread_mask, decode_if.curr_PC, decode_if.rd, decode_if.rs1, decode_if.imm, decode_if.rs1_is_PC, decode_if.rs2_is_imm, decode_if.ex_type, decode_if.ex_op, decode_if.wb, decode_if.frm, tid}),
|
||||
.out ({issue_if.valid, issue_if.issue_tag, issue_if.wid, issue_if.thread_mask, issue_if.curr_PC, issue_if.rd, issue_if.rs1, issue_if.imm, issue_if.rs1_is_PC, issue_if.rs2_is_imm, issue_if.ex_type, issue_if.ex_op, issue_if.wb, issue_if.frm, issue_if.tid})
|
||||
);
|
||||
|
||||
assign decode_if.ready = issue_if.ready;
|
||||
assign issue_if.ready = ~stall;
|
||||
|
||||
VX_generic_register #(
|
||||
.N(1 + `ISTAG_BITS + `NW_BITS + 32 + `NUM_THREADS + `ALU_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + 32 + 32)
|
||||
) alu_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (~alu_req_if.ready),
|
||||
.flush (stall && alu_req_if.ready),
|
||||
.in ({alu_req_tmp_if.valid, alu_req_tmp_if.issue_tag, alu_req_tmp_if.warp_num, alu_req_tmp_if.curr_PC, alu_req_tmp_if.thread_mask, alu_req_tmp_if.alu_op, alu_req_tmp_if.rs1_data, alu_req_tmp_if.rs2_data, alu_req_tmp_if.offset, alu_req_tmp_if.next_PC}),
|
||||
.out ({alu_req_if.valid, alu_req_if.issue_tag, alu_req_if.warp_num, alu_req_if.curr_PC, alu_req_if.thread_mask, alu_req_if.alu_op, alu_req_if.rs1_data, alu_req_if.rs2_data, alu_req_if.offset, alu_req_if.next_PC})
|
||||
);
|
||||
|
||||
VX_generic_register #(
|
||||
.N(1 + `ISTAG_BITS + `NW_BITS + 32 + `NUM_THREADS + 1 + `BYTEEN_BITS + (`NUM_THREADS * 32) + 32 + (`NUM_THREADS * 32) + `NR_BITS + 1)
|
||||
) lsu_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (~lsu_req_if.ready),
|
||||
.flush (stall && lsu_req_if.ready),
|
||||
.in ({lsu_req_tmp_if.valid, lsu_req_tmp_if.issue_tag, lsu_req_tmp_if.warp_num, lsu_req_tmp_if.curr_PC, lsu_req_tmp_if.thread_mask, lsu_req_tmp_if.rw, lsu_req_tmp_if.byteen, lsu_req_tmp_if.base_addr, lsu_req_tmp_if.offset, lsu_req_tmp_if.store_data, lsu_req_tmp_if.rd, lsu_req_tmp_if.wb}),
|
||||
.out ({lsu_req_if.valid, lsu_req_if.issue_tag, lsu_req_if.warp_num, lsu_req_if.curr_PC, lsu_req_if.thread_mask, lsu_req_if.rw, lsu_req_if.byteen, lsu_req_if.base_addr, lsu_req_if.offset, lsu_req_if.store_data, lsu_req_if.rd, lsu_req_if.wb})
|
||||
);
|
||||
|
||||
VX_generic_register #(
|
||||
.N(1 + `ISTAG_BITS + `NW_BITS + 32 + `NUM_THREADS + `CSR_BITS + `CSR_ADDR_BITS + 32 + 1)
|
||||
) csr_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (~csr_req_if.ready),
|
||||
.flush (stall && csr_req_if.ready),
|
||||
.in ({csr_req_tmp_if.valid, csr_req_tmp_if.issue_tag, csr_req_tmp_if.warp_num, csr_req_tmp_if.curr_PC, csr_req_tmp_if.thread_mask, csr_req_tmp_if.csr_op, csr_req_tmp_if.csr_addr, csr_req_tmp_if.csr_mask, csr_req_tmp_if.is_io}),
|
||||
.out ({csr_req_if.valid, csr_req_if.issue_tag, csr_req_if.warp_num, csr_req_if.curr_PC, csr_req_if.thread_mask, csr_req_if.csr_op, csr_req_if.csr_addr, csr_req_if.csr_mask, csr_req_if.is_io})
|
||||
);
|
||||
|
||||
`ifdef EXT_M_ENABLE
|
||||
VX_generic_register #(
|
||||
.N(1 + `ISTAG_BITS + `NW_BITS + 32 + `NUM_THREADS + `MUL_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32))
|
||||
) mul_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (~mul_req_if.ready),
|
||||
.flush (stall && mul_req_if.ready),
|
||||
.in ({mul_req_tmp_if.valid, mul_req_tmp_if.issue_tag, mul_req_tmp_if.warp_num, mul_req_tmp_if.curr_PC, mul_req_tmp_if.thread_mask, mul_req_tmp_if.mul_op, mul_req_tmp_if.rs1_data, mul_req_tmp_if.rs2_data}),
|
||||
.out ({mul_req_if.valid, mul_req_if.issue_tag, mul_req_if.warp_num, mul_req_if.curr_PC, mul_req_if.thread_mask, mul_req_if.mul_op, mul_req_if.rs1_data, mul_req_if.rs2_data})
|
||||
);
|
||||
`endif
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
VX_generic_register #(
|
||||
.N(1 + `ISTAG_BITS + `NW_BITS + 32 + `NUM_THREADS + `FPU_BITS + `FRM_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + (`NUM_THREADS * 32))
|
||||
) fpu_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (~fpu_req_if.ready),
|
||||
.flush (stall && fpu_req_if.ready),
|
||||
.in ({fpu_req_tmp_if.valid, fpu_req_tmp_if.issue_tag, fpu_req_tmp_if.warp_num, fpu_req_tmp_if.curr_PC, fpu_req_tmp_if.thread_mask, fpu_req_tmp_if.fpu_op, fpu_req_tmp_if.frm, fpu_req_tmp_if.rs1_data, fpu_req_tmp_if.rs2_data, fpu_req_tmp_if.rs3_data}),
|
||||
.out ({fpu_req_if.valid, fpu_req_if.issue_tag, fpu_req_if.warp_num, fpu_req_if.curr_PC, fpu_req_if.thread_mask, fpu_req_if.fpu_op, fpu_req_if.frm, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data})
|
||||
);
|
||||
`endif
|
||||
|
||||
VX_generic_register #(
|
||||
.N(1 + `ISTAG_BITS + `NW_BITS + 32 + `NUM_THREADS + `GPU_BITS + (`NUM_THREADS * 32) + 32 + 32)
|
||||
) gpu_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (~gpu_req_if.ready),
|
||||
.flush (stall && gpu_req_if.ready),
|
||||
.in ({gpu_req_tmp_if.valid, gpu_req_tmp_if.issue_tag, gpu_req_tmp_if.warp_num, gpu_req_tmp_if.curr_PC, gpu_req_tmp_if.thread_mask, gpu_req_tmp_if.gpu_op, gpu_req_tmp_if.rs1_data, gpu_req_tmp_if.rs2_data, gpu_req_tmp_if.next_PC}),
|
||||
.out ({gpu_req_if.valid, gpu_req_if.issue_tag, gpu_req_if.warp_num, gpu_req_if.curr_PC, gpu_req_if.thread_mask, gpu_req_if.gpu_op, gpu_req_if.rs1_data, gpu_req_if.rs2_data, gpu_req_if.next_PC})
|
||||
VX_issue_demux issue_demux (
|
||||
.issue_if (issue_if),
|
||||
.alu_req_if (alu_req_if),
|
||||
.bru_req_if (bru_req_if),
|
||||
.lsu_req_if (lsu_req_if),
|
||||
.csr_req_if (csr_req_if),
|
||||
.mul_req_if (mul_req_if),
|
||||
.fpu_req_if (fpu_req_if),
|
||||
.gpu_req_if (gpu_req_if)
|
||||
);
|
||||
|
||||
`ifdef DBG_PRINT_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if (alu_req_if.valid && alu_req_if.ready) begin
|
||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=ALU, istag=%0d, tmask=%b, rs1_data=%0h, rs2_data=%0h, offset=%0h, next_PC=%0h", $time, CORE_ID, alu_req_if.warp_num, alu_req_if.curr_PC, alu_req_if.issue_tag, alu_req_if.thread_mask, alu_req_if.rs1_data, alu_req_if.rs2_data, alu_req_if.offset, alu_req_if.next_PC);
|
||||
$display("%t: Core%0d-issue: wid=%0d, PC=%0h, ex=ALU, istag=%0d, tmask=%b, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, alu_req_if.wid, alu_req_if.curr_PC, alu_req_if.issue_tag, alu_req_if.thread_mask, alu_req_if.rs1_data, alu_req_if.rs2_data);
|
||||
end
|
||||
if (bru_req_if.valid && bru_req_if.ready) begin
|
||||
$display("%t: Core%0d-issue: wid=%0d, PC=%0h, ex=BRU, istag=%0d, tmask=%b, rs1_data=%0h, rs2_data=%0h, offset=%0h", $time, CORE_ID, bru_req_if.wid, bru_req_if.curr_PC, bru_req_if.issue_tag, bru_req_if.thread_mask, bru_req_if.rs1_data, bru_req_if.rs2_data, bru_req_if.offset);
|
||||
end
|
||||
if (lsu_req_if.valid && lsu_req_if.ready) begin
|
||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=LSU, istag=%0d, tmask=%b, rw=%b, byteen=%b, baddr=%0h, offset=%0h, data=%0h", $time, CORE_ID, lsu_req_if.warp_num, lsu_req_if.curr_PC, lsu_req_if.issue_tag, lsu_req_if.thread_mask, lsu_req_if.rw, lsu_req_if.byteen, lsu_req_if.base_addr, lsu_req_if.offset, lsu_req_if.store_data);
|
||||
$display("%t: Core%0d-issue: wid=%0d, PC=%0h, ex=LSU, istag=%0d, tmask=%b, rw=%b, byteen=%b, baddr=%0h, offset=%0h, data=%0h", $time, CORE_ID, lsu_req_if.wid, lsu_req_if.curr_PC, lsu_req_if.issue_tag, lsu_req_if.thread_mask, lsu_req_if.rw, lsu_req_if.byteen, lsu_req_if.base_addr, lsu_req_if.offset, lsu_req_if.store_data);
|
||||
end
|
||||
if (csr_req_if.valid && csr_req_if.ready) begin
|
||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=CSR, istag=%0d, tmask=%b, addr=%0h, mask=%0h", $time, CORE_ID, csr_req_if.warp_num, csr_req_if.curr_PC, csr_req_if.issue_tag, csr_req_if.thread_mask, csr_req_if.csr_addr, csr_req_if.csr_mask);
|
||||
$display("%t: Core%0d-issue: wid=%0d, PC=%0h, ex=CSR, istag=%0d, tmask=%b, addr=%0h, mask=%0h", $time, CORE_ID, csr_req_if.wid, csr_req_if.curr_PC, csr_req_if.issue_tag, csr_req_if.thread_mask, csr_req_if.csr_addr, csr_req_if.csr_mask);
|
||||
end
|
||||
if (mul_req_if.valid && mul_req_if.ready) begin
|
||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=MUL, istag=%0d, tmask=%b, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, mul_req_if.warp_num, mul_req_if.curr_PC, mul_req_if.issue_tag, mul_req_if.thread_mask, mul_req_if.rs1_data, mul_req_if.rs2_data);
|
||||
$display("%t: Core%0d-issue: wid=%0d, PC=%0h, ex=MUL, istag=%0d, tmask=%b, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, mul_req_if.wid, mul_req_if.curr_PC, mul_req_if.issue_tag, mul_req_if.thread_mask, mul_req_if.rs1_data, mul_req_if.rs2_data);
|
||||
end
|
||||
if (fpu_req_if.valid && fpu_req_if.ready) begin
|
||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=FPU, istag=%0d, tmask=%b, rs1_data=%0h, rs2_data=%0h, rs3_data=%0h", $time, CORE_ID, fpu_req_if.warp_num, fpu_req_if.curr_PC, fpu_req_if.issue_tag, fpu_req_if.thread_mask, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data);
|
||||
$display("%t: Core%0d-issue: wid=%0d, PC=%0h, ex=FPU, istag=%0d, tmask=%b, rs1_data=%0h, rs2_data=%0h, rs3_data=%0h", $time, CORE_ID, fpu_req_if.wid, fpu_req_if.curr_PC, fpu_req_if.issue_tag, fpu_req_if.thread_mask, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data);
|
||||
end
|
||||
if (gpu_req_if.valid && gpu_req_if.ready) begin
|
||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=GPU, istag=%0d, tmask=%b, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, gpu_req_if.warp_num, gpu_req_if.curr_PC, gpu_req_if.issue_tag, gpu_req_if.thread_mask, gpu_req_if.rs1_data, gpu_req_if.rs2_data);
|
||||
$display("%t: Core%0d-issue: wid=%0d, PC=%0h, ex=GPU, istag=%0d, tmask=%b, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, gpu_req_if.wid, gpu_req_if.curr_PC, gpu_req_if.issue_tag, gpu_req_if.thread_mask, gpu_req_if.rs1_data, gpu_req_if.rs2_data);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
@@ -2,12 +2,11 @@
|
||||
|
||||
module VX_issue_demux (
|
||||
// inputs
|
||||
VX_decode_if decode_if,
|
||||
VX_gpr_read_if gpr_read_if,
|
||||
input wire [`ISTAG_BITS-1:0] issue_tag,
|
||||
|
||||
VX_issue_if issue_if,
|
||||
|
||||
// outputs
|
||||
VX_alu_req_if alu_req_if,
|
||||
VX_bru_req_if bru_req_if,
|
||||
VX_lsu_req_if lsu_req_if,
|
||||
VX_csr_req_if csr_req_if,
|
||||
VX_mul_req_if mul_req_if,
|
||||
@@ -15,74 +14,89 @@ module VX_issue_demux (
|
||||
VX_gpu_req_if gpu_req_if
|
||||
);
|
||||
// ALU unit
|
||||
assign alu_req_if.valid = decode_if.valid && (decode_if.ex_type == `EX_ALU);
|
||||
assign alu_req_if.thread_mask = decode_if.thread_mask;
|
||||
assign alu_req_if.issue_tag = issue_tag;
|
||||
assign alu_req_if.warp_num = decode_if.warp_num;
|
||||
assign alu_req_if.curr_PC = decode_if.curr_PC;
|
||||
assign alu_req_if.alu_op = `ALU_OP(decode_if.ex_op);
|
||||
assign alu_req_if.rs1_data = decode_if.rs1_is_PC ? {`NUM_THREADS{decode_if.curr_PC}} : gpr_read_if.rs1_data;
|
||||
assign alu_req_if.rs2_data = decode_if.rs2_is_imm ? {`NUM_THREADS{decode_if.imm}} : gpr_read_if.rs2_data;
|
||||
assign alu_req_if.offset = decode_if.imm;
|
||||
assign alu_req_if.next_PC = decode_if.next_PC;
|
||||
assign alu_req_if.valid = issue_if.valid && (issue_if.ex_type == `EX_ALU);
|
||||
assign alu_req_if.issue_tag = issue_if.issue_tag;
|
||||
assign alu_req_if.wid = issue_if.wid;
|
||||
assign alu_req_if.thread_mask = issue_if.thread_mask;
|
||||
assign alu_req_if.curr_PC = issue_if.curr_PC;
|
||||
assign alu_req_if.op = `ALU_OP(issue_if.ex_op);
|
||||
assign alu_req_if.rs1_is_PC = issue_if.rs1_is_PC;
|
||||
assign alu_req_if.rs2_is_imm = issue_if.rs2_is_imm;
|
||||
assign alu_req_if.imm = issue_if.imm;
|
||||
assign alu_req_if.rs1_data = issue_if.rs1_data;
|
||||
assign alu_req_if.rs2_data = issue_if.rs2_data;
|
||||
|
||||
// BRU unit
|
||||
assign bru_req_if.valid = issue_if.valid && (issue_if.ex_type == `EX_BRU);
|
||||
assign bru_req_if.issue_tag = issue_if.issue_tag;
|
||||
assign bru_req_if.wid = issue_if.wid;
|
||||
assign bru_req_if.thread_mask = issue_if.thread_mask;
|
||||
assign bru_req_if.curr_PC = issue_if.curr_PC;
|
||||
assign bru_req_if.op = `BRU_OP(issue_if.ex_op);
|
||||
assign bru_req_if.rs1_is_PC = issue_if.rs1_is_PC;
|
||||
assign bru_req_if.rs1_data = issue_if.rs1_data[issue_if.tid];
|
||||
assign bru_req_if.rs2_data = issue_if.rs2_data[issue_if.tid];
|
||||
assign bru_req_if.offset = issue_if.imm;
|
||||
|
||||
// LSU unit
|
||||
assign lsu_req_if.valid = decode_if.valid && (decode_if.ex_type == `EX_LSU);
|
||||
assign lsu_req_if.thread_mask = decode_if.thread_mask;
|
||||
assign lsu_req_if.issue_tag = issue_tag;
|
||||
assign lsu_req_if.warp_num = decode_if.warp_num;
|
||||
assign lsu_req_if.curr_PC = decode_if.curr_PC;
|
||||
assign lsu_req_if.rw = `LSU_RW(decode_if.ex_op);
|
||||
assign lsu_req_if.byteen = `LSU_BE(decode_if.ex_op);
|
||||
assign lsu_req_if.base_addr = gpr_read_if.rs1_data;
|
||||
assign lsu_req_if.store_data = gpr_read_if.rs2_data;
|
||||
assign lsu_req_if.offset = decode_if.imm;
|
||||
assign lsu_req_if.rd = decode_if.rd;
|
||||
assign lsu_req_if.wb = decode_if.wb;
|
||||
assign lsu_req_if.valid = issue_if.valid && (issue_if.ex_type == `EX_LSU);
|
||||
assign lsu_req_if.issue_tag = issue_if.issue_tag;
|
||||
assign lsu_req_if.wid = issue_if.wid;
|
||||
assign lsu_req_if.thread_mask = issue_if.thread_mask;
|
||||
assign lsu_req_if.curr_PC = issue_if.curr_PC;
|
||||
assign lsu_req_if.rw = `LSU_RW(issue_if.ex_op);
|
||||
assign lsu_req_if.byteen = `LSU_BE(issue_if.ex_op);
|
||||
assign lsu_req_if.base_addr = issue_if.rs1_data;
|
||||
assign lsu_req_if.store_data = issue_if.rs2_data;
|
||||
assign lsu_req_if.offset = issue_if.imm;
|
||||
assign lsu_req_if.rd = issue_if.rd;
|
||||
assign lsu_req_if.wb = issue_if.wb;
|
||||
|
||||
// CSR unit
|
||||
assign csr_req_if.valid = decode_if.valid && (decode_if.ex_type == `EX_CSR);
|
||||
assign csr_req_if.issue_tag = issue_tag;
|
||||
assign csr_req_if.warp_num = decode_if.warp_num;
|
||||
assign csr_req_if.curr_PC = decode_if.curr_PC;
|
||||
assign csr_req_if.csr_op = `CSR_OP(decode_if.ex_op);
|
||||
assign csr_req_if.csr_addr = decode_if.imm[`CSR_ADDR_BITS-1:0];
|
||||
assign csr_req_if.csr_mask = decode_if.rs2_is_imm ? 32'(decode_if.rs1) : gpr_read_if.rs1_data[0];
|
||||
assign csr_req_if.valid = issue_if.valid && (issue_if.ex_type == `EX_CSR);
|
||||
assign csr_req_if.issue_tag = issue_if.issue_tag;
|
||||
assign csr_req_if.wid = issue_if.wid;
|
||||
assign csr_req_if.thread_mask = issue_if.thread_mask;
|
||||
assign csr_req_if.curr_PC = issue_if.curr_PC;
|
||||
assign csr_req_if.op = `CSR_OP(issue_if.ex_op);
|
||||
assign csr_req_if.csr_addr = issue_if.imm[`CSR_ADDR_BITS-1:0];
|
||||
assign csr_req_if.csr_mask = issue_if.rs2_is_imm ? 32'(issue_if.rs1) : issue_if.rs1_data[0];
|
||||
assign csr_req_if.is_io = 1'b0;
|
||||
|
||||
// MUL unit
|
||||
`ifdef EXT_M_ENABLE
|
||||
assign mul_req_if.valid = decode_if.valid && (decode_if.ex_type == `EX_MUL);
|
||||
assign mul_req_if.issue_tag = issue_tag;
|
||||
assign mul_req_if.warp_num = decode_if.warp_num;
|
||||
assign mul_req_if.curr_PC = decode_if.curr_PC;
|
||||
assign mul_req_if.mul_op = `MUL_OP(decode_if.ex_op);
|
||||
assign mul_req_if.rs1_data = gpr_read_if.rs1_data;
|
||||
assign mul_req_if.rs2_data = gpr_read_if.rs2_data;
|
||||
assign mul_req_if.valid = issue_if.valid && (issue_if.ex_type == `EX_MUL);
|
||||
assign mul_req_if.issue_tag = issue_if.issue_tag;
|
||||
assign mul_req_if.wid = issue_if.wid;
|
||||
assign mul_req_if.thread_mask = issue_if.thread_mask;
|
||||
assign mul_req_if.curr_PC = issue_if.curr_PC;
|
||||
assign mul_req_if.op = `MUL_OP(issue_if.ex_op);
|
||||
assign mul_req_if.rs1_data = issue_if.rs1_data;
|
||||
assign mul_req_if.rs2_data = issue_if.rs2_data;
|
||||
`endif
|
||||
|
||||
// FPU unit
|
||||
`ifdef EXT_F_ENABLE
|
||||
assign fpu_req_if.valid = decode_if.valid && (decode_if.ex_type == `EX_FPU);
|
||||
assign fpu_req_if.issue_tag = issue_tag;
|
||||
assign fpu_req_if.warp_num = decode_if.warp_num;
|
||||
assign fpu_req_if.curr_PC = decode_if.curr_PC;
|
||||
assign fpu_req_if.fpu_op = `FPU_OP(decode_if.ex_op);
|
||||
assign fpu_req_if.frm = decode_if.frm;
|
||||
assign fpu_req_if.rs1_data = gpr_read_if.rs1_data;
|
||||
assign fpu_req_if.rs2_data = gpr_read_if.rs2_data;
|
||||
assign fpu_req_if.rs3_data = gpr_read_if.rs3_data;
|
||||
assign fpu_req_if.valid = issue_if.valid && (issue_if.ex_type == `EX_FPU);
|
||||
assign fpu_req_if.issue_tag = issue_if.issue_tag;
|
||||
assign fpu_req_if.wid = issue_if.wid;
|
||||
assign fpu_req_if.thread_mask = issue_if.thread_mask;
|
||||
assign fpu_req_if.curr_PC = issue_if.curr_PC;
|
||||
assign fpu_req_if.op = `FPU_OP(issue_if.ex_op);
|
||||
assign fpu_req_if.frm = issue_if.frm;
|
||||
assign fpu_req_if.rs1_data = issue_if.rs1_data;
|
||||
assign fpu_req_if.rs2_data = issue_if.rs2_data;
|
||||
assign fpu_req_if.rs3_data = issue_if.rs3_data;
|
||||
`endif
|
||||
|
||||
// GPU unit
|
||||
assign gpu_req_if.valid = decode_if.valid && (decode_if.ex_type == `EX_GPU);
|
||||
assign gpu_req_if.thread_mask = decode_if.thread_mask;
|
||||
assign gpu_req_if.issue_tag = issue_tag;
|
||||
assign gpu_req_if.warp_num = decode_if.warp_num;
|
||||
assign gpu_req_if.curr_PC = decode_if.curr_PC;
|
||||
assign gpu_req_if.gpu_op = `GPU_OP(decode_if.ex_op);
|
||||
assign gpu_req_if.rs1_data = gpr_read_if.rs1_data;
|
||||
assign gpu_req_if.rs2_data = gpr_read_if.rs2_data[0];
|
||||
assign gpu_req_if.next_PC = decode_if.next_PC;
|
||||
assign gpu_req_if.valid = issue_if.valid && (issue_if.ex_type == `EX_GPU);
|
||||
assign gpu_req_if.issue_tag = issue_if.issue_tag;
|
||||
assign gpu_req_if.wid = issue_if.wid;
|
||||
assign gpu_req_if.thread_mask = issue_if.thread_mask;
|
||||
assign gpu_req_if.curr_PC = issue_if.curr_PC;
|
||||
assign gpu_req_if.op = `GPU_OP(issue_if.ex_op);
|
||||
assign gpu_req_if.rs1_data = issue_if.rs1_data;
|
||||
assign gpu_req_if.rs2_data = issue_if.rs2_data[0];
|
||||
|
||||
endmodule
|
||||
@@ -19,24 +19,24 @@ module VX_lsu_unit #(
|
||||
VX_exu_to_cmt_if lsu_commit_if
|
||||
);
|
||||
|
||||
wire use_valid;
|
||||
wire [`NUM_THREADS-1:0] use_thread_mask;
|
||||
wire use_req_rw;
|
||||
wire [`NUM_THREADS-1:0][29:0] use_req_addr;
|
||||
wire [`NUM_THREADS-1:0][1:0] use_req_offset;
|
||||
wire [`NUM_THREADS-1:0][3:0] use_req_byteen;
|
||||
wire [`NUM_THREADS-1:0][31:0] use_req_data;
|
||||
wire [1:0] use_req_sext;
|
||||
wire [`NR_BITS-1:0] use_rd;
|
||||
wire [`NW_BITS-1:0] use_warp_num;
|
||||
wire [`ISTAG_BITS-1:0] use_issue_tag;
|
||||
wire use_wb;
|
||||
wire [31:0] use_pc;
|
||||
wire valid_in;
|
||||
wire ready_in;
|
||||
|
||||
genvar i;
|
||||
wire [`NUM_THREADS-1:0] req_thread_mask;
|
||||
wire req_rw;
|
||||
wire [`NUM_THREADS-1:0][29:0] req_addr;
|
||||
wire [`NUM_THREADS-1:0][1:0] req_offset;
|
||||
wire [`NUM_THREADS-1:0][3:0] req_byteen;
|
||||
wire [`NUM_THREADS-1:0][31:0] req_data;
|
||||
wire [1:0] req_sext;
|
||||
wire [`NR_BITS-1:0] req_rd;
|
||||
wire [`NW_BITS-1:0] req_wid;
|
||||
wire [`ISTAG_BITS-1:0] req_issue_tag;
|
||||
wire req_wb;
|
||||
wire [31:0] req_pc;
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] full_address;
|
||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
assign full_address[i] = lsu_req_if.base_addr[i] + lsu_req_if.offset;
|
||||
end
|
||||
|
||||
@@ -63,38 +63,39 @@ module VX_lsu_unit #(
|
||||
endcase
|
||||
end
|
||||
|
||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
assign mem_req_addr[i] = full_address[i][31:2];
|
||||
assign mem_req_offset[i] = full_address[i][1:0];
|
||||
assign mem_req_byteen[i] = wmask << full_address[i][1:0];
|
||||
assign mem_req_data[i] = lsu_req_if.store_data[i] << {mem_req_offset[i], 3'b0};
|
||||
end
|
||||
|
||||
wire stall_in = ~dcache_req_if.ready && use_valid;
|
||||
|
||||
// Can accept new request?
|
||||
assign lsu_req_if.ready = ~stall_in;
|
||||
end
|
||||
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
wire [`NUM_THREADS-1:0][31:0] use_address;
|
||||
wire [`NUM_THREADS-1:0][31:0] req_address;
|
||||
`IGNORE_WARNINGS_END
|
||||
|
||||
VX_generic_register #(
|
||||
.N(1 + `NW_BITS + `NUM_THREADS + `ISTAG_BITS + (`NUM_THREADS * 32) + 2 + 1 + (`NUM_THREADS * (30 + 2 + 4 + 32)) + `NR_BITS + 1 + 32)
|
||||
) lsu_req_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_in),
|
||||
.flush (0),
|
||||
.in ({lsu_req_if.valid, lsu_req_if.warp_num, lsu_req_if.thread_mask, lsu_req_if.issue_tag, full_address, mem_req_sext, lsu_req_if.rw, mem_req_addr, mem_req_offset, mem_req_byteen, mem_req_data, lsu_req_if.rd, lsu_req_if.wb, lsu_req_if.curr_PC}),
|
||||
.out ({use_valid, use_warp_num, use_thread_mask, use_issue_tag, use_address, use_req_sext, use_req_rw, use_req_addr, use_req_offset, use_req_byteen, use_req_data, use_rd, use_wb, use_pc})
|
||||
// use a skid buffer because the dcache's ready signal is combinational
|
||||
// use buffer size of two for stall-free execution
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + `ISTAG_BITS + (`NUM_THREADS * 32) + 2 + 1 + (`NUM_THREADS * (30 + 2 + 4 + 32)) + `NR_BITS + 1 + 32),
|
||||
.SIZE (2)
|
||||
) input_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (lsu_req_if.valid),
|
||||
.ready_in (lsu_req_if.ready),
|
||||
.data_in ({lsu_req_if.wid, lsu_req_if.thread_mask, lsu_req_if.issue_tag, full_address, mem_req_sext, lsu_req_if.rw, mem_req_addr, mem_req_offset, mem_req_byteen, mem_req_data, lsu_req_if.rd, lsu_req_if.wb, lsu_req_if.curr_PC}),
|
||||
.data_out ({req_wid, req_thread_mask, req_issue_tag, req_address, req_sext, req_rw, req_addr, req_offset, req_byteen, req_data, req_rd, req_wb, req_pc}),
|
||||
.ready_out (ready_in),
|
||||
.valid_out (valid_in)
|
||||
);
|
||||
|
||||
reg [`NUM_THREADS-1:0] mem_rsp_mask_buf [`ISSUEQ_SIZE-1:0];
|
||||
reg [`ISSUEQ_SIZE-1:0][`NUM_THREADS-1:0] mem_rsp_mask_buf;
|
||||
reg [`ISSUEQ_SIZE-1:0][`NUM_THREADS-1:0][31:0] mem_rsp_data_prev_buf;
|
||||
|
||||
reg [`NUM_THREADS-1:0][1:0] mem_rsp_offset_buf [`ISSUEQ_SIZE-1:0];
|
||||
reg [1:0] mem_rsp_sext_buf [`ISSUEQ_SIZE-1:0];
|
||||
reg [`NUM_THREADS-1:0][31:0] mem_rsp_data_all_buf [`ISSUEQ_SIZE-1:0];
|
||||
reg [`NW_BITS-1:0] mem_rsp_warp_num_buf [`ISSUEQ_SIZE-1:0];
|
||||
reg [1:0] mem_rsp_sext_buf [`ISSUEQ_SIZE-1:0];
|
||||
reg [`NW_BITS-1:0] mem_rsp_wid_buf [`ISSUEQ_SIZE-1:0];
|
||||
reg [31:0] mem_rsp_curr_PC_buf [`ISSUEQ_SIZE-1:0];
|
||||
reg [`NR_BITS-1:0] mem_rsp_rd_buf [`ISSUEQ_SIZE-1:0];
|
||||
|
||||
@@ -105,47 +106,56 @@ module VX_lsu_unit #(
|
||||
wire [`NUM_THREADS-1:0] mem_rsp_mask = mem_rsp_mask_buf [rsp_issue_tag];
|
||||
wire [`NUM_THREADS-1:0][1:0] mem_rsp_offset = mem_rsp_offset_buf [rsp_issue_tag];
|
||||
wire [1:0] mem_rsp_sext = mem_rsp_sext_buf [rsp_issue_tag];
|
||||
wire [`NUM_THREADS-1:0][31:0] mem_rsp_data_all = mem_rsp_data_all_buf [rsp_issue_tag];
|
||||
wire [`NW_BITS-1:0] mem_rsp_warp_num = mem_rsp_warp_num_buf [rsp_issue_tag];
|
||||
wire [`NUM_THREADS-1:0][31:0] mem_rsp_data_prev= mem_rsp_data_prev_buf [rsp_issue_tag];
|
||||
wire [`NW_BITS-1:0] mem_rsp_wid = mem_rsp_wid_buf [rsp_issue_tag];
|
||||
wire [31:0] mem_rsp_curr_PC = mem_rsp_curr_PC_buf [rsp_issue_tag];
|
||||
wire [`NR_BITS-1:0] mem_rsp_rd = mem_rsp_rd_buf [rsp_issue_tag];
|
||||
|
||||
wire [`NUM_THREADS-1:0] mem_rsp_mask_n = mem_rsp_mask & ~dcache_rsp_if.valid;
|
||||
|
||||
wire dcache_req_fire = (| dcache_req_if.valid) && dcache_req_if.ready;
|
||||
wire dcache_rsp_fire = (| dcache_rsp_if.valid) && dcache_rsp_if.ready;
|
||||
|
||||
wire [`NUM_THREADS-1:0] mem_rsp_mask_n = mem_rsp_mask & ~dcache_rsp_if.valid;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (dcache_req_fire && (0 == use_req_rw)) begin
|
||||
mem_rsp_mask_buf [use_issue_tag] <= use_thread_mask;
|
||||
mem_rsp_offset_buf [use_issue_tag] <= use_req_offset;
|
||||
mem_rsp_sext_buf [use_issue_tag] <= use_req_sext;
|
||||
mem_rsp_data_all_buf [use_issue_tag] <= 0;
|
||||
mem_rsp_warp_num_buf [use_issue_tag] <= use_warp_num;
|
||||
mem_rsp_curr_PC_buf [use_issue_tag] <= use_pc;
|
||||
mem_rsp_rd_buf [use_issue_tag] <= use_rd;
|
||||
if (dcache_req_fire && (0 == req_rw)) begin
|
||||
mem_rsp_mask_buf [req_issue_tag] <= req_thread_mask;
|
||||
mem_rsp_data_prev_buf [req_issue_tag] <= 0;
|
||||
end
|
||||
if (dcache_rsp_fire) begin
|
||||
mem_rsp_mask_buf [rsp_issue_tag] <= mem_rsp_mask_n;
|
||||
mem_rsp_data_all_buf [rsp_issue_tag] <= mem_rsp_data_all | mem_rsp_data_curr;
|
||||
mem_rsp_mask_buf [rsp_issue_tag] <= mem_rsp_mask_n;
|
||||
mem_rsp_data_prev_buf [rsp_issue_tag] <= mem_rsp_data_curr | mem_rsp_data_prev;
|
||||
end
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (dcache_req_fire && (0 == req_rw)) begin
|
||||
mem_rsp_offset_buf [req_issue_tag] <= req_offset;
|
||||
mem_rsp_sext_buf [req_issue_tag] <= req_sext;
|
||||
mem_rsp_wid_buf [req_issue_tag] <= req_wid;
|
||||
mem_rsp_curr_PC_buf [req_issue_tag] <= req_pc;
|
||||
mem_rsp_rd_buf [req_issue_tag] <= req_rd;
|
||||
end
|
||||
end
|
||||
|
||||
wire stall_in;
|
||||
|
||||
// Core Request
|
||||
assign dcache_req_if.valid = {`NUM_THREADS{use_valid}} & use_thread_mask;
|
||||
assign dcache_req_if.rw = {`NUM_THREADS{use_req_rw}};
|
||||
assign dcache_req_if.byteen = use_req_byteen;
|
||||
assign dcache_req_if.addr = use_req_addr;
|
||||
assign dcache_req_if.data = use_req_data;
|
||||
assign dcache_req_if.valid = {`NUM_THREADS{valid_in && ~stall_in}} & req_thread_mask;
|
||||
assign dcache_req_if.rw = {`NUM_THREADS{req_rw}};
|
||||
assign dcache_req_if.byteen = req_byteen;
|
||||
assign dcache_req_if.addr = req_addr;
|
||||
assign dcache_req_if.data = req_data;
|
||||
|
||||
assign ready_in = dcache_req_if.ready && ~stall_in;
|
||||
|
||||
`ifdef DBG_CORE_REQ_INFO
|
||||
assign dcache_req_if.tag = {use_pc, use_wb, use_rd, use_warp_num, use_issue_tag};
|
||||
assign dcache_req_if.tag = {req_pc, req_wb, req_rd, req_wid, req_issue_tag};
|
||||
`else
|
||||
assign dcache_req_if.tag = use_issue_tag;
|
||||
assign dcache_req_if.tag = req_issue_tag;
|
||||
`endif
|
||||
|
||||
// Core Response
|
||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
wire [31:0] rsp_data_shifted = dcache_rsp_if.data[i] >> {mem_rsp_offset[i], 3'b0};
|
||||
always @(*) begin
|
||||
case (mem_rsp_sext)
|
||||
@@ -156,46 +166,60 @@ module VX_lsu_unit #(
|
||||
end
|
||||
end
|
||||
|
||||
wire is_store_rsp = dcache_req_fire && use_req_rw;
|
||||
wire is_load_rsp = (| dcache_rsp_if.valid) && (0 == mem_rsp_mask_n);
|
||||
reg is_load_rsp;
|
||||
reg [`NUM_THREADS-1:0][31:0] load_data;
|
||||
reg [`ISTAG_BITS-1:0] rsp_issue_tag_r;
|
||||
|
||||
assign lsu_commit_if.valid = is_load_rsp || is_store_rsp;
|
||||
assign lsu_commit_if.issue_tag = is_store_rsp ? use_issue_tag : rsp_issue_tag;
|
||||
assign lsu_commit_if.data = mem_rsp_data_curr | mem_rsp_data_all;
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
is_load_rsp <= 0;
|
||||
end else begin
|
||||
is_load_rsp <= dcache_rsp_fire && (0 == mem_rsp_mask_n);
|
||||
load_data <= mem_rsp_data_curr | mem_rsp_data_prev;
|
||||
rsp_issue_tag_r <= rsp_issue_tag;
|
||||
end
|
||||
end
|
||||
|
||||
wire is_store_req = dcache_req_fire && req_rw;
|
||||
assign stall_in = is_load_rsp && valid_in && req_rw; // LOAD has priority
|
||||
|
||||
assign lsu_commit_if.valid = is_load_rsp || is_store_req;
|
||||
assign lsu_commit_if.issue_tag = is_load_rsp ? rsp_issue_tag_r : req_issue_tag;
|
||||
assign lsu_commit_if.data = load_data;
|
||||
|
||||
// Can accept new cache response?
|
||||
assign dcache_rsp_if.ready = lsu_commit_if.ready && ~is_store_rsp; // STORE has priority
|
||||
assign dcache_rsp_if.ready = 1'b1;
|
||||
|
||||
// scope registration
|
||||
`SCOPE_ASSIGN (scope_dcache_req_valid, dcache_req_if.valid);
|
||||
`SCOPE_ASSIGN (scope_dcache_req_addr, use_address);
|
||||
`SCOPE_ASSIGN (scope_dcache_req_addr, req_address);
|
||||
`SCOPE_ASSIGN (scope_dcache_req_rw, dcache_req_if.rw );
|
||||
`SCOPE_ASSIGN (scope_dcache_req_byteen,dcache_req_if.byteen);
|
||||
`SCOPE_ASSIGN (scope_dcache_req_data, dcache_req_if.data);
|
||||
`SCOPE_ASSIGN (scope_dcache_req_tag, dcache_req_if.tag);
|
||||
`SCOPE_ASSIGN (scope_dcache_req_ready, dcache_req_if.ready);
|
||||
`SCOPE_ASSIGN (scope_dcache_req_warp_num, use_warp_num);
|
||||
`SCOPE_ASSIGN (scope_dcache_req_curr_PC, use_pc);
|
||||
`SCOPE_ASSIGN (scope_dcache_req_wid, req_wid);
|
||||
`SCOPE_ASSIGN (scope_dcache_req_curr_PC, req_pc);
|
||||
|
||||
`SCOPE_ASSIGN (scope_dcache_rsp_valid, dcache_rsp_if.valid);
|
||||
`SCOPE_ASSIGN (scope_dcache_rsp_data, dcache_rsp_if.data);
|
||||
`SCOPE_ASSIGN (scope_dcache_rsp_tag, dcache_rsp_if.tag);
|
||||
`SCOPE_ASSIGN (scope_dcache_rsp_ready, dcache_rsp_if.ready);
|
||||
|
||||
`UNUSED_VAR (mem_rsp_warp_num)
|
||||
`UNUSED_VAR (mem_rsp_wid)
|
||||
`UNUSED_VAR (mem_rsp_curr_PC)
|
||||
`UNUSED_VAR (mem_rsp_rd)
|
||||
`UNUSED_VAR (use_wb)
|
||||
`UNUSED_VAR (req_wb)
|
||||
|
||||
`ifdef DBG_PRINT_CORE_DCACHE
|
||||
always @(posedge clk) begin
|
||||
if ((| dcache_req_if.valid) && dcache_req_if.ready) begin
|
||||
$display("%t: D$%0d req: warp=%0d, PC=%0h, tmask=%b, addr=%0h, tag=%0h, rd=%0d, rw=%0b, byteen=%0h, data=%0h",
|
||||
$time, CORE_ID, use_warp_num, use_pc, dcache_req_if.valid, use_address, dcache_req_if.tag, use_rd, dcache_req_if.rw, dcache_req_if.byteen, dcache_req_if.data);
|
||||
$display("%t: D$%0d req: wid=%0d, PC=%0h, tmask=%b, addr=%0h, tag=%0h, rd=%0d, rw=%0b, byteen=%0h, data=%0h",
|
||||
$time, CORE_ID, req_wid, req_pc, dcache_req_if.valid, req_address, dcache_req_if.tag, req_rd, dcache_req_if.rw, dcache_req_if.byteen, dcache_req_if.data);
|
||||
end
|
||||
if ((| dcache_rsp_if.valid) && dcache_rsp_if.ready) begin
|
||||
$display("%t: D$%0d rsp: valid=%b, warp=%0d, PC=%0h, tag=%0h, rd=%0d, data=%0h",
|
||||
$time, CORE_ID, dcache_rsp_if.valid, mem_rsp_warp_num, mem_rsp_curr_PC, dcache_rsp_if.tag, mem_rsp_rd, dcache_rsp_if.data);
|
||||
$display("%t: D$%0d rsp: valid=%b, wid=%0d, PC=%0h, tag=%0h, rd=%0d, data=%0h",
|
||||
$time, CORE_ID, dcache_rsp_if.valid, mem_rsp_wid, mem_rsp_curr_PC, dcache_rsp_if.tag, mem_rsp_rd, dcache_rsp_if.data);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
@@ -83,15 +83,13 @@ module VX_mem_arb #(
|
||||
assign out_mem_req_data = in_mem_req_data [bus_req_sel];
|
||||
assign out_mem_req_tag = {in_mem_req_tag [bus_req_sel], REQS_BITS'(bus_req_sel)};
|
||||
|
||||
genvar i;
|
||||
|
||||
for (i = 0; i < NUM_REQUESTS; i++) begin
|
||||
for (genvar i = 0; i < NUM_REQUESTS; i++) begin
|
||||
assign in_mem_req_ready[i] = out_mem_req_ready && (bus_req_sel == REQS_BITS'(i));
|
||||
end
|
||||
|
||||
wire [REQS_BITS-1:0] bus_rsp_sel = out_mem_rsp_tag[REQS_BITS-1:0];
|
||||
|
||||
for (i = 0; i < NUM_REQUESTS; i++) begin
|
||||
for (genvar i = 0; i < NUM_REQUESTS; i++) begin
|
||||
assign in_mem_rsp_valid[i] = out_mem_rsp_valid && (bus_rsp_sel == REQS_BITS'(i));
|
||||
assign in_mem_rsp_data[i] = out_mem_rsp_data;
|
||||
assign in_mem_rsp_tag[i] = out_mem_rsp_tag[REQS_BITS +: TAG_IN_WIDTH];
|
||||
|
||||
@@ -11,52 +11,36 @@ module VX_mul_unit #(
|
||||
|
||||
// Outputs
|
||||
VX_exu_to_cmt_if alu_commit_if
|
||||
);
|
||||
|
||||
wire [`MUL_BITS-1:0] alu_op = alu_req_if.mul_op;
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_in1 = alu_req_if.rs1_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_in2 = alu_req_if.rs2_data;
|
||||
);
|
||||
wire [`ISTAG_BITS-1:0] issue_tag;
|
||||
wire [`MUL_BITS-1:0] alu_op;
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_in1, alu_in2;
|
||||
wire valid_in, ready_in;
|
||||
|
||||
// use a skid buffer due to MUL/DIV output arbitration adding realtime backpressure
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (`ISTAG_BITS + `MUL_BITS + (2 * `NUM_THREADS * 32)),
|
||||
.SIZE (0)
|
||||
) input_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (alu_req_if.valid),
|
||||
.ready_in (alu_req_if.ready),
|
||||
.data_in ({alu_req_if.issue_tag, alu_req_if.op, alu_req_if.rs1_data, alu_req_if.rs2_data}),
|
||||
.data_out ({issue_tag, alu_op, alu_in1, alu_in2}),
|
||||
.ready_out (ready_in),
|
||||
.valid_out (valid_in)
|
||||
);
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] mul_result, div_result;
|
||||
wire [`NUM_THREADS-1:0][31:0] mul_result;
|
||||
wire is_mulw = (alu_op == `MUL_MUL);
|
||||
wire is_mulw_out;
|
||||
|
||||
wire stall_mul, stall_div;
|
||||
|
||||
wire is_mul_mul = (alu_op == `MUL_MUL);
|
||||
wire is_mul_mul_out;
|
||||
|
||||
wire is_div_divu = (alu_op == `MUL_DIV || alu_op == `MUL_DIVU);
|
||||
reg [`NUM_THREADS-1:0] is_div_divu_qual;
|
||||
wire [`NUM_THREADS-1:0] is_div_divu_out;
|
||||
|
||||
genvar i;
|
||||
|
||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
|
||||
wire [32:0] mul_in1 = {(alu_op != `MUL_MULHU) & alu_in1[i][31], alu_in1[i]};
|
||||
wire [32:0] mul_in2 = {(alu_op != `MUL_MULHU && alu_op != `MUL_MULHSU) & alu_in2[i][31], alu_in2[i]};
|
||||
|
||||
reg [32:0] div_in1, div_in2;
|
||||
|
||||
// handle divide by zero
|
||||
always @(*) begin
|
||||
is_div_divu_qual[i] = is_div_divu;
|
||||
div_in1 = {(alu_op == `MUL_DIV || alu_op == `MUL_REM) & alu_in1[i][31], alu_in1[i]};
|
||||
div_in2 = {(alu_op == `MUL_DIV || alu_op == `MUL_REM) & alu_in2[i][31], alu_in2[i]};
|
||||
|
||||
if (0 == alu_in2[i]) begin
|
||||
if (is_div_divu) begin
|
||||
div_in1 = {1'b0, 32'hFFFFFFFF}; // quotient = (0xFFFFFFFF / 1)
|
||||
div_in2 = 1;
|
||||
end else begin
|
||||
is_div_divu_qual[i] = 1; // remainder = (in1 / 1)
|
||||
div_in2 = 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
wire [63:0] mul_result_tmp;
|
||||
wire [31:0] div_result_tmp;
|
||||
wire [31:0] rem_result_tmp;
|
||||
|
||||
VX_multiplier #(
|
||||
.WIDTHA(33),
|
||||
@@ -67,12 +51,71 @@ module VX_mul_unit #(
|
||||
) multiplier (
|
||||
.clk(clk),
|
||||
.reset(reset),
|
||||
.clk_en(~stall_mul),
|
||||
.clk_en(1'b1),
|
||||
.dataa(mul_in1),
|
||||
.datab(mul_in2),
|
||||
.result(mul_result_tmp)
|
||||
);
|
||||
|
||||
assign mul_result[i] = is_mulw_out ? mul_result_tmp[31:0] : mul_result_tmp[63:32];
|
||||
end
|
||||
|
||||
wire [`ISTAG_BITS-1:0] mul_issue_tag;
|
||||
wire mul_valid_out;
|
||||
|
||||
wire mul_fire = valid_in && ready_in && ~`IS_DIV_OP(alu_op);
|
||||
|
||||
VX_shift_register #(
|
||||
.DATAW(1 + `ISTAG_BITS + 1),
|
||||
.DEPTH(`LATENCY_IMUL)
|
||||
) mul_shift_reg (
|
||||
.clk(clk),
|
||||
.reset(reset),
|
||||
.enable(1'b1),
|
||||
.in({mul_fire, issue_tag, is_mulw}),
|
||||
.out({mul_valid_out, mul_issue_tag, is_mulw_out})
|
||||
);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] div_result;
|
||||
wire is_div = (alu_op == `MUL_DIV || alu_op == `MUL_DIVU);
|
||||
wire is_signed_div = (alu_op == `MUL_DIV || alu_op == `MUL_REM);
|
||||
reg [`NUM_THREADS-1:0] is_div_qual;
|
||||
wire [`NUM_THREADS-1:0] is_div_out;
|
||||
wire stall_div;
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
|
||||
reg [31:0] div_in1_qual, div_in2_qual;
|
||||
reg [32:0] div_in1, div_in2;
|
||||
wire [31:0] div_result_tmp, rem_result_tmp;
|
||||
|
||||
// handle divide by zero
|
||||
always @(*) begin
|
||||
if (~stall_div) begin
|
||||
is_div_qual[i] = is_div;
|
||||
div_in1_qual = alu_in1[i];
|
||||
div_in2_qual = alu_in2[i];
|
||||
if (0 == alu_in2[i]) begin
|
||||
div_in2_qual = 1;
|
||||
if (is_div) begin
|
||||
div_in1_qual = 32'hFFFFFFFF; // quotient = (0xFFFFFFFF / 1)
|
||||
end else begin
|
||||
is_div_qual[i] = 1; // remainder = (in1 / 1)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// latch divider inputs
|
||||
always @(posedge clk) begin
|
||||
if (~stall_div) begin
|
||||
div_in1 <= {is_signed_div & alu_in1[i][31], div_in1_qual};
|
||||
div_in2 <= {is_signed_div & alu_in2[i][31], div_in2_qual};
|
||||
end
|
||||
end
|
||||
|
||||
VX_divide #(
|
||||
.WIDTHN(33),
|
||||
.WIDTHD(33),
|
||||
@@ -90,49 +133,32 @@ module VX_mul_unit #(
|
||||
.quotient(div_result_tmp),
|
||||
.remainder(rem_result_tmp)
|
||||
);
|
||||
|
||||
assign mul_result[i] = is_mul_mul_out ? mul_result_tmp[31:0] : mul_result_tmp[63:32];
|
||||
assign div_result[i] = is_div_divu_out[i] ? div_result_tmp : rem_result_tmp;
|
||||
|
||||
assign div_result[i] = is_div_out[i] ? div_result_tmp : rem_result_tmp;
|
||||
end
|
||||
|
||||
wire is_mul_fire = alu_req_if.valid && alu_req_if.ready && ~`IS_DIV_OP(alu_op);
|
||||
wire is_div_fire = alu_req_if.valid && alu_req_if.ready && `IS_DIV_OP(alu_op);
|
||||
|
||||
wire mul_valid_out;
|
||||
wire [`ISTAG_BITS-1:0] div_issue_tag;
|
||||
wire div_valid_out;
|
||||
|
||||
wire [`ISTAG_BITS-1:0] mul_issue_tag;
|
||||
wire [`ISTAG_BITS-1:0] div_issue_tag;
|
||||
|
||||
VX_shift_register #(
|
||||
.DATAW(1 + `ISTAG_BITS + 1),
|
||||
.DEPTH(`LATENCY_IMUL)
|
||||
) mul_shift_reg (
|
||||
.clk(clk),
|
||||
.reset(reset),
|
||||
.enable(~stall_mul),
|
||||
.in({is_mul_fire, alu_req_if.issue_tag, is_mul_mul}),
|
||||
.out({mul_valid_out, mul_issue_tag, is_mul_mul_out})
|
||||
);
|
||||
wire div_fire = valid_in && ready_in && `IS_DIV_OP(alu_op);
|
||||
|
||||
VX_shift_register #(
|
||||
.DATAW(1 + `ISTAG_BITS + `NUM_THREADS),
|
||||
.DEPTH(`LATENCY_IDIV)
|
||||
.DEPTH(`LATENCY_IDIV + 1)
|
||||
) div_shift_reg (
|
||||
.clk(clk),
|
||||
.reset(reset),
|
||||
.enable(~stall_div),
|
||||
.in({is_div_fire, alu_req_if.issue_tag, is_div_divu_qual}),
|
||||
.out({div_valid_out, div_issue_tag, is_div_divu_out})
|
||||
.in({div_fire, issue_tag, is_div_qual}),
|
||||
.out({div_valid_out, div_issue_tag, is_div_out})
|
||||
);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
wire stall_out = (~alu_commit_if.ready && alu_commit_if.valid);
|
||||
assign stall_mul = stall_out;
|
||||
assign stall_div = stall_out
|
||||
|| (mul_valid_out && div_valid_out); // arbitration prioritizes MUL
|
||||
assign stall_div = mul_valid_out && div_valid_out; // arbitration prioritizes MUL
|
||||
|
||||
// can accept new request?
|
||||
assign alu_req_if.ready = ~(stall_mul || stall_div);
|
||||
assign ready_in = ~stall_div;
|
||||
|
||||
assign alu_commit_if.valid = mul_valid_out || div_valid_out;
|
||||
assign alu_commit_if.issue_tag = mul_valid_out ? mul_issue_tag : div_issue_tag;
|
||||
|
||||
@@ -107,6 +107,7 @@ module VX_pipeline #(
|
||||
VX_warp_ctl_if warp_ctl_if();
|
||||
VX_ifetch_rsp_if ifetch_rsp_if();
|
||||
VX_alu_req_if alu_req_if();
|
||||
VX_bru_req_if bru_req_if();
|
||||
VX_lsu_req_if lsu_req_if();
|
||||
VX_csr_req_if csr_req_if();
|
||||
VX_mul_req_if mul_req_if();
|
||||
@@ -117,6 +118,7 @@ module VX_pipeline #(
|
||||
VX_wstall_if wstall_if();
|
||||
VX_join_if join_if();
|
||||
VX_exu_to_cmt_if alu_commit_if();
|
||||
VX_exu_to_cmt_if bru_commit_if();
|
||||
VX_exu_to_cmt_if lsu_commit_if();
|
||||
VX_exu_to_cmt_if csr_commit_if();
|
||||
VX_exu_to_cmt_if mul_commit_if();
|
||||
@@ -157,9 +159,10 @@ module VX_pipeline #(
|
||||
|
||||
.decode_if (decode_if),
|
||||
.writeback_if (writeback_if),
|
||||
.cmt_to_issue_if (cmt_to_issue_if),
|
||||
.cmt_to_issue_if(cmt_to_issue_if),
|
||||
|
||||
.alu_req_if (alu_req_if),
|
||||
.bru_req_if (bru_req_if),
|
||||
.lsu_req_if (lsu_req_if),
|
||||
.csr_req_if (csr_req_if),
|
||||
.mul_req_if (mul_req_if),
|
||||
@@ -183,6 +186,7 @@ module VX_pipeline #(
|
||||
.cmt_to_csr_if (cmt_to_csr_if),
|
||||
|
||||
.alu_req_if (alu_req_if),
|
||||
.bru_req_if (bru_req_if),
|
||||
.lsu_req_if (lsu_req_if),
|
||||
.csr_req_if (csr_req_if),
|
||||
.mul_req_if (mul_req_if),
|
||||
@@ -192,6 +196,7 @@ module VX_pipeline #(
|
||||
.warp_ctl_if (warp_ctl_if),
|
||||
.branch_ctl_if (branch_ctl_if),
|
||||
.alu_commit_if (alu_commit_if),
|
||||
.bru_commit_if (bru_commit_if),
|
||||
.lsu_commit_if (lsu_commit_if),
|
||||
.csr_commit_if (csr_commit_if),
|
||||
.mul_commit_if (mul_commit_if),
|
||||
@@ -208,6 +213,7 @@ module VX_pipeline #(
|
||||
.reset (reset),
|
||||
|
||||
.alu_commit_if (alu_commit_if),
|
||||
.bru_commit_if (bru_commit_if),
|
||||
.lsu_commit_if (lsu_commit_if),
|
||||
.csr_commit_if (csr_commit_if),
|
||||
.mul_commit_if (mul_commit_if),
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
`ifndef VX_PLATFORM
|
||||
`define VX_PLATFORM
|
||||
|
||||
`include "VX_scope.vh"
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`ifndef NDEBUG
|
||||
@@ -50,6 +52,7 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define USE_FAST_BRAM (* syn_ramstyle = "mlab" *)
|
||||
`define RELAX_BRAM_RW (* syn_ramstyle = "no_rw_check" *)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
@@ -22,7 +22,7 @@ task print_ex_op;
|
||||
input [`EX_BITS-1:0] ex;
|
||||
input [`OP_BITS-1:0] op;
|
||||
begin
|
||||
case (ex)
|
||||
case (ex)
|
||||
`EX_ALU: begin
|
||||
case (`ALU_BITS'(op))
|
||||
`ALU_ADD: $write("ADD");
|
||||
@@ -37,22 +37,27 @@ task print_ex_op;
|
||||
`ALU_AND: $write("AND");
|
||||
`ALU_LUI: $write("LUI");
|
||||
`ALU_AUIPC: $write("AUIPC");
|
||||
`ALU_BEQ: $write("BEQ");
|
||||
`ALU_BNE: $write("BNE");
|
||||
`ALU_BLT: $write("BLT");
|
||||
`ALU_BGE: $write("BGE");
|
||||
`ALU_BLTU: $write("BLTU");
|
||||
`ALU_BGEU: $write("BGEU");
|
||||
`ALU_JAL: $write("JAL");
|
||||
`ALU_JALR: $write("JALR");
|
||||
`ALU_ECALL: $write("ECALL");
|
||||
`ALU_EBREAK:$write("EBREAK");
|
||||
`ALU_MRET: $write("MRET");
|
||||
`ALU_SRET: $write("SRET");
|
||||
`ALU_DRET: $write("DRET");
|
||||
default: $write("?");
|
||||
endcase
|
||||
endcase
|
||||
end
|
||||
`EX_BRU: begin
|
||||
case (`BRU_BITS'(op))
|
||||
`BRU_EQ: $write("BEQ");
|
||||
`BRU_NE: $write("BNE");
|
||||
`BRU_LT: $write("BLT");
|
||||
`BRU_GE: $write("BGE");
|
||||
`BRU_LTU: $write("BLTU");
|
||||
`BRU_GEU: $write("BGEU");
|
||||
`BRU_JAL: $write("JAL");
|
||||
`BRU_JALR: $write("JALR");
|
||||
`BRU_ECALL: $write("ECALL");
|
||||
`BRU_EBREAK:$write("EBREAK");
|
||||
`BRU_MRET: $write("MRET");
|
||||
`BRU_SRET: $write("SRET");
|
||||
`BRU_DRET: $write("DRET");
|
||||
default: $write("?");
|
||||
endcase
|
||||
end
|
||||
`EX_LSU: begin
|
||||
case (`LSU_BITS'(op))
|
||||
`LSU_LB: $write("LB");
|
||||
|
||||
@@ -1,82 +0,0 @@
|
||||
`include "VX_define.vh"
|
||||
|
||||
// VX_scheduler: issue-side hazard tracker.
// Keeps a per-warp bitmap of registers with a pending write plus, for each
// (warp, register) pair, the set of threads whose write is still outstanding.
// Raises schedule_delay while the instruction at decode touches a pending
// register, while ex_busy is asserted, or while the issue buffer is full.
// Also stores per-instruction metadata in a VX_cam_buffer that is read back
// through cmt_to_issue_if.
module VX_scheduler #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
VX_decode_if decode_if,
|
||||
VX_wb_if writeback_if,
|
||||
VX_cmt_to_issue_if cmt_to_issue_if,
|
||||
input wire ex_busy,
|
||||
output wire [`ISTAG_BITS-1:0] issue_tag,
|
||||
output wire schedule_delay
|
||||
);
|
||||
// NOTE(review): CTVW is not referenced anywhere else in this module as shown.
localparam CTVW = `CLOG2(`NUM_WARPS * `NUM_REGS + 1);
|
||||
// Per (warp, register): threads that still owe a writeback to that register.
reg [`NUM_THREADS-1:0] inuse_registers [(`NUM_WARPS * `NUM_REGS)-1:0];
|
||||
// Per warp: one bit per register, set while any thread's write is pending.
reg [`NUM_REGS-1:0] inuse_reg_mask [`NUM_WARPS-1:0];
|
||||
|
||||
// Pending registers of the decoding warp that this instruction also uses.
wire [`NUM_REGS-1:0] inuse_mask = inuse_reg_mask[decode_if.warp_num] & decode_if.reg_use_mask;
|
||||
wire inuse_hazard = (inuse_mask != 0);
|
||||
|
||||
wire issue_buf_full;
|
||||
|
||||
assign schedule_delay = ex_busy || inuse_hazard || issue_buf_full;
|
||||
|
||||
wire issue_fire = decode_if.valid && decode_if.ready;
|
||||
|
||||
wire writeback_fire = writeback_if.valid && writeback_if.ready;
|
||||
|
||||
// Only instructions that write back (wb != 0) reserve their destination.
wire acquire_rd = issue_fire && (decode_if.wb != 0);
|
||||
|
||||
// Threads still pending on the written-back register after this writeback.
wire [`NUM_THREADS-1:0] inuse_registers_n = inuse_registers[{writeback_if.warp_num, writeback_if.rd}] & ~writeback_if.thread_mask;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
for (integer w = 0; w < `NUM_WARPS; w++) begin
|
||||
for (integer i = 0; i < `NUM_REGS; i++) begin
|
||||
inuse_registers[w * `NUM_REGS + i] <= 0;
|
||||
end
|
||||
inuse_reg_mask[w] <= `NUM_REGS'(0);
|
||||
end
|
||||
end else begin
|
||||
if (acquire_rd) begin
|
||||
inuse_registers[{decode_if.warp_num, decode_if.rd}] <= decode_if.thread_mask;
|
||||
inuse_reg_mask[decode_if.warp_num][decode_if.rd] <= 1;
|
||||
end
|
||||
// Written after the acquire branch: if both target the same entry in one
// cycle, these nonblocking assignments are the ones that take effect.
if (writeback_fire) begin
|
||||
// A writeback is only expected for a register that was reserved.
assert(inuse_reg_mask[writeback_if.warp_num][writeback_if.rd] != 0);
|
||||
inuse_registers[{writeback_if.warp_num, writeback_if.rd}] <= inuse_registers_n;
|
||||
// The register stays marked until every pending thread has written back.
inuse_reg_mask[writeback_if.warp_num][writeback_if.rd] <= (| inuse_registers_n);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// Per-instruction metadata store, one entry of $bits(issue_data_t) per slot.
// write_addr is wired to issue_tag; presumably the buffer drives it with the
// slot it allocated (confirm in VX_cam_buffer).
// NOTE(review): the concatenations below list six units, so this assumes
// `NUM_EXS is 6 — confirm.
VX_cam_buffer #(
|
||||
.DATAW ($bits(issue_data_t)),
|
||||
.SIZE (`ISSUEQ_SIZE),
|
||||
.RPORTS (`NUM_EXS)
|
||||
) issue_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.write_data ({decode_if.warp_num, decode_if.thread_mask, decode_if.curr_PC, decode_if.rd, decode_if.wb}),
|
||||
.write_addr (issue_tag),
|
||||
.acquire_slot (issue_fire),
|
||||
.release_slot ({cmt_to_issue_if.alu_valid, cmt_to_issue_if.lsu_valid, cmt_to_issue_if.csr_valid, cmt_to_issue_if.mul_valid, cmt_to_issue_if.fpu_valid, cmt_to_issue_if.gpu_valid}),
|
||||
.read_addr ({cmt_to_issue_if.alu_tag, cmt_to_issue_if.lsu_tag, cmt_to_issue_if.csr_tag, cmt_to_issue_if.mul_tag, cmt_to_issue_if.fpu_tag, cmt_to_issue_if.gpu_tag}),
|
||||
.read_data ({cmt_to_issue_if.alu_data, cmt_to_issue_if.lsu_data, cmt_to_issue_if.csr_data, cmt_to_issue_if.mul_data, cmt_to_issue_if.fpu_data, cmt_to_issue_if.gpu_data}),
|
||||
.full (issue_buf_full)
|
||||
);
|
||||
|
||||
// Debug trace: printed on every cycle where decode is valid but not ready,
// showing the stall inputs (buffer full, per-operand in-use bits, ex_busy).
`ifdef DBG_PRINT_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if (decode_if.valid && ~decode_if.ready) begin
|
||||
$display("%t: Core%0d-stall: warp=%0d, PC=%0h, rd=%0d, wb=%0d, ib_full=%b, inuse=%b%b%b%b, ex_busy=%b",
|
||||
$time, CORE_ID, decode_if.warp_num, decode_if.curr_PC, decode_if.rd, decode_if.wb, issue_buf_full,
|
||||
inuse_mask[decode_if.rd], inuse_mask[decode_if.rs1], inuse_mask[decode_if.rs2], inuse_mask[decode_if.rs3], ex_busy);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
@@ -15,12 +15,12 @@
|
||||
scope_snp_req_invalidate, \
|
||||
scope_snp_req_tag, \
|
||||
scope_snp_rsp_tag, \
|
||||
scope_icache_req_warp_num, \
|
||||
scope_icache_req_wid, \
|
||||
scope_icache_req_addr, \
|
||||
scope_icache_req_tag, \
|
||||
scope_icache_rsp_data, \
|
||||
scope_icache_rsp_tag, \
|
||||
scope_dcache_req_warp_num, \
|
||||
scope_dcache_req_wid, \
|
||||
scope_dcache_req_curr_PC, \
|
||||
scope_dcache_req_addr, \
|
||||
scope_dcache_req_rw, \
|
||||
@@ -29,17 +29,17 @@
|
||||
scope_dcache_req_tag, \
|
||||
scope_dcache_rsp_data, \
|
||||
scope_dcache_rsp_tag, \
|
||||
scope_decode_warp_num, \
|
||||
scope_decode_wid, \
|
||||
scope_decode_curr_PC, \
|
||||
scope_decode_is_jal, \
|
||||
scope_decode_rs1, \
|
||||
scope_decode_rs2, \
|
||||
scope_execute_warp_num, \
|
||||
scope_execute_wid, \
|
||||
scope_execute_curr_PC, \
|
||||
scope_execute_rd, \
|
||||
scope_execute_a, \
|
||||
scope_execute_b, \
|
||||
scope_writeback_warp_num, \
|
||||
scope_writeback_wid, \
|
||||
scope_writeback_curr_PC, \
|
||||
scope_writeback_wb, \
|
||||
scope_writeback_rd, \
|
||||
@@ -103,7 +103,7 @@
|
||||
wire scope_snp_rsp_valid; \
|
||||
wire [`VX_SNP_TAG_WIDTH-1:0] scope_snp_rsp_tag; \
|
||||
wire scope_icache_req_valid; \
|
||||
wire [`NW_BITS-1:0] scope_icache_req_warp_num; \
|
||||
wire [`NW_BITS-1:0] scope_icache_req_wid; \
|
||||
wire [31:0] scope_icache_req_addr; \
|
||||
wire [`ICORE_TAG_WIDTH-1:0] scope_icache_req_tag; \
|
||||
wire scope_icache_req_ready; \
|
||||
@@ -112,7 +112,7 @@
|
||||
wire [`ICORE_TAG_WIDTH-1:0] scope_icache_rsp_tag; \
|
||||
wire scope_icache_rsp_ready; \
|
||||
wire [`NUM_THREADS-1:0] scope_dcache_req_valid; \
|
||||
wire [`NW_BITS-1:0] scope_dcache_req_warp_num; \
|
||||
wire [`NW_BITS-1:0] scope_dcache_req_wid; \
|
||||
wire [31:0] scope_dcache_req_curr_PC; \
|
||||
wire [63:0] scope_dcache_req_addr; \
|
||||
wire scope_dcache_req_rw; \
|
||||
@@ -131,19 +131,19 @@
|
||||
wire scope_exec_delay; \
|
||||
wire scope_gpr_stage_delay; \
|
||||
wire [`NUM_THREADS-1:0] scope_decode_valid; \
|
||||
wire [`NW_BITS-1:0] scope_decode_warp_num; \
|
||||
wire [`NW_BITS-1:0] scope_decode_wid; \
|
||||
wire [31:0] scope_decode_curr_PC; \
|
||||
wire scope_decode_is_jal; \
|
||||
wire [`NR_BITS-1:0] scope_decode_rs1; \
|
||||
wire [`NR_BITS-1:0] scope_decode_rs2; \
|
||||
wire [`NUM_THREADS-1:0] scope_execute_valid; \
|
||||
wire [`NW_BITS-1:0] scope_execute_warp_num; \
|
||||
wire [`NW_BITS-1:0] scope_execute_wid; \
|
||||
wire [31:0] scope_execute_curr_PC; \
|
||||
wire [`NR_BITS-1:0] scope_execute_rd; \
|
||||
wire [63:0] scope_execute_a; \
|
||||
wire [63:0] scope_execute_b; \
|
||||
wire [`NUM_THREADS-1:0] scope_writeback_valid; \
|
||||
wire [`NW_BITS-1:0] scope_writeback_warp_num; \
|
||||
wire [`NW_BITS-1:0] scope_writeback_wid; \
|
||||
wire [31:0] scope_writeback_curr_PC; \
|
||||
wire scope_writeback_wb; \
|
||||
wire [`NR_BITS-1:0] scope_writeback_rd; \
|
||||
@@ -162,7 +162,7 @@
|
||||
|
||||
`define SCOPE_SIGNALS_ISTAGE_IO \
|
||||
output wire scope_icache_req_valid, \
|
||||
output wire [`NW_BITS-1:0] scope_icache_req_warp_num, \
|
||||
output wire [`NW_BITS-1:0] scope_icache_req_wid, \
|
||||
output wire [31:0] scope_icache_req_addr, \
|
||||
output wire [`ICORE_TAG_WIDTH-1:0] scope_icache_req_tag, \
|
||||
output wire scope_icache_req_ready, \
|
||||
@@ -173,7 +173,7 @@
|
||||
|
||||
`define SCOPE_SIGNALS_LSU_IO \
|
||||
output wire [`NUM_THREADS-1:0] scope_dcache_req_valid, \
|
||||
output wire [`NW_BITS-1:0] scope_dcache_req_warp_num, \
|
||||
output wire [`NW_BITS-1:0] scope_dcache_req_wid, \
|
||||
output wire [31:0] scope_dcache_req_curr_PC, \
|
||||
output wire [63:0] scope_dcache_req_addr, \
|
||||
output wire scope_dcache_req_rw, \
|
||||
@@ -210,19 +210,19 @@
|
||||
|
||||
`define SCOPE_SIGNALS_BE_IO \
|
||||
output wire [`NUM_THREADS-1:0] scope_decode_valid, \
|
||||
output wire [`NW_BITS-1:0] scope_decode_warp_num, \
|
||||
output wire [`NW_BITS-1:0] scope_decode_wid, \
|
||||
output wire [31:0] scope_decode_curr_PC, \
|
||||
output wire scope_decode_is_jal, \
|
||||
output wire [`NR_BITS-1:0] scope_decode_rs1, \
|
||||
output wire [`NR_BITS-1:0] scope_decode_rs2, \
|
||||
output wire [`NUM_THREADS-1:0] scope_execute_valid, \
|
||||
output wire [`NW_BITS-1:0] scope_execute_warp_num, \
|
||||
output wire [`NW_BITS-1:0] scope_execute_wid, \
|
||||
output wire [31:0] scope_execute_curr_PC, \
|
||||
output wire [`NR_BITS-1:0] scope_execute_rd, \
|
||||
output wire [63:0] scope_execute_a, \
|
||||
output wire [63:0] scope_execute_b, \
|
||||
output wire [`NUM_THREADS-1:0] scope_writeback_valid, \
|
||||
output wire [`NW_BITS-1:0] scope_writeback_warp_num, \
|
||||
output wire [`NW_BITS-1:0] scope_writeback_wid, \
|
||||
output wire [31:0] scope_writeback_curr_PC, \
|
||||
output wire scope_writeback_wb, \
|
||||
output wire [`NR_BITS-1:0] scope_writeback_rd, \
|
||||
@@ -230,7 +230,7 @@
|
||||
|
||||
`define SCOPE_SIGNALS_ISTAGE_BIND \
|
||||
.scope_icache_req_valid (scope_icache_req_valid), \
|
||||
.scope_icache_req_warp_num (scope_icache_req_warp_num), \
|
||||
.scope_icache_req_wid (scope_icache_req_wid), \
|
||||
.scope_icache_req_addr (scope_icache_req_addr), \
|
||||
.scope_icache_req_tag (scope_icache_req_tag), \
|
||||
.scope_icache_req_ready (scope_icache_req_ready), \
|
||||
@@ -241,7 +241,7 @@
|
||||
|
||||
`define SCOPE_SIGNALS_LSU_BIND \
|
||||
.scope_dcache_req_valid (scope_dcache_req_valid), \
|
||||
.scope_dcache_req_warp_num (scope_dcache_req_warp_num), \
|
||||
.scope_dcache_req_wid (scope_dcache_req_wid), \
|
||||
.scope_dcache_req_curr_PC (scope_dcache_req_curr_PC), \
|
||||
.scope_dcache_req_addr (scope_dcache_req_addr), \
|
||||
.scope_dcache_req_rw (scope_dcache_req_rw), \
|
||||
@@ -332,19 +332,19 @@
|
||||
|
||||
`define SCOPE_SIGNALS_BE_BIND \
|
||||
.scope_decode_valid (scope_decode_valid), \
|
||||
.scope_decode_warp_num (scope_decode_warp_num), \
|
||||
.scope_decode_wid (scope_decode_wid), \
|
||||
.scope_decode_curr_PC (scope_decode_curr_PC), \
|
||||
.scope_decode_is_jal (scope_decode_is_jal), \
|
||||
.scope_decode_rs1 (scope_decode_rs1), \
|
||||
.scope_decode_rs2 (scope_decode_rs2), \
|
||||
.scope_execute_valid (scope_execute_valid), \
|
||||
.scope_execute_warp_num (scope_execute_warp_num), \
|
||||
.scope_execute_wid (scope_execute_wid), \
|
||||
.scope_execute_curr_PC (scope_execute_curr_PC), \
|
||||
.scope_execute_rd (scope_execute_rd), \
|
||||
.scope_execute_a (scope_execute_a), \
|
||||
.scope_execute_b (scope_execute_b), \
|
||||
.scope_writeback_valid (scope_writeback_valid), \
|
||||
.scope_writeback_warp_num (scope_writeback_warp_num), \
|
||||
.scope_writeback_wid (scope_writeback_wid), \
|
||||
.scope_writeback_curr_PC(scope_writeback_curr_PC), \
|
||||
.scope_writeback_wb (scope_writeback_wb), \
|
||||
.scope_writeback_rd (scope_writeback_rd), \
|
||||
|
||||
73
hw/rtl/VX_scoreboard.v
Normal file
73
hw/rtl/VX_scoreboard.v
Normal file
@@ -0,0 +1,73 @@
|
||||
`include "VX_define.vh"
|
||||
|
||||
// VX_scoreboard: issue-side register scoreboard.
// Keeps one "write pending" bit per register per warp. Raises schedule_delay
// while the instruction at decode uses a pending register, while ex_busy is
// asserted, or while the issue table is full. Also stores per-instruction
// metadata in a VX_cam_buffer that is read back through cmt_to_issue_if.
module VX_scoreboard #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
VX_decode_if decode_if,
|
||||
VX_wb_if writeback_if,
|
||||
VX_cmt_to_issue_if cmt_to_issue_if,
|
||||
input wire ex_busy,
|
||||
output wire [`ISTAG_BITS-1:0] issue_tag,
|
||||
output wire schedule_delay
|
||||
);
|
||||
// Per warp: bit r is set from the cycle an instruction writing register r
// issues until a writeback to r is seen.
reg [`NUM_REGS-1:0] inuse_reg_mask [`NUM_WARPS-1:0];
|
||||
|
||||
// Pending registers of the decoding warp that this instruction also uses.
wire [`NUM_REGS-1:0] inuse_mask = inuse_reg_mask[decode_if.wid] & decode_if.reg_use_mask;
|
||||
wire inuse_hazard = (inuse_mask != 0);
|
||||
|
||||
wire issue_buf_full;
|
||||
|
||||
assign schedule_delay = ex_busy || inuse_hazard || issue_buf_full;
|
||||
|
||||
wire issue_fire = decode_if.valid && decode_if.ready;
|
||||
|
||||
// Only instructions that write back (wb != 0) reserve their destination.
wire reserve_rd = issue_fire && (decode_if.wb != 0);
|
||||
|
||||
// Any valid writeback releases its register; writeback_if.ready and the
// writeback thread mask are not consulted here.
wire release_rd = writeback_if.valid;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
for (integer i = 0; i < `NUM_WARPS; i++) begin
|
||||
inuse_reg_mask[i] <= `NUM_REGS'(0);
|
||||
end
|
||||
end else begin
|
||||
if (reserve_rd) begin
|
||||
inuse_reg_mask[decode_if.wid][decode_if.rd] <= 1;
|
||||
end
|
||||
// Written after the reserve branch: if both target the same bit in one
// cycle, this nonblocking assignment (the clear) is the one that sticks.
if (release_rd) begin
|
||||
// A writeback is only expected for a register that was reserved.
assert(inuse_reg_mask[writeback_if.wid][writeback_if.rd] != 0);
|
||||
inuse_reg_mask[writeback_if.wid][writeback_if.rd] <= 0;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// Per-instruction metadata store, one entry of $bits(issue_data_t) per slot.
// write_data is packed in issue_data_t field order (wid, thread_mask,
// curr_PC, rd, wb). write_addr is wired to issue_tag; presumably the buffer
// drives it with the slot it allocated (confirm in VX_cam_buffer).
// NOTE(review): the concatenations below list seven units, so this assumes
// `NUM_EXS is 7 — confirm.
VX_cam_buffer #(
|
||||
.DATAW ($bits(issue_data_t)),
|
||||
.SIZE (`ISSUEQ_SIZE),
|
||||
.RPORTS (`NUM_EXS)
|
||||
) issue_table (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.write_data ({decode_if.wid, decode_if.thread_mask, decode_if.curr_PC, decode_if.rd, decode_if.wb}),
|
||||
.write_addr (issue_tag),
|
||||
.acquire_slot (issue_fire),
|
||||
.release_slot ({cmt_to_issue_if.alu_valid, cmt_to_issue_if.bru_valid, cmt_to_issue_if.lsu_valid, cmt_to_issue_if.csr_valid, cmt_to_issue_if.mul_valid, cmt_to_issue_if.fpu_valid, cmt_to_issue_if.gpu_valid}),
|
||||
.read_addr ({cmt_to_issue_if.alu_tag, cmt_to_issue_if.bru_tag, cmt_to_issue_if.lsu_tag, cmt_to_issue_if.csr_tag, cmt_to_issue_if.mul_tag, cmt_to_issue_if.fpu_tag, cmt_to_issue_if.gpu_tag}),
|
||||
.read_data ({cmt_to_issue_if.alu_data, cmt_to_issue_if.bru_data, cmt_to_issue_if.lsu_data, cmt_to_issue_if.csr_data, cmt_to_issue_if.mul_data, cmt_to_issue_if.fpu_data, cmt_to_issue_if.gpu_data}),
|
||||
.full (issue_buf_full)
|
||||
);
|
||||
|
||||
// Debug trace: printed on every cycle where decode is valid but not ready,
// showing the stall inputs (table full, per-operand in-use bits, ex_busy).
`ifdef DBG_PRINT_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if (decode_if.valid && ~decode_if.ready) begin
|
||||
$display("%t: Core%0d-stall: wid=%0d, PC=%0h, rd=%0d, wb=%0d, ib_full=%b, inuse=%b%b%b%b, ex_busy=%b",
|
||||
$time, CORE_ID, decode_if.wid, decode_if.curr_PC, decode_if.rd, decode_if.wb, issue_buf_full,
|
||||
inuse_mask[decode_if.rd], inuse_mask[decode_if.rs1], inuse_mask[decode_if.rs2], inuse_mask[decode_if.rs3], ex_busy);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
59
hw/rtl/VX_types.vh
Normal file
59
hw/rtl/VX_types.vh
Normal file
@@ -0,0 +1,59 @@
|
||||
`ifndef VX_TYPES
|
||||
`define VX_TYPES
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Per-instruction metadata record. VX_scoreboard sizes its issue table with
// $bits(issue_data_t) and writes {wid, thread_mask, curr_PC, rd, wb} in this
// same order, so the field order here must stay in step with that
// concatenation (first field = most-significant bits of the packed struct).
typedef struct packed {
|
||||
// Warp index (VX_scoreboard uses decode_if.wid to select a per-warp entry).
logic [`NW_BITS-1:0] wid;
|
||||
logic [`NUM_THREADS-1:0] thread_mask;
|
||||
logic [31:0] curr_PC;
|
||||
// Destination register index.
logic [`NR_BITS-1:0] rd;
|
||||
// Non-zero when the instruction writes back to rd (VX_scoreboard reserves
// rd only when wb != 0).
logic wb;
|
||||
} issue_data_t;
|
||||
|
||||
// One-bit classification flags for a floating-point value.
// NOTE(review): presumably these mirror the IEEE-754 value classes; the logic
// that fills them in is not visible in this file — confirm against the FPU.
typedef struct packed {
|
||||
logic is_normal;
|
||||
logic is_zero;
|
||||
logic is_subnormal;
|
||||
logic is_inf;
|
||||
logic is_nan;
|
||||
logic is_signaling;
|
||||
logic is_quiet;
|
||||
} fp_type_t;
|
||||
|
||||
// Floating-point exception flags. As a packed struct, NV occupies the
// most-significant bit and NX the least-significant bit.
// NOTE(review): this order looks like the RISC-V fcsr.fflags layout
// (NV, DZ, OF, UF, NX) — confirm before relying on it for CSR writes.
typedef struct packed {
|
||||
logic NV; // Invalid
|
||||
logic DZ; // Divide by zero
|
||||
logic OF; // Overflow
|
||||
logic UF; // Underflow
|
||||
logic NX; // Inexact
|
||||
} fflags_t;
|
||||
|
||||
// Width of fflags_t in bits (5 with the fields declared above).
`define FFG_BITS $bits(fflags_t)
|
||||
|
||||
// Thread-mask-change request.
// VX_warp_sched reads warp_ctl_if.tmc.valid / .thread_mask (presumably an
// instance of this struct): when valid, it stores thread_mask as the warp's
// new thread mask, and an all-zero mask also clears the warp's active bits.
typedef struct packed {
|
||||
logic valid;
|
||||
logic [`NUM_THREADS-1:0] thread_mask;
|
||||
} gpu_tmc_t;
|
||||
|
||||
// Warp-spawn request.
// VX_warp_sched reads warp_ctl_if.wspawn.valid / .wmask / .pc (presumably an
// instance of this struct): when valid, wmask becomes the new active-warp
// mask, wmask with bit 0 cleared is recorded as the set of warps to spawn,
// and pc is saved as their start address.
typedef struct packed {
|
||||
logic valid;
|
||||
// One bit per warp.
logic [`NUM_WARPS-1:0] wmask;
|
||||
logic [31:0] pc;
|
||||
} gpu_wspawn_t;
|
||||
|
||||
// Split (branch-divergence) request.
// NOTE(review): no consumer of this struct is visible here. From the field
// names, then_mask / else_mask look like the thread masks for the two sides
// of a divergent branch and pc the address associated with the split —
// confirm against the GPU unit and the warp scheduler.
typedef struct packed {
|
||||
logic valid;
|
||||
logic diverged;
|
||||
logic [`NUM_THREADS-1:0] then_mask;
|
||||
logic [`NUM_THREADS-1:0] else_mask;
|
||||
logic [31:0] pc;
|
||||
} gpu_split_t;
|
||||
|
||||
// Barrier request.
// VX_warp_sched reads warp_ctl_if.barrier.valid / .id (presumably an instance
// of this struct) and uses id to select an entry of its per-barrier stall
// mask array.
typedef struct packed {
|
||||
logic valid;
|
||||
logic [`NB_BITS-1:0] id;
|
||||
// One bit wider than a warp index.
// NOTE(review): presumably so the count can reach NUM_WARPS itself — confirm.
logic [`NW_BITS:0] num_warps;
|
||||
} gpu_barrier_t;
|
||||
|
||||
`endif
|
||||
@@ -16,64 +16,44 @@ module VX_warp_sched #(
|
||||
|
||||
output wire busy
|
||||
);
|
||||
wire update_use_wspawn;
|
||||
wire update_visible_active;
|
||||
|
||||
wire [(1+32+`NUM_THREADS-1):0] ipdom[`NUM_WARPS-1:0];
|
||||
|
||||
wire join_fall;
|
||||
wire [31:0] join_pc;
|
||||
wire [`NUM_THREADS-1:0] join_tm;
|
||||
|
||||
reg [`NUM_WARPS-1:0] warp_active;
|
||||
reg [`NUM_WARPS-1:0] warp_stalled;
|
||||
|
||||
reg [`NUM_WARPS-1:0] visible_active;
|
||||
wire [`NUM_WARPS-1:0] use_active;
|
||||
|
||||
reg [`NUM_WARPS-1:0] visible_active;
|
||||
wire update_visible_active;
|
||||
|
||||
reg [`NUM_WARPS-1:0] warp_lock;
|
||||
|
||||
wire wstall_this_cycle;
|
||||
|
||||
reg [`NUM_THREADS-1:0] thread_masks[`NUM_WARPS-1:0];
|
||||
reg [31:0] warp_pcs[`NUM_WARPS-1:0];
|
||||
|
||||
// barriers
|
||||
reg [`NUM_WARPS-1:0] barrier_stall_mask[`NUM_BARRIERS-1:0];
|
||||
wire [`NUM_WARPS-1:0] b_mask;
|
||||
wire [`NW_BITS:0] b_count;
|
||||
|
||||
reg [`NUM_WARPS-1:0] barrier_stall_mask[`NUM_BARRIERS-1:0];
|
||||
wire reached_barrier_limit;
|
||||
reg [`NUM_WARPS-1:0] total_barrier_stall;
|
||||
|
||||
// wspawn
|
||||
reg [31:0] use_wspawn_pc;
|
||||
reg [`NUM_WARPS-1:0] use_wspawn;
|
||||
|
||||
wire [`NW_BITS-1:0] warp_to_schedule;
|
||||
wire schedule;
|
||||
|
||||
|
||||
wire [`NUM_THREADS-1:0] thread_mask;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [31:0] warp_pc;
|
||||
wire [`NW_BITS-1:0] warp_to_schedule;
|
||||
wire scheduled_warp;
|
||||
|
||||
wire hazard;
|
||||
wire stall_out;
|
||||
wire global_stall;
|
||||
wire real_schedule;
|
||||
|
||||
wire real_schedule;
|
||||
|
||||
wire [31:0] new_pc;
|
||||
|
||||
reg [`NUM_WARPS-1:0] total_barrier_stall;
|
||||
|
||||
reg didnt_split;
|
||||
|
||||
wire stall;
|
||||
reg didnt_split;
|
||||
|
||||
always @(posedge clk) begin
|
||||
integer i;
|
||||
if (reset) begin
|
||||
for (i = 0; i < `NUM_BARRIERS; i++) begin
|
||||
for (integer i = 0; i < `NUM_BARRIERS; i++) begin
|
||||
barrier_stall_mask[i] <= 0;
|
||||
end
|
||||
|
||||
@@ -87,92 +67,92 @@ module VX_warp_sched #(
|
||||
didnt_split <= 0;
|
||||
warp_lock <= 0;
|
||||
|
||||
for (i = 1; i < `NUM_WARPS; i++) begin
|
||||
for (integer i = 1; i < `NUM_WARPS; i++) begin
|
||||
warp_pcs[i] <= 0;
|
||||
warp_active[i] <= 0; // Activating first warp
|
||||
visible_active[i] <= 0; // Activating first warp
|
||||
thread_masks[i] <= 1; // Activating first thread in first warp
|
||||
end
|
||||
|
||||
end else begin
|
||||
|
||||
if (warp_ctl_if.wspawn) begin
|
||||
warp_active <= warp_ctl_if.wspawn_wmask;
|
||||
use_wspawn <= warp_ctl_if.wspawn_wmask & (~`NUM_WARPS'(1));
|
||||
use_wspawn_pc <= warp_ctl_if.wspawn_pc;
|
||||
end else begin
|
||||
if (warp_ctl_if.wspawn.valid) begin
|
||||
warp_active <= warp_ctl_if.wspawn.wmask;
|
||||
use_wspawn <= warp_ctl_if.wspawn.wmask & (~`NUM_WARPS'(1));
|
||||
use_wspawn_pc <= warp_ctl_if.wspawn.pc;
|
||||
end
|
||||
|
||||
if (warp_ctl_if.is_barrier) begin
|
||||
warp_stalled[warp_ctl_if.warp_num] <= 0;
|
||||
if (warp_ctl_if.barrier.valid) begin
|
||||
warp_stalled[warp_ctl_if.wid] <= 0;
|
||||
if (reached_barrier_limit) begin
|
||||
barrier_stall_mask[warp_ctl_if.barrier_id] <= 0;
|
||||
barrier_stall_mask[warp_ctl_if.barrier.id] <= 0;
|
||||
end else begin
|
||||
barrier_stall_mask[warp_ctl_if.barrier_id][warp_ctl_if.warp_num] <= 1;
|
||||
barrier_stall_mask[warp_ctl_if.barrier.id][warp_ctl_if.wid] <= 1;
|
||||
end
|
||||
end else if (warp_ctl_if.change_mask) begin
|
||||
thread_masks[warp_ctl_if.warp_num] <= warp_ctl_if.thread_mask;
|
||||
warp_stalled[warp_ctl_if.warp_num] <= 0;
|
||||
if (0 == warp_ctl_if.thread_mask) begin
|
||||
warp_active[warp_ctl_if.warp_num] <= 0;
|
||||
visible_active[warp_ctl_if.warp_num] <= 0;
|
||||
end else if (warp_ctl_if.tmc.valid) begin
|
||||
thread_masks[warp_ctl_if.wid] <= warp_ctl_if.tmc.thread_mask;
|
||||
warp_stalled[warp_ctl_if.wid] <= 0;
|
||||
if (0 == warp_ctl_if.tmc.thread_mask) begin
|
||||
warp_active[warp_ctl_if.wid] <= 0;
|
||||
visible_active[warp_ctl_if.wid] <= 0;
|
||||
end
|
||||
end else if (join_if.is_join && !didnt_split) begin
|
||||
if (!join_fall) begin
|
||||
warp_pcs[join_if.warp_num] <= join_pc;
|
||||
warp_pcs[join_if.wid] <= join_pc;
|
||||
end
|
||||
thread_masks[join_if.warp_num] <= join_tm;
|
||||
didnt_split <= 0;
|
||||
end else if (warp_ctl_if.is_split) begin
|
||||
warp_stalled[warp_ctl_if.warp_num] <= 0;
|
||||
if (warp_ctl_if.do_split) begin
|
||||
thread_masks[warp_ctl_if.warp_num] <= warp_ctl_if.split_new_mask;
|
||||
thread_masks[join_if.wid] <= join_tm;
|
||||
didnt_split <= 0;
|
||||
end else if (warp_ctl_if.split.valid) begin
|
||||
warp_stalled[warp_ctl_if.wid] <= 0;
|
||||
if (warp_ctl_if.split.diverged) begin
|
||||
thread_masks[warp_ctl_if.wid] <= warp_ctl_if.split.then_mask;
|
||||
didnt_split <= 0;
|
||||
end else begin
|
||||
didnt_split <= 1;
|
||||
end
|
||||
end
|
||||
|
||||
if (update_use_wspawn) begin
|
||||
if (use_wspawn[warp_to_schedule] && !global_stall) begin
|
||||
use_wspawn[warp_to_schedule] <= 0;
|
||||
thread_masks[warp_to_schedule] <= 1;
|
||||
end
|
||||
|
||||
// Stalling the scheduling of warps
|
||||
if (wstall_if.wstall) begin
|
||||
warp_stalled[wstall_if.warp_num] <= 1;
|
||||
visible_active[wstall_if.warp_num] <= 0;
|
||||
warp_stalled[wstall_if.wid] <= 1;
|
||||
visible_active[wstall_if.wid] <= 0;
|
||||
end
|
||||
|
||||
// Refilling active warps
|
||||
if (update_visible_active) begin
|
||||
visible_active <= warp_active & (~warp_stalled) & (~total_barrier_stall) & ~warp_lock;
|
||||
visible_active <= warp_active & ~warp_stalled & ~total_barrier_stall & ~warp_lock;
|
||||
end
|
||||
|
||||
// Don't change state if stall
|
||||
if (!global_stall && real_schedule && (thread_mask != 0)) begin
|
||||
visible_active[warp_to_schedule] <= 0;
|
||||
warp_pcs[warp_to_schedule] <= new_pc;
|
||||
warp_pcs[warp_to_schedule] <= warp_pc + 4;
|
||||
end
|
||||
|
||||
// Branch
|
||||
if (branch_ctl_if.valid) begin
|
||||
if (branch_ctl_if.taken) begin
|
||||
warp_pcs[branch_ctl_if.warp_num] <= branch_ctl_if.dest;
|
||||
warp_pcs[branch_ctl_if.wid] <= branch_ctl_if.dest;
|
||||
end
|
||||
warp_stalled[branch_ctl_if.warp_num] <= 0;
|
||||
warp_stalled[branch_ctl_if.wid] <= 0;
|
||||
end
|
||||
|
||||
// Lock/Release
|
||||
if (scheduled_warp && !stall) begin
|
||||
warp_lock[warp_num] <= 1;
|
||||
if (scheduled_warp && !stall_out) begin
|
||||
warp_lock[warp_to_schedule] <= 1;
|
||||
end
|
||||
if (ifetch_rsp_if.valid && ifetch_rsp_if.ready) begin
|
||||
warp_lock[ifetch_rsp_if.warp_num] <= 0;
|
||||
warp_lock[ifetch_rsp_if.wid] <= 0;
|
||||
end
|
||||
|
||||
end
|
||||
end
|
||||
|
||||
wire [`NUM_WARPS-1:0] b_mask = barrier_stall_mask[warp_ctl_if.barrier.id][`NUM_WARPS-1:0];
|
||||
wire [`NW_BITS:0] b_count;
|
||||
|
||||
VX_countones #(
|
||||
.N(`NUM_WARPS)
|
||||
) barrier_count (
|
||||
@@ -188,26 +168,24 @@ module VX_warp_sched #(
|
||||
.valids(visible_active),
|
||||
.count (count_visible_active)
|
||||
);
|
||||
|
||||
assign b_mask = barrier_stall_mask[warp_ctl_if.barrier_id][`NUM_WARPS-1:0];
|
||||
|
||||
assign reached_barrier_limit = (b_count == warp_ctl_if.barrier_num_warps);
|
||||
|
||||
assign wstall_this_cycle = wstall_if.wstall && (wstall_if.warp_num == warp_to_schedule); // Maybe bug
|
||||
assign reached_barrier_limit = (b_count == warp_ctl_if.barrier.num_warps);
|
||||
|
||||
assign total_barrier_stall = barrier_stall_mask[0] | barrier_stall_mask[1] | barrier_stall_mask[2] | barrier_stall_mask[3];
|
||||
|
||||
assign update_visible_active = (0 == count_visible_active) && !(stall || wstall_this_cycle || hazard || join_if.is_join);
|
||||
wire [(1+32+`NUM_THREADS-1):0] ipdom[`NUM_WARPS-1:0];
|
||||
wire [(1+32+`NUM_THREADS-1):0] q1 = {1'b1, 32'b0, thread_masks[warp_ctl_if.wid]};
|
||||
wire [(1+32+`NUM_THREADS-1):0] q2 = {1'b0, warp_ctl_if.split.pc, warp_ctl_if.split.else_mask};
|
||||
|
||||
wire [(1+32+`NUM_THREADS-1):0] q1 = {1'b1, 32'b0, thread_masks[warp_ctl_if.warp_num]};
|
||||
wire [(1+32+`NUM_THREADS-1):0] q2 = {1'b0, warp_ctl_if.split_save_pc, warp_ctl_if.split_later_mask};
|
||||
assign {join_fall, join_pc, join_tm} = ipdom[join_if.wid];
|
||||
|
||||
assign {join_fall, join_pc, join_tm} = ipdom[join_if.warp_num];
|
||||
for (genvar i = 0; i < `NUM_WARPS; i++) begin
|
||||
wire push = warp_ctl_if.split.valid
|
||||
&& warp_ctl_if.split.diverged
|
||||
&& (i == warp_ctl_if.wid);
|
||||
|
||||
genvar i;
|
||||
for (i = 0; i < `NUM_WARPS; i++) begin
|
||||
wire push = warp_ctl_if.is_split && warp_ctl_if.do_split && (i == warp_ctl_if.warp_num);
|
||||
wire pop = join_if.is_join && (i == join_if.warp_num);
|
||||
wire pop = join_if.is_join
|
||||
&& (i == join_if.wid);
|
||||
|
||||
VX_ipdom_stack #(
|
||||
.WIDTH(1+32+`NUM_THREADS),
|
||||
@@ -217,37 +195,40 @@ module VX_warp_sched #(
|
||||
.reset(reset),
|
||||
.push (push),
|
||||
.pop (pop),
|
||||
.d (ipdom[i]),
|
||||
.q1 (q1),
|
||||
.q2 (q2),
|
||||
.d (ipdom[i]),
|
||||
`UNUSED_PIN (empty),
|
||||
`UNUSED_PIN (full)
|
||||
);
|
||||
end
|
||||
|
||||
wire should_bra = (branch_ctl_if.valid && branch_ctl_if.taken && (warp_to_schedule == branch_ctl_if.warp_num));
|
||||
wire schedule;
|
||||
|
||||
assign hazard = should_bra && schedule;
|
||||
wire branch_hazard = schedule
|
||||
&& branch_ctl_if.valid
|
||||
&& branch_ctl_if.taken
|
||||
&& (branch_ctl_if.wid == warp_to_schedule);
|
||||
|
||||
assign real_schedule = schedule && !warp_stalled[warp_to_schedule] && !total_barrier_stall[warp_to_schedule] && !warp_lock[0];
|
||||
assign real_schedule = schedule
|
||||
&& !warp_stalled[warp_to_schedule]
|
||||
&& !total_barrier_stall[warp_to_schedule]
|
||||
&& !warp_lock[0];
|
||||
|
||||
assign global_stall = stall || wstall_this_cycle || hazard || !real_schedule || join_if.is_join;
|
||||
wire wstall_this_cycle = wstall_if.wstall && (wstall_if.wid == warp_to_schedule); // Maybe bug
|
||||
|
||||
assign scheduled_warp = !(wstall_this_cycle || hazard || !real_schedule || join_if.is_join) && !reset;
|
||||
assign update_visible_active = (0 == count_visible_active) && !(stall_out || wstall_this_cycle || branch_hazard || join_if.is_join);
|
||||
|
||||
wire real_use_wspawn = use_wspawn[warp_to_schedule];
|
||||
assign global_stall = stall_out || wstall_this_cycle || branch_hazard || !real_schedule || join_if.is_join;
|
||||
|
||||
assign warp_pc = real_use_wspawn ? use_wspawn_pc : warp_pcs[warp_to_schedule];
|
||||
assign scheduled_warp = !(wstall_this_cycle || branch_hazard || !real_schedule || join_if.is_join) && !reset;
|
||||
|
||||
assign warp_pc = use_wspawn[warp_to_schedule] ? use_wspawn_pc : warp_pcs[warp_to_schedule];
|
||||
|
||||
assign thread_mask = global_stall ? 0 : (real_use_wspawn ? `NUM_THREADS'(1) : thread_masks[warp_to_schedule]);
|
||||
assign thread_mask = global_stall ? 0 : (use_wspawn[warp_to_schedule] ? `NUM_THREADS'(1) : thread_masks[warp_to_schedule]);
|
||||
|
||||
assign warp_num = warp_to_schedule;
|
||||
|
||||
assign update_use_wspawn = use_wspawn[warp_to_schedule] && !global_stall;
|
||||
|
||||
assign new_pc = warp_pc + 4;
|
||||
|
||||
assign use_active = (count_visible_active != 0) ? visible_active : (warp_active & (~warp_stalled) & (~total_barrier_stall) & (~warp_lock));
|
||||
wire [`NUM_WARPS-1:0] use_active = (count_visible_active != 0) ? visible_active :
|
||||
(warp_active & ~warp_stalled & ~total_barrier_stall & ~warp_lock);
|
||||
|
||||
// Choosing a warp to schedule
|
||||
VX_fixed_arbiter #(
|
||||
@@ -261,17 +242,17 @@ module VX_warp_sched #(
|
||||
`UNUSED_PIN (grant_onehot)
|
||||
);
|
||||
|
||||
assign stall = ~ifetch_req_if.ready && ifetch_req_if.valid;
|
||||
assign stall_out = ~ifetch_req_if.ready && ifetch_req_if.valid;
|
||||
|
||||
VX_generic_register #(
|
||||
.N(1 + `NUM_THREADS + 32 + `NW_BITS)
|
||||
) fetch_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
.stall (stall_out),
|
||||
.flush (0),
|
||||
.in ({(| thread_mask), thread_mask, warp_pc, warp_num}),
|
||||
.out ({ifetch_req_if.valid, ifetch_req_if.thread_mask, ifetch_req_if.curr_PC, ifetch_req_if.warp_num})
|
||||
.in ({(| thread_mask), thread_mask, warp_pc, warp_to_schedule}),
|
||||
.out ({ifetch_req_if.valid, ifetch_req_if.thread_mask, ifetch_req_if.curr_PC, ifetch_req_if.wid})
|
||||
);
|
||||
|
||||
assign busy = (warp_active != 0);
|
||||
|
||||
@@ -8,6 +8,7 @@ module VX_writeback #(
|
||||
|
||||
// inputs
|
||||
VX_exu_to_cmt_if alu_commit_if,
|
||||
VX_exu_to_cmt_if bru_commit_if,
|
||||
VX_exu_to_cmt_if lsu_commit_if,
|
||||
VX_exu_to_cmt_if csr_commit_if,
|
||||
VX_exu_to_cmt_if mul_commit_if,
|
||||
@@ -20,26 +21,24 @@ module VX_writeback #(
|
||||
);
|
||||
reg [`ISSUEQ_SIZE-1:0] wb_valid_table, wb_valid_table_n;
|
||||
reg [`ISSUEQ_SIZE-1:0][`NUM_THREADS-1:0][31:0] wb_data_table, wb_data_table_n;
|
||||
reg [`ISSUEQ_SIZE-1:0][`NW_BITS-1:0] wb_warp_num_table, wb_warp_num_table_n;
|
||||
reg [`ISSUEQ_SIZE-1:0][`NW_BITS-1:0] wb_wid_table, wb_wid_table_n;
|
||||
reg [`ISSUEQ_SIZE-1:0][`NUM_THREADS-1:0] wb_thread_mask_table, wb_thread_mask_table_n;
|
||||
reg [`ISSUEQ_SIZE-1:0][31:0] wb_curr_PC_table, wb_curr_PC_table_n;
|
||||
reg [`ISSUEQ_SIZE-1:0][`NR_BITS-1:0] wb_rd_table, wb_rd_table_n;
|
||||
|
||||
reg wb_valid, wb_valid_n;
|
||||
reg [`NUM_THREADS-1:0][31:0] wb_data, wb_data_n;
|
||||
reg [`NW_BITS-1:0] wb_warp_num, wb_warp_num_n;
|
||||
reg [`NW_BITS-1:0] wb_wid, wb_wid_n;
|
||||
reg [`NUM_THREADS-1:0] wb_thread_mask, wb_thread_mask_n;
|
||||
reg [31:0] wb_curr_PC, wb_curr_PC_n;
|
||||
reg [`NR_BITS-1:0] wb_rd, wb_rd_n;
|
||||
|
||||
reg [`ISTAG_BITS-1:0] wb_index;
|
||||
reg [`ISTAG_BITS-1:0] wb_index_n;
|
||||
|
||||
reg wb_valid;
|
||||
reg wb_valid_n;
|
||||
reg [`ISTAG_BITS-1:0] wb_index_n;
|
||||
|
||||
always @(*) begin
|
||||
wb_valid_table_n = wb_valid_table;
|
||||
wb_warp_num_table_n = wb_warp_num_table;
|
||||
wb_wid_table_n = wb_wid_table;
|
||||
wb_thread_mask_table_n = wb_thread_mask_table;
|
||||
wb_curr_PC_table_n = wb_curr_PC_table;
|
||||
wb_rd_table_n = wb_rd_table;
|
||||
@@ -53,16 +52,25 @@ module VX_writeback #(
|
||||
wb_valid_table_n [alu_commit_if.issue_tag] = cmt_to_issue_if.alu_data.wb;
|
||||
wb_thread_mask_table_n [alu_commit_if.issue_tag] = cmt_to_issue_if.alu_data.thread_mask;
|
||||
wb_data_table_n [alu_commit_if.issue_tag] = alu_commit_if.data;
|
||||
wb_warp_num_table_n [alu_commit_if.issue_tag] = cmt_to_issue_if.alu_data.warp_num;
|
||||
wb_wid_table_n [alu_commit_if.issue_tag] = cmt_to_issue_if.alu_data.wid;
|
||||
wb_curr_PC_table_n [alu_commit_if.issue_tag] = cmt_to_issue_if.alu_data.curr_PC;
|
||||
wb_rd_table_n [alu_commit_if.issue_tag] = cmt_to_issue_if.alu_data.rd;
|
||||
end
|
||||
|
||||
if (bru_commit_if.valid) begin
|
||||
wb_valid_table_n [bru_commit_if.issue_tag] = cmt_to_issue_if.bru_data.wb;
|
||||
wb_thread_mask_table_n [bru_commit_if.issue_tag] = cmt_to_issue_if.bru_data.thread_mask;
|
||||
wb_data_table_n [bru_commit_if.issue_tag] = bru_commit_if.data;
|
||||
wb_wid_table_n [bru_commit_if.issue_tag] = cmt_to_issue_if.bru_data.wid;
|
||||
wb_curr_PC_table_n [bru_commit_if.issue_tag] = cmt_to_issue_if.bru_data.curr_PC;
|
||||
wb_rd_table_n [bru_commit_if.issue_tag] = cmt_to_issue_if.bru_data.rd;
|
||||
end
|
||||
|
||||
if (lsu_commit_if.valid) begin
|
||||
wb_valid_table_n [lsu_commit_if.issue_tag] = cmt_to_issue_if.lsu_data.wb;
|
||||
wb_thread_mask_table_n [lsu_commit_if.issue_tag] = cmt_to_issue_if.lsu_data.thread_mask;
|
||||
wb_data_table_n [lsu_commit_if.issue_tag] = lsu_commit_if.data;
|
||||
wb_warp_num_table_n [lsu_commit_if.issue_tag] = cmt_to_issue_if.lsu_data.warp_num;
|
||||
wb_wid_table_n [lsu_commit_if.issue_tag] = cmt_to_issue_if.lsu_data.wid;
|
||||
wb_curr_PC_table_n [lsu_commit_if.issue_tag] = cmt_to_issue_if.lsu_data.curr_PC;
|
||||
wb_rd_table_n [lsu_commit_if.issue_tag] = cmt_to_issue_if.lsu_data.rd;
|
||||
end
|
||||
@@ -71,7 +79,7 @@ module VX_writeback #(
|
||||
wb_valid_table_n [csr_commit_if.issue_tag] = cmt_to_issue_if.csr_data.wb;
|
||||
wb_thread_mask_table_n [csr_commit_if.issue_tag] = cmt_to_issue_if.csr_data.thread_mask;
|
||||
wb_data_table_n [csr_commit_if.issue_tag] = csr_commit_if.data;
|
||||
wb_warp_num_table_n [csr_commit_if.issue_tag] = cmt_to_issue_if.csr_data.warp_num;
|
||||
wb_wid_table_n [csr_commit_if.issue_tag] = cmt_to_issue_if.csr_data.wid;
|
||||
wb_curr_PC_table_n [csr_commit_if.issue_tag] = cmt_to_issue_if.csr_data.curr_PC;
|
||||
wb_rd_table_n [csr_commit_if.issue_tag] = cmt_to_issue_if.csr_data.rd;
|
||||
end
|
||||
@@ -80,7 +88,7 @@ module VX_writeback #(
|
||||
wb_valid_table_n [mul_commit_if.issue_tag] = cmt_to_issue_if.mul_data.wb;
|
||||
wb_thread_mask_table_n [mul_commit_if.issue_tag] = cmt_to_issue_if.mul_data.thread_mask;
|
||||
wb_data_table_n [mul_commit_if.issue_tag] = mul_commit_if.data;
|
||||
wb_warp_num_table_n [mul_commit_if.issue_tag] = cmt_to_issue_if.mul_data.warp_num;
|
||||
wb_wid_table_n [mul_commit_if.issue_tag] = cmt_to_issue_if.mul_data.wid;
|
||||
wb_curr_PC_table_n [mul_commit_if.issue_tag] = cmt_to_issue_if.mul_data.curr_PC;
|
||||
wb_rd_table_n [mul_commit_if.issue_tag] = cmt_to_issue_if.mul_data.rd;
|
||||
end
|
||||
@@ -89,7 +97,7 @@ module VX_writeback #(
|
||||
wb_valid_table_n [fpu_commit_if.issue_tag] = cmt_to_issue_if.fpu_data.wb;
|
||||
wb_thread_mask_table_n [fpu_commit_if.issue_tag] = cmt_to_issue_if.fpu_data.thread_mask;
|
||||
wb_data_table_n [fpu_commit_if.issue_tag] = fpu_commit_if.data;
|
||||
wb_warp_num_table_n [fpu_commit_if.issue_tag] = cmt_to_issue_if.fpu_data.warp_num;
|
||||
wb_wid_table_n [fpu_commit_if.issue_tag] = cmt_to_issue_if.fpu_data.wid;
|
||||
wb_curr_PC_table_n [fpu_commit_if.issue_tag] = cmt_to_issue_if.fpu_data.curr_PC;
|
||||
wb_rd_table_n [fpu_commit_if.issue_tag] = cmt_to_issue_if.fpu_data.rd;
|
||||
end
|
||||
@@ -98,23 +106,25 @@ module VX_writeback #(
|
||||
wb_valid_table_n [gpu_commit_if.issue_tag] = cmt_to_issue_if.gpu_data.wb;
|
||||
wb_thread_mask_table_n [gpu_commit_if.issue_tag] = cmt_to_issue_if.gpu_data.thread_mask;
|
||||
wb_data_table_n [gpu_commit_if.issue_tag] = gpu_commit_if.data;
|
||||
wb_warp_num_table_n [gpu_commit_if.issue_tag] = cmt_to_issue_if.gpu_data.warp_num;
|
||||
wb_wid_table_n [gpu_commit_if.issue_tag] = cmt_to_issue_if.gpu_data.wid;
|
||||
wb_curr_PC_table_n [gpu_commit_if.issue_tag] = cmt_to_issue_if.gpu_data.curr_PC;
|
||||
wb_rd_table_n [gpu_commit_if.issue_tag] = cmt_to_issue_if.gpu_data.rd;
|
||||
end
|
||||
end
|
||||
|
||||
integer i;
|
||||
|
||||
always @(*) begin
|
||||
wb_index_n = 0;
|
||||
wb_valid_n = 0;
|
||||
for (i = `ISSUEQ_SIZE-1; i >= 0; i--) begin
|
||||
wb_index_n = 0;
|
||||
wb_valid_n = 0;
|
||||
wb_thread_mask_n = {`NUM_THREADS{1'bx}};
|
||||
wb_wid_n = {`NW_BITS{1'bx}};
|
||||
wb_curr_PC_n = {32{1'bx}};
|
||||
wb_data_n = {(`NUM_THREADS * 32){1'bx}};
|
||||
for (integer i = `ISSUEQ_SIZE-1; i >= 0; i--) begin
|
||||
if (wb_valid_table_n[i]) begin
|
||||
wb_index_n = `ISTAG_BITS'(i);
|
||||
wb_valid_n = 1;
|
||||
wb_thread_mask_n= wb_thread_mask_table_n[i];
|
||||
wb_warp_num_n = wb_warp_num_table_n[i];
|
||||
wb_wid_n = wb_wid_table_n[i];
|
||||
wb_curr_PC_n = wb_curr_PC_table_n[i];
|
||||
wb_rd_n = wb_rd_table_n[i];
|
||||
wb_data_n = wb_data_table_n[i];
|
||||
@@ -130,15 +140,15 @@ module VX_writeback #(
|
||||
end else begin
|
||||
wb_valid_table <= wb_valid_table_n;
|
||||
wb_thread_mask_table <= wb_thread_mask_table_n;
|
||||
wb_warp_num_table <= wb_warp_num_table_n;
|
||||
wb_wid_table <= wb_wid_table_n;
|
||||
wb_curr_PC_table <= wb_curr_PC_table_n;
|
||||
wb_rd_table <= wb_rd_table_n;
|
||||
wb_data_table <= wb_data_table_n;
|
||||
|
||||
wb_index <= wb_index_n;
|
||||
wb_valid <= wb_valid_n && writeback_if.ready;
|
||||
wb_valid <= wb_valid_n;
|
||||
wb_thread_mask <= wb_thread_mask_n;
|
||||
wb_warp_num <= wb_warp_num_n;
|
||||
wb_wid <= wb_wid_n;
|
||||
wb_curr_PC <= wb_curr_PC_n;
|
||||
wb_rd <= wb_rd_n;
|
||||
wb_data <= wb_data_n;
|
||||
@@ -148,18 +158,10 @@ module VX_writeback #(
|
||||
// writeback request
|
||||
assign writeback_if.valid = wb_valid;
|
||||
assign writeback_if.thread_mask = wb_thread_mask;
|
||||
assign writeback_if.warp_num = wb_warp_num;
|
||||
assign writeback_if.wid = wb_wid;
|
||||
assign writeback_if.curr_PC = wb_curr_PC;
|
||||
assign writeback_if.rd = wb_rd;
|
||||
assign writeback_if.data = wb_data;
|
||||
|
||||
// commit back-pressure
|
||||
assign alu_commit_if.ready = 1'b1;
|
||||
assign lsu_commit_if.ready = 1'b1;
|
||||
assign csr_commit_if.ready = 1'b1;
|
||||
assign mul_commit_if.ready = 1'b1;
|
||||
assign fpu_commit_if.ready = 1'b1;
|
||||
assign gpu_commit_if.ready = 1'b1;
|
||||
|
||||
// special workaround to get RISC-V tests Pass/Fail status
|
||||
reg [31:0] last_wb_value [`NUM_REGS-1:0] /* verilator public */;
|
||||
|
||||
@@ -191,8 +191,7 @@ module Vortex (
|
||||
wire [`CLOG2(`NUM_CLUSTERS)-1:0] csr_io_request_id = `CLOG2(`NUM_CLUSTERS)'(csr_io_req_coreid >> `CLOG2(`NUM_CLUSTERS));
|
||||
wire [`NC_BITS-1:0] per_cluster_csr_io_req_coreid = `NC_BITS'(csr_io_req_coreid);
|
||||
|
||||
genvar i;
|
||||
for (i = 0; i < `NUM_CLUSTERS; i++) begin
|
||||
for (genvar i = 0; i < `NUM_CLUSTERS; i++) begin
|
||||
VX_cluster #(
|
||||
.CLUSTER_ID(i)
|
||||
) cluster (
|
||||
@@ -358,7 +357,7 @@ module Vortex (
|
||||
wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] l3_snp_fwdin_tag;
|
||||
wire [`NUM_CLUSTERS-1:0] l3_snp_fwdin_ready;
|
||||
|
||||
for (i = 0; i < `L3NUM_REQUESTS; i++) begin
|
||||
for (genvar i = 0; i < `L3NUM_REQUESTS; i++) begin
|
||||
// Core Request
|
||||
assign l3_core_req_valid [i] = per_cluster_dram_req_valid [i];
|
||||
assign l3_core_req_rw [i] = per_cluster_dram_req_rw [i];
|
||||
|
||||
36
hw/rtl/cache/VX_bank.v
vendored
36
hw/rtl/cache/VX_bank.v
vendored
@@ -108,7 +108,7 @@ module VX_bank #(
|
||||
wire[31:0] debug_pc_st0;
|
||||
wire debug_wb_st0;
|
||||
wire[`NR_BITS-1:0] debug_rd_st0;
|
||||
wire[`NW_BITS-1:0] debug_warp_num_st0;
|
||||
wire[`NW_BITS-1:0] debug_wid_st0;
|
||||
wire debug_rw_st0;
|
||||
wire[WORD_SIZE-1:0] debug_byteen_st0;
|
||||
wire[`REQS_BITS-1:0] debug_tid_st0;
|
||||
@@ -117,7 +117,7 @@ module VX_bank #(
|
||||
wire[31:0] debug_pc_st1e;
|
||||
wire debug_wb_st1e;
|
||||
wire[`NR_BITS-1:0] debug_rd_st1e;
|
||||
wire[`NW_BITS-1:0] debug_warp_num_st1e;
|
||||
wire[`NW_BITS-1:0] debug_wid_st1e;
|
||||
wire debug_rw_st1e;
|
||||
wire[WORD_SIZE-1:0] debug_byteen_st1e;
|
||||
wire[`REQS_BITS-1:0] debug_tid_st1e;
|
||||
@@ -126,7 +126,7 @@ module VX_bank #(
|
||||
wire[31:0] debug_pc_st2;
|
||||
wire debug_wb_st2;
|
||||
wire[`NR_BITS-1:0] debug_rd_st2;
|
||||
wire[`NW_BITS-1:0] debug_warp_num_st2;
|
||||
wire[`NW_BITS-1:0] debug_wid_st2;
|
||||
wire debug_rw_st2;
|
||||
wire[WORD_SIZE-1:0] debug_byteen_st2;
|
||||
wire[`REQS_BITS-1:0] debug_tid_st2;
|
||||
@@ -271,10 +271,9 @@ module VX_bank #(
|
||||
wire going_to_write_st1 [STAGE_1_CYCLES-1:0];
|
||||
`DEBUG_END
|
||||
|
||||
integer j;
|
||||
always @(*) begin
|
||||
is_fill_in_pipe = 0;
|
||||
for (j = 0; j < STAGE_1_CYCLES; j++) begin
|
||||
for (integer j = 0; j < STAGE_1_CYCLES; j++) begin
|
||||
if (is_fill_st1[j]) begin
|
||||
is_fill_in_pipe = 1;
|
||||
end
|
||||
@@ -360,7 +359,7 @@ module VX_bank #(
|
||||
|
||||
`ifdef DBG_CORE_REQ_INFO
|
||||
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
|
||||
assign {debug_pc_st0, debug_wb_st0, debug_rd_st0, debug_warp_num_st0, debug_tagid_st0, debug_rw_st0, debug_byteen_st0, debug_tid_st0} = qual_inst_meta_st0;
|
||||
assign {debug_pc_st0, debug_wb_st0, debug_rd_st0, debug_wid_st0, debug_tagid_st0, debug_rw_st0, debug_byteen_st0, debug_tid_st0} = qual_inst_meta_st0;
|
||||
end
|
||||
`endif
|
||||
|
||||
@@ -375,8 +374,7 @@ module VX_bank #(
|
||||
.out ({is_mrvq_st1[0] , is_snp_st1[0], snp_invalidate_st1[0], going_to_write_st1[0], valid_st1[0], addr_st1[0], wsel_st1[0], writeword_st1[0], inst_meta_st1[0], is_fill_st1[0], writedata_st1[0]})
|
||||
);
|
||||
|
||||
genvar i;
|
||||
for (i = 1; i < STAGE_1_CYCLES; i++) begin
|
||||
for (genvar i = 1; i < STAGE_1_CYCLES; i++) begin
|
||||
VX_generic_register #(
|
||||
.N(1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `REQ_INST_META_WIDTH + 1 + `BANK_LINE_WIDTH)
|
||||
) s0_1_cc (
|
||||
@@ -446,13 +444,13 @@ module VX_bank #(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
`ifdef DBG_CORE_REQ_INFO
|
||||
`ifdef DBG_CORE_REQ_INFO
|
||||
.debug_pc_st1e(debug_pc_st1e),
|
||||
.debug_wb_st1e(debug_wb_st1e),
|
||||
.debug_rd_st1e(debug_rd_st1e),
|
||||
.debug_warp_num_st1e(debug_warp_num_st1e),
|
||||
.debug_wid_st1e(debug_wid_st1e),
|
||||
.debug_tagid_st1e(debug_tagid_st1e),
|
||||
`endif
|
||||
`endif
|
||||
|
||||
.stall (stall_bank_pipe),
|
||||
.stall_bank_pipe(stall_bank_pipe),
|
||||
@@ -490,7 +488,7 @@ module VX_bank #(
|
||||
|
||||
`ifdef DBG_CORE_REQ_INFO
|
||||
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
|
||||
assign {debug_pc_st1e, debug_wb_st1e, debug_rd_st1e, debug_warp_num_st1e, debug_tagid_st1e, debug_rw_st1e, debug_byteen_st1e, debug_tid_st1e} = inst_meta_st1[STAGE_1_CYCLES-1];
|
||||
assign {debug_pc_st1e, debug_wb_st1e, debug_rd_st1e, debug_wid_st1e, debug_tagid_st1e, debug_rw_st1e, debug_byteen_st1e, debug_tid_st1e} = inst_meta_st1[STAGE_1_CYCLES-1];
|
||||
end
|
||||
`endif
|
||||
|
||||
@@ -531,7 +529,7 @@ module VX_bank #(
|
||||
|
||||
`ifdef DBG_CORE_REQ_INFO
|
||||
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
|
||||
assign {debug_pc_st2, debug_wb_st2, debug_rd_st2, debug_warp_num_st2, debug_tagid_st2, debug_rw_st2, debug_byteen_st2, debug_tid_st2} = inst_meta_st2;
|
||||
assign {debug_pc_st2, debug_wb_st2, debug_rd_st2, debug_wid_st2, debug_tagid_st2, debug_rw_st2, debug_byteen_st2, debug_tid_st2} = inst_meta_st2;
|
||||
end
|
||||
`endif
|
||||
|
||||
@@ -543,10 +541,10 @@ module VX_bank #(
|
||||
assign mrvq_push_stall = miss_add_unqual && mrvq_full;
|
||||
|
||||
wire miss_add = miss_add_unqual
|
||||
&& !mrvq_full
|
||||
&& !(cwbq_push_stall
|
||||
|| dwbq_push_stall
|
||||
|| dram_fill_req_stall);
|
||||
&& !mrvq_full
|
||||
&& !(cwbq_push_stall
|
||||
|| dwbq_push_stall
|
||||
|| dram_fill_req_stall);
|
||||
|
||||
assign recover_mrvq_state_st2 = miss_add_unqual && is_mrvq_st2; // Doesn't need to include the stalls
|
||||
|
||||
@@ -718,7 +716,9 @@ module VX_bank #(
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
dwbq_dual_valid_sel <= 0;
|
||||
end else if (dwbq_is_dwb_out && dwbq_is_snp_out && (dram_wb_req_fire || snp_rsp_fire)) begin
|
||||
end else if (dwbq_is_dwb_out
|
||||
&& dwbq_is_snp_out
|
||||
&& (dram_wb_req_fire || snp_rsp_fire)) begin
|
||||
dwbq_dual_valid_sel <= ~dwbq_dual_valid_sel;
|
||||
end
|
||||
end
|
||||
|
||||
8
hw/rtl/cache/VX_cache.v
vendored
8
hw/rtl/cache/VX_cache.v
vendored
@@ -132,12 +132,12 @@ module VX_cache #(
|
||||
wire[31:0] debug_core_req_use_pc;
|
||||
wire debug_core_req_wb;
|
||||
wire[`NR_BITS-1:0] debug_core_req_rd;
|
||||
wire[`NW_BITS-1:0] debug_core_req_warp_num;
|
||||
wire[`NW_BITS-1:0] debug_core_req_wid;
|
||||
wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_core_req_idx;
|
||||
/* verilator lint_on UNUSED */
|
||||
|
||||
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
|
||||
assign {debug_core_req_use_pc, debug_core_req_wb, debug_core_req_rd, debug_core_req_warp_num, debug_core_req_idx} = core_req_tag[0];
|
||||
assign {debug_core_req_use_pc, debug_core_req_wb, debug_core_req_rd, debug_core_req_wid, debug_core_req_idx} = core_req_tag[0];
|
||||
end
|
||||
`endif
|
||||
|
||||
@@ -246,10 +246,8 @@ module VX_cache #(
|
||||
|
||||
assign dram_req_tag = dram_req_addr;
|
||||
assign dram_rsp_ready = (| per_bank_dram_fill_rsp_ready);
|
||||
|
||||
genvar i;
|
||||
|
||||
for (i = 0; i < NUM_BANKS; i++) begin
|
||||
for (genvar i = 0; i < NUM_BANKS; i++) begin
|
||||
wire [NUM_REQUESTS-1:0] curr_bank_core_req_valid;
|
||||
wire [NUM_REQUESTS-1:0] curr_bank_core_req_rw;
|
||||
wire [NUM_REQUESTS-1:0][WORD_SIZE-1:0] curr_bank_core_req_byteen;
|
||||
|
||||
6
hw/rtl/cache/VX_cache_core_req_bank_sel.v
vendored
6
hw/rtl/cache/VX_cache_core_req_bank_sel.v
vendored
@@ -18,12 +18,10 @@ module VX_cache_core_req_bank_sel #(
|
||||
output reg [NUM_BANKS-1:0][NUM_REQUESTS-1:0] per_bank_valid,
|
||||
output wire core_req_ready
|
||||
);
|
||||
integer i;
|
||||
|
||||
if (NUM_BANKS == 1) begin
|
||||
always @(*) begin
|
||||
per_bank_valid = 0;
|
||||
for (i = 0; i < NUM_REQUESTS; i++) begin
|
||||
for (integer i = 0; i < NUM_REQUESTS; i++) begin
|
||||
per_bank_valid[0][i] = core_req_valid[i];
|
||||
end
|
||||
end
|
||||
@@ -33,7 +31,7 @@ module VX_cache_core_req_bank_sel #(
|
||||
always @(*) begin
|
||||
per_bank_valid = 0;
|
||||
per_bank_ready_sel = {NUM_BANKS{1'b1}};
|
||||
for (i = 0; i < NUM_REQUESTS; i++) begin
|
||||
for (integer i = 0; i < NUM_REQUESTS; i++) begin
|
||||
per_bank_valid[core_req_addr[i][`BANK_SELECT_ADDR_RNG]][i] = core_req_valid[i];
|
||||
per_bank_ready_sel[core_req_addr[i][`BANK_SELECT_ADDR_RNG]] = 0;
|
||||
end
|
||||
|
||||
6
hw/rtl/cache/VX_cache_core_rsp_merge.v
vendored
6
hw/rtl/cache/VX_cache_core_rsp_merge.v
vendored
@@ -48,14 +48,12 @@ module VX_cache_core_rsp_merge #(
|
||||
|
||||
wire stall = ~core_rsp_ready && (| core_rsp_valid);
|
||||
|
||||
integer i;
|
||||
|
||||
if (CORE_TAG_ID_BITS != 0) begin
|
||||
always @(*) begin
|
||||
core_rsp_valid_unqual = 0;
|
||||
core_rsp_data_unqual = 0;
|
||||
core_rsp_tag_unqual = per_bank_core_rsp_tag[main_bank_index];
|
||||
for (i = 0; i < NUM_BANKS; i++) begin
|
||||
for (integer i = 0; i < NUM_BANKS; i++) begin
|
||||
if (per_bank_core_rsp_valid[i]
|
||||
&& (per_bank_core_rsp_tag[i][CORE_TAG_ID_BITS-1:0] == per_bank_core_rsp_tag[main_bank_index][CORE_TAG_ID_BITS-1:0])) begin
|
||||
core_rsp_valid_unqual[per_bank_core_rsp_tid[i]] = 1;
|
||||
@@ -71,7 +69,7 @@ module VX_cache_core_rsp_merge #(
|
||||
core_rsp_valid_unqual = 0;
|
||||
core_rsp_data_unqual = 0;
|
||||
core_rsp_tag_unqual = 0;
|
||||
for (i = 0; i < NUM_BANKS; i++) begin
|
||||
for (integer i = 0; i < NUM_BANKS; i++) begin
|
||||
if (per_bank_core_rsp_valid[i]
|
||||
&& !core_rsp_valid_unqual[per_bank_core_rsp_tid[i]]
|
||||
&& ((main_bank_index == `BANK_BITS'(i))
|
||||
|
||||
3
hw/rtl/cache/VX_cache_dram_req_arb.v
vendored
3
hw/rtl/cache/VX_cache_dram_req_arb.v
vendored
@@ -106,8 +106,7 @@ module VX_cache_dram_req_arb #(
|
||||
`UNUSED_PIN (grant_onehot)
|
||||
);
|
||||
|
||||
genvar i;
|
||||
for (i = 0; i < NUM_BANKS; i++) begin
|
||||
for (genvar i = 0; i < NUM_BANKS; i++) begin
|
||||
assign per_bank_dram_wb_req_ready[i] = dram_req_ready && (dwb_bank == `BANK_BITS'(i));
|
||||
end
|
||||
|
||||
|
||||
9
hw/rtl/cache/VX_cache_miss_resrv.v
vendored
9
hw/rtl/cache/VX_cache_miss_resrv.v
vendored
@@ -77,9 +77,8 @@ module VX_cache_miss_resrv #(
|
||||
reg [MRVQ_SIZE-1:0] make_ready;
|
||||
reg [MRVQ_SIZE-1:0] make_ready_push;
|
||||
reg [MRVQ_SIZE-1:0] valid_address_match;
|
||||
|
||||
genvar i;
|
||||
for (i = 0; i < MRVQ_SIZE; i++) begin
|
||||
|
||||
for (genvar i = 0; i < MRVQ_SIZE; i++) begin
|
||||
assign valid_address_match[i] = valid_table[i] ? (addr_table[i] == fill_addr_st1) : 0;
|
||||
assign make_ready[i] = is_fill_st1 && valid_address_match[i];
|
||||
end
|
||||
@@ -121,7 +120,6 @@ module VX_cache_miss_resrv #(
|
||||
head_ptr <= 0;
|
||||
tail_ptr <= 0;
|
||||
end else begin
|
||||
|
||||
if (mrvq_push) begin
|
||||
valid_table[enqueue_index] <= 1;
|
||||
ready_table[enqueue_index] <= mrvq_init_ready_state;
|
||||
@@ -157,11 +155,10 @@ module VX_cache_miss_resrv #(
|
||||
end
|
||||
|
||||
`ifdef DBG_PRINT_CACHE_MSRQ
|
||||
integer j;
|
||||
always @(posedge clk) begin
|
||||
if (mrvq_push || mrvq_pop || increment_head || recover_state) begin
|
||||
$write("%t: bank%0d:%0d msrq: push=%b pop=%b incr=%d recv=%d", $time, CACHE_ID, BANK_ID, mrvq_push, mrvq_pop, increment_head, recover_state);
|
||||
for (j = 0; j < MRVQ_SIZE; j++) begin
|
||||
for (integer j = 0; j < MRVQ_SIZE; j++) begin
|
||||
if (valid_table[j]) begin
|
||||
$write(" ");
|
||||
if (schedule_ptr == $bits(schedule_ptr)'(j)) $write("*");
|
||||
|
||||
6
hw/rtl/cache/VX_snp_forwarder.v
vendored
6
hw/rtl/cache/VX_snp_forwarder.v
vendored
@@ -83,9 +83,7 @@ module VX_snp_forwarder #(
|
||||
end
|
||||
end
|
||||
|
||||
genvar i;
|
||||
|
||||
for (i = 0; i < NUM_REQUESTS; i++) begin
|
||||
for (genvar i = 0; i < NUM_REQUESTS; i++) begin
|
||||
assign snp_fwdout_valid[i] = snp_req_valid && snp_req_ready;
|
||||
assign snp_fwdout_addr[i] = snp_req_addr;
|
||||
assign snp_fwdout_invalidate[i] = snp_req_invalidate;
|
||||
@@ -110,7 +108,7 @@ module VX_snp_forwarder #(
|
||||
assign fwdin_valid = snp_fwdin_valid[fwdin_sel];
|
||||
assign fwdin_tag = snp_fwdin_tag[fwdin_sel];
|
||||
|
||||
for (i = 0; i < NUM_REQUESTS; i++) begin
|
||||
for (genvar i = 0; i < NUM_REQUESTS; i++) begin
|
||||
assign snp_fwdin_ready[i] = fwdin_ready && (fwdin_sel == `REQS_BITS'(i));
|
||||
end
|
||||
|
||||
|
||||
3
hw/rtl/cache/VX_snp_rsp_arb.v
vendored
3
hw/rtl/cache/VX_snp_rsp_arb.v
vendored
@@ -34,8 +34,7 @@ module VX_snp_rsp_arb #(
|
||||
assign snp_rsp_valid = fsq_valid;
|
||||
assign snp_rsp_tag = per_bank_snp_rsp_tag[fsq_bank];
|
||||
|
||||
genvar i;
|
||||
for (i = 0; i < NUM_BANKS; i++) begin
|
||||
for (genvar i = 0; i < NUM_BANKS; i++) begin
|
||||
assign per_bank_snp_rsp_ready[i] = snp_rsp_ready && (fsq_bank == `BANK_BITS'(i));
|
||||
end
|
||||
|
||||
|
||||
19
hw/rtl/cache/VX_tag_data_access.v
vendored
19
hw/rtl/cache/VX_tag_data_access.v
vendored
@@ -30,7 +30,7 @@ module VX_tag_data_access #(
|
||||
input wire[31:0] debug_pc_st1e,
|
||||
input wire debug_wb_st1e,
|
||||
input wire[`NR_BITS-1:0] debug_rd_st1e,
|
||||
input wire[`NW_BITS-1:0] debug_warp_num_st1e,
|
||||
input wire[`NW_BITS-1:0] debug_wid_st1e,
|
||||
input wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st1e,
|
||||
`IGNORE_WARNINGS_END
|
||||
`endif
|
||||
@@ -135,8 +135,7 @@ module VX_tag_data_access #(
|
||||
.out ({read_valid_st1c[0], read_dirty_st1c[0], read_dirtyb_st1c[0], read_tag_st1c[0], read_data_st1c[0]})
|
||||
);
|
||||
|
||||
genvar i;
|
||||
for (i = 1; i < STAGE_1_CYCLES-1; i++) begin
|
||||
for (genvar i = 1; i < STAGE_1_CYCLES-1; i++) begin
|
||||
VX_generic_register #(
|
||||
.N(1 + 1 + BANK_LINE_SIZE + `TAG_SELECT_BITS + `BANK_LINE_WIDTH)
|
||||
) s0_1_cc (
|
||||
@@ -157,11 +156,11 @@ module VX_tag_data_access #(
|
||||
|
||||
if (`WORD_SELECT_WIDTH != 0) begin
|
||||
wire [`WORD_WIDTH-1:0] readword = use_read_data_st1e[wordsel_st1e * `WORD_WIDTH +: `WORD_WIDTH];
|
||||
for (i = 0; i < WORD_SIZE; i++) begin
|
||||
for (genvar i = 0; i < WORD_SIZE; i++) begin
|
||||
assign readword_st1e[i * 8 +: 8] = readword[i * 8 +: 8] & {8{mem_byteen_st1e[i]}};
|
||||
end
|
||||
end else begin
|
||||
for (i = 0; i < WORD_SIZE; i++) begin
|
||||
for (genvar i = 0; i < WORD_SIZE; i++) begin
|
||||
assign readword_st1e[i * 8 +: 8] = use_read_data_st1e[i * 8 +: 8] & {8{mem_byteen_st1e[i]}};
|
||||
end
|
||||
end
|
||||
@@ -176,7 +175,7 @@ module VX_tag_data_access #(
|
||||
&& ~is_snp_st1e
|
||||
&& ~real_writefill;
|
||||
|
||||
for (i = 0; i < `BANK_LINE_WORDS; i++) begin
|
||||
for (genvar i = 0; i < `BANK_LINE_WORDS; i++) begin
|
||||
wire normal_write = ((`WORD_SELECT_WIDTH == 0) || (wordsel_st1e == `UP(`WORD_SELECT_WIDTH)'(i)))
|
||||
&& should_write;
|
||||
|
||||
@@ -218,15 +217,15 @@ module VX_tag_data_access #(
|
||||
if (valid_req_st1e) begin
|
||||
if ((| use_write_enable)) begin
|
||||
if (writefill_st1e) begin
|
||||
$display("%t: bank%0d:%0d store-fill: warp=%0d, PC=%0h, tag=%0h, wb=%b, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, data=%0h", $time, CACHE_ID, BANK_ID, debug_warp_num_st1e, debug_pc_st1e, debug_tagid_st1e, debug_wb_st1e, debug_rd_st1e, dirty_st1e, writeladdr_st1e, writetag_st1e, use_write_data);
|
||||
$display("%t: bank%0d:%0d store-fill: wid=%0d, PC=%0h, tag=%0h, wb=%b, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1e, debug_pc_st1e, debug_tagid_st1e, debug_wb_st1e, debug_rd_st1e, dirty_st1e, writeladdr_st1e, writetag_st1e, use_write_data);
|
||||
end else begin
|
||||
$display("%t: bank%0d:%0d store-write: warp=%0d, PC=%0h, tag=%0h, wb=%b, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_warp_num_st1e, debug_pc_st1e, debug_tagid_st1e, debug_wb_st1e, debug_rd_st1e, dirty_st1e, writeladdr_st1e, writetag_st1e, wordsel_st1e, writeword_st1e);
|
||||
$display("%t: bank%0d:%0d store-write: wid=%0d, PC=%0h, tag=%0h, wb=%b, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1e, debug_pc_st1e, debug_tagid_st1e, debug_wb_st1e, debug_rd_st1e, dirty_st1e, writeladdr_st1e, writetag_st1e, wordsel_st1e, writeword_st1e);
|
||||
end
|
||||
end else
|
||||
if (miss_st1e) begin
|
||||
$display("%t: bank%0d:%0d store-miss: warp=%0d, PC=%0h, tag=%0h, wb=%b, rd=%0d, dirty=%b", $time, CACHE_ID, BANK_ID, debug_warp_num_st1e, debug_pc_st1e, debug_tagid_st1e, debug_wb_st1e, debug_rd_st1e, dirty_st1e);
|
||||
$display("%t: bank%0d:%0d store-miss: wid=%0d, PC=%0h, tag=%0h, wb=%b, rd=%0d, dirty=%b", $time, CACHE_ID, BANK_ID, debug_wid_st1e, debug_pc_st1e, debug_tagid_st1e, debug_wb_st1e, debug_rd_st1e, dirty_st1e);
|
||||
end else begin
|
||||
$display("%t: bank%0d:%0d store-read: warp=%0d, PC=%0h, tag=%0h, wb=%b, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_warp_num_st1e, debug_pc_st1e, debug_tagid_st1e, debug_wb_st1e, debug_rd_st1e, dirty_st1e, readaddr_st10, qual_read_tag_st1, wordsel_st1e, qual_read_data_st1);
|
||||
$display("%t: bank%0d:%0d store-read: wid=%0d, PC=%0h, tag=%0h, wb=%b, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1e, debug_pc_st1e, debug_tagid_st1e, debug_wb_st1e, debug_rd_st1e, dirty_st1e, readaddr_st10, qual_read_tag_st1, wordsel_st1e, qual_read_data_st1);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@@ -41,8 +41,6 @@ module VX_fp_fpga (
|
||||
reg [FPC_BITS-1:0] core_select;
|
||||
reg fmadd_negate;
|
||||
|
||||
genvar i;
|
||||
|
||||
always @(*) begin
|
||||
core_select = 0;
|
||||
fmadd_negate = 0;
|
||||
@@ -246,7 +244,7 @@ module VX_fp_fpga (
|
||||
.valid_out (fp_valid)
|
||||
);
|
||||
|
||||
for (i = 0; i < NUM_FPC; i++) begin
|
||||
for (genvar i = 0; i < NUM_FPC; i++) begin
|
||||
assign per_core_ready_out[i] = ready_out && (i == fp_index);
|
||||
end
|
||||
|
||||
|
||||
@@ -48,10 +48,8 @@ module VX_fp_noncomp (
|
||||
reg [`NUM_THREADS-1:0][31:0] fcmp_res; // result of comparison
|
||||
reg [`NUM_THREADS-1:0][ 4:0] fcmp_excp; // exception of comparison
|
||||
|
||||
genvar i;
|
||||
|
||||
// Setup
|
||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
assign a_sign[i] = dataa[i][31];
|
||||
assign a_exponent[i] = dataa[i][30:23];
|
||||
assign a_mantissa[i] = dataa[i][22:0];
|
||||
@@ -77,7 +75,7 @@ module VX_fp_noncomp (
|
||||
end
|
||||
|
||||
// FCLASS
|
||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
always @(*) begin
|
||||
if (a_type[i].is_normal) begin
|
||||
fclass_mask[i] = a_sign[i] ? NEG_NORM : POS_NORM;
|
||||
@@ -101,7 +99,7 @@ module VX_fp_noncomp (
|
||||
end
|
||||
|
||||
// Min/Max
|
||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
always @(*) begin
|
||||
if (a_type[i].is_nan && b_type[i].is_nan)
|
||||
fminmax_res[i] = {1'b0, 8'hff, 1'b1, 22'd0}; // canonical qNaN
|
||||
@@ -120,7 +118,7 @@ module VX_fp_noncomp (
|
||||
end
|
||||
|
||||
// Sign Injection
|
||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
always @(*) begin
|
||||
case (op)
|
||||
`FPU_SGNJ: fsgnj_res[i] = { b_sign[i], a_exponent[i], a_mantissa[i]};
|
||||
@@ -132,7 +130,7 @@ module VX_fp_noncomp (
|
||||
end
|
||||
|
||||
// Comparison
|
||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
always @(*) begin
|
||||
case (frm)
|
||||
`FRM_RNE: begin
|
||||
@@ -193,7 +191,7 @@ module VX_fp_noncomp (
|
||||
endcase
|
||||
end
|
||||
|
||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
always @(*) begin
|
||||
tmp_valid = 1'b1;
|
||||
case (op)
|
||||
|
||||
@@ -129,11 +129,9 @@ module VX_fpnew #(
|
||||
endcase
|
||||
end
|
||||
|
||||
genvar i;
|
||||
|
||||
`DISABLE_TRACING
|
||||
|
||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
if (0 == i) begin
|
||||
fpnew_top #(
|
||||
.Features (FPU_FEATURES),
|
||||
@@ -194,8 +192,7 @@ module VX_fpnew #(
|
||||
`ENABLE_TRACING
|
||||
|
||||
assign fpu_valid_in = valid_in;
|
||||
assign ready_in = fpu_ready_in
|
||||
|| ~valid_in; // fix
|
||||
assign ready_in = fpu_ready_in;
|
||||
|
||||
assign fpu_tag_in = tag_in;
|
||||
assign tag_out = fpu_tag_out;
|
||||
|
||||
@@ -22,9 +22,7 @@ module VX_fp_add (
|
||||
wire enable = ~stall;
|
||||
assign ready_in = enable;
|
||||
|
||||
genvar i;
|
||||
|
||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
twentynm_fp_mac mac_fp_wys (
|
||||
// inputs
|
||||
.accumulate(),
|
||||
|
||||
@@ -22,9 +22,7 @@ module VX_fp_div (
|
||||
wire enable = ~stall;
|
||||
assign ready_in = enable;
|
||||
|
||||
genvar i;
|
||||
|
||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
acl_fp_div fdiv (
|
||||
.clk (clk),
|
||||
.areset (1'b0),
|
||||
|
||||
@@ -21,9 +21,7 @@ module VX_fp_ftoi (
|
||||
wire enable = ~stall;
|
||||
assign ready_in = enable;
|
||||
|
||||
genvar i;
|
||||
|
||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
acl_fp_ftoi ftoi (
|
||||
.clk (clk),
|
||||
.areset (1'b0),
|
||||
|
||||
@@ -21,9 +21,7 @@ module VX_fp_ftou (
|
||||
wire enable = ~stall;
|
||||
assign ready_in = enable;
|
||||
|
||||
genvar i;
|
||||
|
||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
acl_fp_ftou ftou (
|
||||
.clk (clk),
|
||||
.areset (1'b0),
|
||||
|
||||
@@ -21,9 +21,7 @@ module VX_fp_itof (
|
||||
wire enable = ~stall;
|
||||
assign ready_in = enable;
|
||||
|
||||
genvar i;
|
||||
|
||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
acl_fp_itof itof (
|
||||
.clk (clk),
|
||||
.areset (1'b0),
|
||||
|
||||
@@ -28,9 +28,7 @@ module VX_fp_madd (
|
||||
wire [`ISTAG_BITS-1:0] out_tag_st0, out_tag_st1;
|
||||
wire in_valid_st0, out_valid_st0, out_valid_st1;
|
||||
|
||||
genvar i;
|
||||
|
||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
twentynm_fp_mac mac_fp_wys0 (
|
||||
// inputs
|
||||
.accumulate(),
|
||||
|
||||
@@ -28,9 +28,7 @@ module VX_fp_msub (
|
||||
wire [`ISTAG_BITS-1:0] out_tag_st0, out_tag_st1;
|
||||
wire in_valid_st0, out_valid_st0, out_valid_st1;
|
||||
|
||||
genvar i;
|
||||
|
||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
twentynm_fp_mac mac_fp_wys0 (
|
||||
// inputs
|
||||
.accumulate(),
|
||||
|
||||
@@ -22,9 +22,7 @@ module VX_fp_mul (
|
||||
wire enable = ~stall;
|
||||
assign ready_in = enable;
|
||||
|
||||
genvar i;
|
||||
|
||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
twentynm_fp_mac mac_fp_wys (
|
||||
// inputs
|
||||
.accumulate(),
|
||||
|
||||
@@ -21,9 +21,7 @@ module VX_fp_sqrt (
|
||||
wire enable = ~stall;
|
||||
assign ready_in = enable;
|
||||
|
||||
genvar i;
|
||||
|
||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
acl_fp_sqrt fsqrt (
|
||||
.clk (clk),
|
||||
.areset (1'b0),
|
||||
|
||||
@@ -22,9 +22,7 @@ module VX_fp_sub (
|
||||
wire enable = ~stall;
|
||||
assign ready_in = enable;
|
||||
|
||||
genvar i;
|
||||
|
||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
twentynm_fp_mac mac_fp_wys (
|
||||
// inputs
|
||||
.accumulate(),
|
||||
|
||||
@@ -21,9 +21,7 @@ module VX_fp_utof (
|
||||
wire enable = ~stall;
|
||||
assign ready_in = enable;
|
||||
|
||||
genvar i;
|
||||
|
||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
acl_fp_utof utof (
|
||||
.clk (clk),
|
||||
.areset (1'b0),
|
||||
|
||||
@@ -7,18 +7,22 @@ interface VX_alu_req_if ();
|
||||
|
||||
wire valid;
|
||||
wire [`ISTAG_BITS-1:0] issue_tag;
|
||||
`DEBUG_BEGIN
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
wire [`NUM_THREADS-1:0] thread_mask;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
`DEBUG_END
|
||||
wire [31:0] curr_PC;
|
||||
|
||||
wire [`ALU_BITS-1:0] alu_op;
|
||||
wire [`ALU_BITS-1:0] op;
|
||||
|
||||
wire rs1_is_PC;
|
||||
wire rs2_is_imm;
|
||||
|
||||
wire [31:0] imm;
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs2_data;
|
||||
|
||||
wire [31:0] offset;
|
||||
wire [31:0] next_PC;
|
||||
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
interface VX_branch_ctl_if ();
|
||||
|
||||
wire valid;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
wire taken;
|
||||
wire [31:0] dest;
|
||||
|
||||
|
||||
29
hw/rtl/interfaces/VX_bru_req_if.v
Normal file
29
hw/rtl/interfaces/VX_bru_req_if.v
Normal file
@@ -0,0 +1,29 @@
|
||||
`ifndef VX_BRANCH_REQ_IF
|
||||
`define VX_BRANCH_REQ_IF
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
interface VX_bru_req_if ();
|
||||
|
||||
wire valid;
|
||||
wire [`ISTAG_BITS-1:0] issue_tag;
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
`DEBUG_BEGIN
|
||||
wire [`NUM_THREADS-1:0] thread_mask;
|
||||
`DEBUG_END
|
||||
wire [31:0] curr_PC;
|
||||
|
||||
wire [`BRU_BITS-1:0] op;
|
||||
|
||||
wire rs1_is_PC;
|
||||
|
||||
wire [31:0] rs1_data;
|
||||
wire [31:0] rs2_data;
|
||||
|
||||
wire [31:0] offset;
|
||||
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
||||
@@ -7,7 +7,7 @@ interface VX_cmt_to_csr_if ();
|
||||
|
||||
wire valid;
|
||||
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
|
||||
wire [`NE_BITS:0] num_commits;
|
||||
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
interface VX_cmt_to_issue_if ();
|
||||
|
||||
wire alu_valid;
|
||||
wire bru_valid;
|
||||
wire lsu_valid;
|
||||
wire csr_valid;
|
||||
wire mul_valid;
|
||||
@@ -13,6 +14,7 @@ interface VX_cmt_to_issue_if ();
|
||||
wire gpu_valid;
|
||||
|
||||
wire [`ISTAG_BITS-1:0] alu_tag;
|
||||
wire [`ISTAG_BITS-1:0] bru_tag;
|
||||
wire [`ISTAG_BITS-1:0] lsu_tag;
|
||||
wire [`ISTAG_BITS-1:0] csr_tag;
|
||||
wire [`ISTAG_BITS-1:0] mul_tag;
|
||||
@@ -21,6 +23,7 @@ interface VX_cmt_to_issue_if ();
|
||||
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
issue_data_t alu_data;
|
||||
issue_data_t bru_data;
|
||||
issue_data_t lsu_data;
|
||||
issue_data_t csr_data;
|
||||
issue_data_t mul_data;
|
||||
|
||||
@@ -7,13 +7,13 @@ interface VX_csr_req_if ();
|
||||
|
||||
wire valid;
|
||||
wire [`ISTAG_BITS-1:0] issue_tag;
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
`DEBUG_BEGIN
|
||||
wire [`NUM_THREADS-1:0] thread_mask;
|
||||
`DEBUG_END
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [31:0] curr_PC;
|
||||
|
||||
wire [`CSR_BITS-1:0] csr_op;
|
||||
wire [`CSR_BITS-1:0] op;
|
||||
|
||||
wire [`CSR_ADDR_BITS-1:0] csr_addr;
|
||||
wire [31:0] csr_mask;
|
||||
|
||||
15
hw/rtl/interfaces/VX_csr_rsp_if.v
Normal file
15
hw/rtl/interfaces/VX_csr_rsp_if.v
Normal file
@@ -0,0 +1,15 @@
|
||||
`ifndef VX_CSR_RSP_IF
|
||||
`define VX_CSR_RSP_IF
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
interface VX_csr_rsp_if ();
|
||||
|
||||
wire valid;
|
||||
wire [`ISTAG_BITS-1:0] issue_tag;
|
||||
wire [`NUM_THREADS-1:0][31:0] data;
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
||||
@@ -9,7 +9,7 @@
|
||||
|
||||
interface VX_csr_to_fpu_if ();
|
||||
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
wire [`FRM_BITS-1:0] frm;
|
||||
|
||||
endinterface
|
||||
|
||||
@@ -6,10 +6,9 @@
|
||||
interface VX_decode_if ();
|
||||
|
||||
wire valid;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
wire [`NUM_THREADS-1:0] thread_mask;
|
||||
wire [31:0] curr_PC;
|
||||
wire [31:0] next_PC;
|
||||
|
||||
wire [`EX_BITS-1:0] ex_type;
|
||||
wire [`OP_BITS-1:0] ex_op;
|
||||
|
||||
@@ -5,10 +5,9 @@
|
||||
|
||||
interface VX_exu_to_cmt_if ();
|
||||
|
||||
wire valid;
|
||||
wire [`ISTAG_BITS-1:0] issue_tag;
|
||||
wire [`NUM_THREADS-1:0][31:0] data;
|
||||
wire ready;
|
||||
wire valid;
|
||||
wire [`ISTAG_BITS-1:0] issue_tag;
|
||||
wire [`NUM_THREADS-1:0][31:0] data;
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
@@ -11,13 +11,13 @@ interface VX_fpu_req_if ();
|
||||
|
||||
wire valid;
|
||||
wire [`ISTAG_BITS-1:0] issue_tag;
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
`DEBUG_BEGIN
|
||||
wire [`NUM_THREADS-1:0] thread_mask;
|
||||
`DEBUG_END
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [31:0] curr_PC;
|
||||
`DEBUG_END
|
||||
|
||||
wire [`FPU_BITS-1:0] fpu_op;
|
||||
wire [`FPU_BITS-1:0] op;
|
||||
wire [`FRM_BITS-1:0] frm;
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
||||
|
||||
@@ -10,7 +10,6 @@ interface VX_fpu_to_cmt_if ();
|
||||
wire [`NUM_THREADS-1:0][31:0] data;
|
||||
wire has_fflags;
|
||||
fflags_t [`NUM_THREADS-1:0] fflags;
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
@@ -11,7 +11,7 @@ interface VX_fpu_to_csr_if ();
|
||||
|
||||
wire valid;
|
||||
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
|
||||
wire fflags_NV;
|
||||
wire fflags_DZ;
|
||||
|
||||
@@ -7,7 +7,7 @@ interface VX_gpr_read_if ();
|
||||
|
||||
wire valid;
|
||||
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
|
||||
wire [`NR_BITS-1:0] rs1;
|
||||
wire [`NR_BITS-1:0] rs2;
|
||||
|
||||
@@ -6,17 +6,15 @@
|
||||
interface VX_gpu_req_if();
|
||||
|
||||
wire valid;
|
||||
wire [`ISTAG_BITS-1:0] issue_tag;
|
||||
wire [`ISTAG_BITS-1:0] issue_tag;
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
wire [`NUM_THREADS-1:0] thread_mask;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
`DEBUG_BEGIN
|
||||
wire [31:0] curr_PC;
|
||||
`DEBUG_END
|
||||
wire [`GPU_BITS-1:0] gpu_op;
|
||||
|
||||
wire [`GPU_BITS-1:0] op;
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
||||
wire [31:0] rs2_data;
|
||||
wire [31:0] next_PC;
|
||||
|
||||
wire ready;
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@ interface VX_ifetch_req_if ();
|
||||
|
||||
wire valid;
|
||||
wire [`NUM_THREADS-1:0] thread_mask;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
wire [31:0] curr_PC;
|
||||
wire ready;
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@ interface VX_ifetch_rsp_if ();
|
||||
|
||||
wire valid;
|
||||
wire [`NUM_THREADS-1:0] thread_mask;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
wire [31:0] curr_PC;
|
||||
wire [31:0] instr;
|
||||
wire ready;
|
||||
|
||||
39
hw/rtl/interfaces/VX_issue_if.v
Normal file
39
hw/rtl/interfaces/VX_issue_if.v
Normal file
@@ -0,0 +1,39 @@
|
||||
`ifndef VX_ISSUE_IF
|
||||
`define VX_ISSUE_IF
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
interface VX_issue_if ();
|
||||
|
||||
wire valid;
|
||||
wire [`ISTAG_BITS-1:0] issue_tag;
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
wire [`NUM_THREADS-1:0] thread_mask;
|
||||
wire [31:0] curr_PC;
|
||||
|
||||
wire [`EX_BITS-1:0] ex_type;
|
||||
wire [`OP_BITS-1:0] ex_op;
|
||||
|
||||
wire [`FRM_BITS-1:0] frm;
|
||||
|
||||
wire wb;
|
||||
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs2_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs3_data;
|
||||
|
||||
wire [`NR_BITS-1:0] rs1;
|
||||
wire [31:0] imm;
|
||||
|
||||
wire rs1_is_PC;
|
||||
wire rs2_is_imm;
|
||||
|
||||
wire [`NT_BITS-1:0] tid;
|
||||
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
||||
@@ -6,7 +6,7 @@
|
||||
interface VX_join_if ();
|
||||
|
||||
wire is_join;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@ interface VX_lsu_req_if ();
|
||||
wire valid;
|
||||
wire [`NUM_THREADS-1:0] thread_mask;
|
||||
wire [`ISTAG_BITS-1:0] issue_tag;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
wire [31:0] curr_PC;
|
||||
|
||||
wire rw;
|
||||
|
||||
@@ -12,11 +12,11 @@ interface VX_mul_req_if ();
|
||||
wire valid;
|
||||
wire [`ISTAG_BITS-1:0] issue_tag;
|
||||
`DEBUG_BEGIN
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
wire [`NUM_THREADS-1:0] thread_mask;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [31:0] curr_PC;
|
||||
`DEBUG_END
|
||||
wire [`MUL_BITS-1:0] mul_op;
|
||||
wire [`MUL_BITS-1:0] op;
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs2_data;
|
||||
|
||||
@@ -5,24 +5,12 @@
|
||||
|
||||
interface VX_warp_ctl_if ();
|
||||
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
|
||||
wire change_mask;
|
||||
wire [`NUM_THREADS-1:0] thread_mask;
|
||||
|
||||
wire wspawn;
|
||||
wire [31:0] wspawn_pc;
|
||||
wire [`NUM_WARPS-1:0] wspawn_wmask;
|
||||
|
||||
wire is_barrier;
|
||||
wire [`NB_BITS-1:0] barrier_id;
|
||||
wire [`NW_BITS:0] barrier_num_warps;
|
||||
|
||||
wire is_split;
|
||||
wire do_split;
|
||||
wire [`NUM_THREADS-1:0] split_new_mask;
|
||||
wire [`NUM_THREADS-1:0] split_later_mask;
|
||||
wire [31:0] split_save_pc;
|
||||
gpu_tmc_t tmc;
|
||||
gpu_wspawn_t wspawn;
|
||||
gpu_barrier_t barrier;
|
||||
gpu_split_t split;
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
@@ -7,16 +7,14 @@ interface VX_wb_if ();
|
||||
|
||||
wire valid;
|
||||
wire [`NUM_THREADS-1:0] thread_mask;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
wire [31:0] curr_PC;
|
||||
`IGNORE_WARNINGS_END
|
||||
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire [`NUM_THREADS-1:0][31:0] data;
|
||||
|
||||
wire ready;
|
||||
wire [`NUM_THREADS-1:0][31:0] data;
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
interface VX_wstall_if();
|
||||
|
||||
wire wstall;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
@@ -32,19 +32,17 @@ module VX_cam_buffer #(
|
||||
.valid_out (free_valid)
|
||||
);
|
||||
|
||||
integer i;
|
||||
|
||||
always @(*) begin
|
||||
free_slots_n = free_slots;
|
||||
if (acquire_slot) begin
|
||||
free_slots_n[write_addr_r] = 0;
|
||||
end
|
||||
for (i = 0; i < RPORTS; i++) begin
|
||||
for (integer i = 0; i < RPORTS; i++) begin
|
||||
if (release_slot[i]) begin
|
||||
free_slots_n[read_addr[i]] = 1;
|
||||
end
|
||||
read_data[i] = entries[read_addr[i]];
|
||||
end
|
||||
end
|
||||
if (acquire_slot) begin
|
||||
free_slots_n[write_addr_r] = 0;
|
||||
end
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
@@ -54,12 +52,12 @@ module VX_cam_buffer #(
|
||||
write_addr_r <= ADDRW'(1'b0);
|
||||
end else begin
|
||||
if (acquire_slot) begin
|
||||
assert(1 == free_slots[write_addr]);
|
||||
assert(1 == free_slots[write_addr]) else $display("%t: inused slot at port %d", $time, write_addr);
|
||||
entries[write_addr] <= write_data;
|
||||
end
|
||||
for (i = 0; i < RPORTS; i++) begin
|
||||
for (integer i = 0; i < RPORTS; i++) begin
|
||||
if (release_slot[i]) begin
|
||||
assert(0 == free_slots[read_addr[i]]);
|
||||
assert(0 == free_slots[read_addr[i]]) else $display("%t: freed slot at port %d", $time, read_addr[i]);
|
||||
end
|
||||
end
|
||||
free_slots <= free_slots_n;
|
||||
|
||||
@@ -7,11 +7,9 @@ module VX_countones #(
|
||||
input wire [N-1:0] valids,
|
||||
output reg [$clog2(N):0] count
|
||||
);
|
||||
|
||||
integer i;
|
||||
always @(*) begin
|
||||
count = 0;
|
||||
for (i = N-1; i >= 0; i = i - 1) begin
|
||||
for (integer i = N-1; i >= 0; i = i - 1) begin
|
||||
if (valids[i]) begin
|
||||
count = count + 1;
|
||||
end
|
||||
|
||||
@@ -52,15 +52,6 @@ module VX_divide #(
|
||||
reg [WIDTHD-1:0] remainder_unqual;
|
||||
|
||||
always @(*) begin
|
||||
`ifndef SYNTHESIS
|
||||
// this edge case kills verilator in some cases by causing a division
|
||||
// overflow exception. INT_MIN / -1 (on x86)
|
||||
if (numer == {1'b1, (WIDTHN-1)'(1'b0)}
|
||||
&& denom == {WIDTHD{1'b1}}) begin
|
||||
quotient_unqual = 0;
|
||||
remainder_unqual = 0;
|
||||
end else
|
||||
`endif
|
||||
begin
|
||||
if (NSIGNED && DSIGNED) begin
|
||||
quotient_unqual = $signed(numer) / $signed(denom);
|
||||
@@ -88,21 +79,21 @@ module VX_divide #(
|
||||
reg [WIDTHN-1:0] quotient_pipe [0:PIPELINE-1];
|
||||
reg [WIDTHD-1:0] remainder_pipe [0:PIPELINE-1];
|
||||
|
||||
genvar i;
|
||||
for (i = 0; i < PIPELINE; i++) begin
|
||||
for (genvar i = 0; i < PIPELINE; i++) begin
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
quotient_pipe[i] <= 0;
|
||||
remainder_pipe[i] <= 0;
|
||||
end
|
||||
else if (clk_en) begin
|
||||
if (i == 0) begin
|
||||
quotient_pipe[i] <= quotient_unqual;
|
||||
remainder_pipe[i] <= remainder_unqual;
|
||||
end else begin
|
||||
quotient_pipe[i] <= quotient_pipe[i-1];
|
||||
remainder_pipe[i] <= remainder_pipe[i-1];
|
||||
end
|
||||
end else begin
|
||||
if (clk_en) begin
|
||||
if (i == 0) begin
|
||||
quotient_pipe[i] <= quotient_unqual;
|
||||
remainder_pipe[i] <= remainder_unqual;
|
||||
end else begin
|
||||
quotient_pipe[i] <= quotient_pipe[i-1];
|
||||
remainder_pipe[i] <= remainder_pipe[i-1];
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@@ -14,25 +14,53 @@ module VX_elastic_buffer #(
|
||||
input wire ready_out,
|
||||
output wire valid_out
|
||||
);
|
||||
wire empty, full;
|
||||
if (0 == SIZE) begin
|
||||
|
||||
VX_generic_queue #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (SIZE),
|
||||
.BUFFERED (BUFFERED)
|
||||
) queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.push (valid_in),
|
||||
.pop (ready_out),
|
||||
.data_in(data_in),
|
||||
.data_out(data_out),
|
||||
.empty (empty),
|
||||
.full (full),
|
||||
`UNUSED_PIN (size)
|
||||
);
|
||||
reg [DATAW-1:0] skid_buffer;
|
||||
reg skid_valid;
|
||||
|
||||
assign ready_in = ~full;
|
||||
assign valid_out = ~empty;
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
skid_valid <= 0;
|
||||
end else begin
|
||||
if (valid_in && ~ready_out) begin
|
||||
assert(~skid_valid);
|
||||
skid_buffer <= data_in;
|
||||
skid_valid <= 1;
|
||||
end
|
||||
if (ready_out) begin
|
||||
skid_valid <= 0;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
assign ready_in = ready_out || ~skid_valid;
|
||||
assign data_out = skid_valid ? skid_buffer : data_in;
|
||||
assign valid_out = valid_in || skid_valid;
|
||||
|
||||
end else begin
|
||||
|
||||
wire empty, full;
|
||||
|
||||
VX_generic_queue #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (SIZE),
|
||||
.BUFFERED (BUFFERED)
|
||||
) queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.push (valid_in),
|
||||
.pop (ready_out),
|
||||
.data_in(data_in),
|
||||
.data_out(data_out),
|
||||
.empty (empty),
|
||||
.full (full),
|
||||
`UNUSED_PIN (size)
|
||||
);
|
||||
|
||||
assign ready_in = ~full;
|
||||
assign valid_out = ~empty;
|
||||
|
||||
end
|
||||
|
||||
endmodule
|
||||
@@ -11,7 +11,7 @@ module VX_fair_arbiter #(
|
||||
output wire grant_valid
|
||||
);
|
||||
|
||||
if (N == 1) begin
|
||||
if (N == 1) begin
|
||||
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
@@ -33,11 +33,13 @@ module VX_fair_arbiter #(
|
||||
if (reset) begin
|
||||
requests_use <= 0;
|
||||
refill_original <= 0;
|
||||
end else if (refill) begin
|
||||
requests_use <= refill_value;
|
||||
refill_original <= refill_value;
|
||||
end else begin
|
||||
requests_use <= update_value;
|
||||
if (refill) begin
|
||||
requests_use <= refill_value;
|
||||
refill_original <= refill_value;
|
||||
end else begin
|
||||
requests_use <= update_value;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
@@ -38,7 +38,6 @@ module VX_generic_queue #(
|
||||
end else if (reading && !writing) begin
|
||||
size_r <= 0;
|
||||
end
|
||||
|
||||
if (writing) begin
|
||||
head_r <= data_in;
|
||||
end
|
||||
@@ -146,7 +145,7 @@ module VX_generic_queue #(
|
||||
end
|
||||
|
||||
bypass_r <= writing
|
||||
&& (empty_r || ((1 == size_r) && reading)); // empty or about to go empty
|
||||
&& (empty_r || ((1 == size_r) && reading)); // empty or about to go empty
|
||||
|
||||
curr_r <= data_in;
|
||||
head_r <= data[reading ? rd_ptr_next_r : rd_ptr_r];
|
||||
|
||||
@@ -24,11 +24,9 @@ module VX_matrix_arbiter #(
|
||||
|
||||
reg [N-1:1] state [0:N-1];
|
||||
wire [N-1:0] pri [0:N-1];
|
||||
|
||||
genvar i, j;
|
||||
|
||||
for (i = 0; i < N; i++) begin
|
||||
for (j = 0; j < N; j++) begin
|
||||
|
||||
for (genvar i = 0; i < N; i++) begin
|
||||
for (genvar j = 0; j < N; j++) begin
|
||||
if (j > i) begin
|
||||
assign pri[j][i] = requests[i] && state[i][j];
|
||||
end
|
||||
@@ -43,13 +41,12 @@ module VX_matrix_arbiter #(
|
||||
assign grant_onehot[i] = requests[i] && !(| pri[i]);
|
||||
end
|
||||
|
||||
for (i = 0; i < N; i++) begin
|
||||
for (j = i + 1; j < N; j++) begin
|
||||
for (genvar i = 0; i < N; i++) begin
|
||||
for (genvar j = i + 1; j < N; j++) begin
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
state[i][j] <= 0;
|
||||
end
|
||||
else begin
|
||||
end else begin
|
||||
state[i][j] <= (state[i][j] || grant_onehot[j]) && !grant_onehot[i];
|
||||
end
|
||||
end
|
||||
|
||||
@@ -50,18 +50,18 @@ module VX_multiplier #(
|
||||
|
||||
reg [WIDTHP-1:0] result_pipe [0:PIPELINE-1];
|
||||
|
||||
genvar i;
|
||||
for (i = 0; i < PIPELINE; i++) begin
|
||||
for (genvar i = 0; i < PIPELINE; i++) begin
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
result_pipe[i] <= 0;
|
||||
end
|
||||
else if (clk_en) begin
|
||||
if (i == 0) begin
|
||||
result_pipe[i] <= result_unqual;
|
||||
end else begin
|
||||
result_pipe[i] <= result_pipe[i-1];
|
||||
end
|
||||
end else begin
|
||||
if (clk_en) begin
|
||||
if (i == 0) begin
|
||||
result_pipe[i] <= result_unqual;
|
||||
end else begin
|
||||
result_pipe[i] <= result_pipe[i-1];
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@@ -7,12 +7,10 @@ module VX_onehot_encoder #(
|
||||
output reg [`LOG2UP(N)-1:0] binary,
|
||||
output reg valid
|
||||
);
|
||||
integer i;
|
||||
|
||||
always @(*) begin
|
||||
valid = 1'b0;
|
||||
binary = `LOG2UP(N)'(0);
|
||||
for (i = 0; i < N; i++) begin
|
||||
for (integer i = 0; i < N; i++) begin
|
||||
if (onehot[i]) begin
|
||||
valid = 1'b1;
|
||||
binary = `LOG2UP(N)'(i);
|
||||
|
||||
@@ -6,13 +6,11 @@ module VX_priority_encoder #(
|
||||
input wire [N-1:0] data_in,
|
||||
output reg [`LOG2UP(N)-1:0] data_out,
|
||||
output reg valid_out
|
||||
);
|
||||
integer i;
|
||||
|
||||
);
|
||||
always @(*) begin
|
||||
data_out = 0;
|
||||
valid_out = 0;
|
||||
for (i = N-1; i >= 0; i = i - 1) begin
|
||||
for (integer i = N-1; i >= 0; i = i - 1) begin
|
||||
if (data_in[i]) begin
|
||||
data_out = `LOG2UP(N)'(i);
|
||||
valid_out = 1;
|
||||
|
||||
@@ -26,12 +26,10 @@ module VX_rr_arbiter #(
|
||||
reg [`CLOG2(N)-1:0] state;
|
||||
reg [N-1:0] grant_onehot_r;
|
||||
|
||||
integer i, j;
|
||||
|
||||
always @(*) begin
|
||||
for (i = 0; i < N; i++) begin
|
||||
for (integer i = 0; i < N; i++) begin
|
||||
grant_table[i] = `CLOG2(N)'(i);
|
||||
for (j = 0; j < N; j++) begin
|
||||
for (integer j = 0; j < N; j++) begin
|
||||
if (requests[(i+j) % N]) begin
|
||||
grant_table[i] = `CLOG2(N)'((i+j) % N);
|
||||
end
|
||||
@@ -44,8 +42,7 @@ module VX_rr_arbiter #(
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
state <= 0;
|
||||
end
|
||||
else begin
|
||||
end else begin
|
||||
state <= grant_index;
|
||||
end
|
||||
end
|
||||
|
||||
@@ -74,7 +74,6 @@ module VX_scope #(
|
||||
read_delta <= 0;
|
||||
data_valid <= 0;
|
||||
end else begin
|
||||
|
||||
if (bus_write) begin
|
||||
case (cmd_type)
|
||||
CMD_GET_VALID,
|
||||
|
||||
@@ -16,7 +16,7 @@ module VX_shift_register #(
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
entries <= '0;
|
||||
entries <= (DEPTH * DATAW)'(0);
|
||||
end else begin
|
||||
if (enable) begin
|
||||
entries <= in;
|
||||
@@ -28,7 +28,7 @@ module VX_shift_register #(
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
entries <= '0;
|
||||
entries <= (DEPTH * DATAW)'(0);
|
||||
end else begin
|
||||
if (enable) begin
|
||||
entries <= {entries[DEPTH-2:0], in};
|
||||
|
||||
4
hw/syn/quartus/cache/Makefile
vendored
4
hw/syn/quartus/cache/Makefile
vendored
@@ -9,9 +9,9 @@ DEVICE = 10AX115N3F40E2SG
|
||||
|
||||
# Executable Configuration
|
||||
SYN_ARGS = --parallel --read_settings_files=on
|
||||
FIT_ARGS = --part=$(DEVICE) --read_settings_files=on
|
||||
FIT_ARGS = --parallel --part=$(DEVICE) --read_settings_files=on
|
||||
ASM_ARGS =
|
||||
STA_ARGS = --do_report_timing
|
||||
STA_ARGS = --parallel --do_report_timing
|
||||
|
||||
# Build targets
|
||||
all: $(PROJECT).sta.rpt
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user