pipeline refactoring

This commit is contained in:
Blaise Tine
2020-07-20 08:04:04 -04:00
parent 25f66e6490
commit 577a5791dc
29 changed files with 499 additions and 323 deletions

View File

@@ -10,7 +10,7 @@ module VX_alu_unit #(
VX_alu_req_if alu_req_if,
// Outputs
VX_wb_if alu_wb_if
VX_commit_if alu_commit_if
);
wire [`NUM_THREADS-1:0][31:0] alu_result;
wire [`NUM_THREADS-1:0][32:0] sub_result;
@@ -48,7 +48,7 @@ module VX_alu_unit #(
end
end
wire stall = ~alu_wb_if.ready && (| alu_wb_if.valid);
wire stall = ~alu_commit_if.ready && (| alu_commit_if.valid);
VX_generic_register #(
.N(`NUM_THREADS + `NW_BITS + 32 + `NR_BITS + `WB_BITS + (`NUM_THREADS * 32)),
@@ -57,8 +57,8 @@ module VX_alu_unit #(
.reset (reset),
.stall (stall),
.flush (0),
.in ({alu_req_if.valid, alu_req_if.warp_num, alu_req_if.curr_PC, alu_req_if.rd, alu_req_if.wb, alu_result}),
.out ({alu_wb_if.valid, alu_wb_if.warp_num, alu_wb_if.curr_PC, alu_wb_if.rd, alu_wb_if.wb, alu_wb_if.data})
.in ({alu_req_if.valid, alu_req_if.warp_num, alu_req_if.curr_PC, alu_req_if.rd, alu_req_if.wb, alu_result}),
.out ({alu_commit_if.valid, alu_commit_if.warp_num, alu_commit_if.curr_PC, alu_commit_if.rd, alu_commit_if.wb, alu_commit_if.data})
);
assign alu_req_if.ready = ~stall;

View File

@@ -10,8 +10,8 @@ module VX_branch_unit #(
VX_branch_req_if branch_req_if,
// Outputs
VX_branch_rsp_if branch_rsp_if,
VX_wb_if branch_wb_if
VX_branch_ctl_if branch_ctl_if,
VX_commit_if branch_commit_if
);
wire [`NT_BITS-1:0] br_result_index;
@@ -19,7 +19,7 @@ module VX_branch_unit #(
VX_priority_encoder #(
.N(`NUM_THREADS)
) choose_alu_result (
.data_in (alu_req_if.valid),
.data_in (branch_req_if.valid),
.data_out (br_result_index),
`UNUSED_PIN (valid_out)
);
@@ -53,7 +53,7 @@ module VX_branch_unit #(
wire [31:0] base_addr = (br_op == `BR_JALR) ? rs1_data : branch_req_if.curr_PC;
wire [31:0] br_dest = $signed(base_addr) + $signed(branch_req_if.offset);
wire stall = (~branch_wb_if.ready && (| branch_wb_if.valid));
wire stall = (~branch_commit_if.ready && (| branch_commit_if.valid));
VX_generic_register #(
.N(1 + `NW_BITS + 1 + 32)
@@ -63,7 +63,7 @@ module VX_branch_unit #(
.stall (stall),
.flush (0),
.in ({in_valid, branch_req_if.warp_num, br_taken, br_dest}),
.out ({branch_rsp_if.valid, branch_rsp_if.warp_num, branch_rsp_if.taken, branch_rsp_if.dest})
.out ({branch_ctl_if.valid, branch_ctl_if.warp_num, branch_ctl_if.taken, branch_ctl_if.dest})
);
VX_generic_register #(
@@ -74,7 +74,7 @@ module VX_branch_unit #(
.stall (stall),
.flush (0),
.in ({branch_req_if.valid, branch_req_if.warp_num, branch_req_if.curr_PC, branch_req_if.rd, branch_req_if.wb, {`NUM_THREADS{branch_req_if.next_PC}}}),
.out ({branch_wb_if.valid, branch_wb_if.warp_num, branch_wb_if.curr_PC, branch_wb_if.rd, branch_wb_if.wb, branch_wb_if.data})
.out ({branch_commit_if.valid, branch_commit_if.warp_num, branch_commit_if.curr_PC, branch_commit_if.rd, branch_commit_if.wb, branch_commit_if.data})
);
assign branch_req_if.ready = ~stall;

105
hw/rtl/VX_commit.v Normal file
View File

@@ -0,0 +1,105 @@
`include "VX_define.vh"
// Commit stage.
//
// Observes the commit interfaces of every execution unit, counts how many of
// them complete a valid/ready handshake each cycle, and maintains the 64-bit
// cycle and retired-instruction counters exported through perf_cntrs_if.
// Register writeback arbitration is delegated to the VX_writeback instance.
module VX_commit #(
    parameter CORE_ID = 0
) (
    input wire          clk,
    input wire          reset,

    // inputs: one commit interface per execution unit
    VX_commit_if        alu_commit_if,
    VX_commit_if        branch_commit_if,
    VX_commit_if        lsu_commit_if,
    VX_commit_if        mul_commit_if,
    VX_commit_if        csr_commit_if,
    VX_commit_if        gpu_commit_if,

    // outputs
    VX_wb_if            writeback_if,
    VX_perf_cntrs_if    perf_cntrs_if
);
    // A unit commits in a cycle when at least one of its threads is valid
    // and the unit's commit interface is ready.
    wire alu_fire    = (| alu_commit_if.valid)    && alu_commit_if.ready;
    wire branch_fire = (| branch_commit_if.valid) && branch_commit_if.ready;
    wire lsu_fire    = (| lsu_commit_if.valid)    && lsu_commit_if.ready;
    wire mul_fire    = (| mul_commit_if.valid)    && mul_commit_if.ready;
    wire csr_fire    = (| csr_commit_if.valid)    && csr_commit_if.ready;
    wire gpu_fire    = (| gpu_commit_if.valid)    && gpu_commit_if.ready;

    // One bit per execution unit (`NUM_EXS of them).
    wire [`NUM_EXS-1:0] committed_mask = {alu_fire,
                                          branch_fire,
                                          lsu_fire,
                                          mul_fire,
                                          csr_fire,
                                          gpu_fire};

    // Number of units committing this cycle.
    wire [`NE_BITS:0] num_commits;

    VX_countones #(
        .N(`NUM_EXS)
    ) valids_counter (
        .valids (committed_mask),
        .count  (num_commits)
    );

    wire has_committed = (| committed_mask);

    // Performance counters: free-running cycle count and committed
    // instruction count, both cleared by reset.
    reg [63:0] cycle_count;
    reg [63:0] instr_count;

    always @(posedge clk) begin
        if (reset) begin
            cycle_count <= 0;
            instr_count <= 0;
        end else begin
            cycle_count <= cycle_count + 1;
            if (has_committed) begin
                instr_count <= instr_count + 64'(num_commits);
            end
        end
    end

    assign perf_cntrs_if.total_cycles = cycle_count;
    assign perf_cntrs_if.total_instrs = instr_count;

    // The GPU unit is not connected to the writeback stage, so its commits
    // are always accepted.
    assign gpu_commit_if.ready = 1'b1; // doesn't writeback

    VX_writeback #(
        .CORE_ID(CORE_ID)
    ) writeback (
        .clk              (clk),
        .reset            (reset),

        .alu_commit_if    (alu_commit_if),
        .branch_commit_if (branch_commit_if),
        .lsu_commit_if    (lsu_commit_if),
        .mul_commit_if    (mul_commit_if),
        .csr_commit_if    (csr_commit_if),

        .writeback_if     (writeback_if)
    );

`ifdef DBG_PRINT_PIPELINE
    // Trace every commit handshake, one line per unit per cycle.
    always @(posedge clk) begin
        if (alu_fire) begin
            $display("%t: Core%0d-commit: warp=%0d, PC=%0h, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, alu_commit_if.warp_num, alu_commit_if.curr_PC, alu_commit_if.wb, alu_commit_if.rd, alu_commit_if.data);
        end
        if (branch_fire) begin
            $display("%t: Core%0d-commit: warp=%0d, PC=%0h, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, branch_commit_if.warp_num, branch_commit_if.curr_PC, branch_commit_if.wb, branch_commit_if.rd, branch_commit_if.data);
        end
        if (lsu_fire) begin
            $display("%t: Core%0d-commit: warp=%0d, PC=%0h, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, lsu_commit_if.warp_num, lsu_commit_if.curr_PC, lsu_commit_if.wb, lsu_commit_if.rd, lsu_commit_if.data);
        end
        if (mul_fire) begin
            $display("%t: Core%0d-commit: warp=%0d, PC=%0h, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, mul_commit_if.warp_num, mul_commit_if.curr_PC, mul_commit_if.wb, mul_commit_if.rd, mul_commit_if.data);
        end
        if (csr_fire) begin
            $display("%t: Core%0d-commit: warp=%0d, PC=%0h, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, csr_commit_if.warp_num, csr_commit_if.curr_PC, csr_commit_if.wb, csr_commit_if.rd, csr_commit_if.data);
        end
        if (gpu_fire) begin
            $display("%t: Core%0d-commit: warp=%0d, PC=%0h, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, gpu_commit_if.warp_num, gpu_commit_if.curr_PC, gpu_commit_if.wb, gpu_commit_if.rd, gpu_commit_if.data);
        end
    end
`endif

endmodule

View File

@@ -12,11 +12,11 @@ module VX_csr_arb (
VX_csr_req_if csr_req_if,
// input
VX_wb_if csr_rsp_if,
VX_commit_if csr_rsp_if,
// outputs
VX_csr_io_rsp_if csr_io_rsp_if,
VX_wb_if csr_wb_if
VX_commit_if csr_commit_if
);
`UNUSED_VAR (clk)
@@ -42,13 +42,13 @@ module VX_csr_arb (
assign csr_io_rsp_if.valid = csr_rsp_if.valid[0] & csr_rsp_if.is_io;
assign csr_io_rsp_if.data = csr_rsp_if.data[0];
assign csr_wb_if.valid = csr_rsp_if.valid & {`NUM_THREADS{~csr_rsp_if.is_io}};
assign csr_wb_if.warp_num = csr_rsp_if.warp_num;
assign csr_wb_if.curr_PC = csr_rsp_if.curr_PC;
assign csr_wb_if.data = csr_rsp_if.data;
assign csr_wb_if.rd = csr_rsp_if.rd;
assign csr_wb_if.wb = csr_rsp_if.wb;
assign csr_commit_if.valid = csr_rsp_if.valid & {`NUM_THREADS{~csr_rsp_if.is_io}};
assign csr_commit_if.warp_num = csr_rsp_if.warp_num;
assign csr_commit_if.curr_PC = csr_rsp_if.curr_PC;
assign csr_commit_if.data = csr_rsp_if.data;
assign csr_commit_if.rd = csr_rsp_if.rd;
assign csr_commit_if.wb = csr_rsp_if.wb;
assign csr_rsp_if.ready = csr_rsp_if.is_io ? csr_io_rsp_if.ready : csr_wb_if.ready;
assign csr_rsp_if.ready = csr_rsp_if.is_io ? csr_io_rsp_if.ready : csr_commit_if.ready;
endmodule

View File

@@ -3,8 +3,7 @@
module VX_csr_data #(
parameter CORE_ID = 0
) (
input wire clk, // Clock
input wire reset,
input wire clk,
input wire[`CSR_ADDR_SIZE-1:0] read_addr,
output reg[31:0] read_data,
@@ -15,29 +14,18 @@ module VX_csr_data #(
`IGNORE_WARNINGS_END
input wire[`CSR_WIDTH-1:0] write_data,
input wire[`NW_BITS-1:0] warp_num,
input wire notify_commit
VX_perf_cntrs_if perf_cntrs_if
);
reg [`CSR_WIDTH-1:0] csr_table[`NUM_CSRS-1:0];
reg [63:0] num_cycles, num_instrs;
// cast address to physical CSR range
wire [$clog2(`NUM_CSRS)-1:0] rd_addr, wr_addr;
assign rd_addr = $size(rd_addr)'(read_addr);
assign wr_addr = $size(wr_addr)'(write_addr);
assign wr_addr = $size(wr_addr)'(write_addr);
always @(posedge clk) begin
if (reset) begin
num_cycles <= 0;
num_instrs <= 0;
end else begin
if (write_enable) begin
csr_table[wr_addr] <= write_data;
end
num_cycles <= num_cycles + 1;
if (notify_commit) begin
num_instrs <= num_instrs + 1;
end
if (write_enable) begin
csr_table[wr_addr] <= write_data;
end
end
@@ -50,10 +38,10 @@ module VX_csr_data #(
`CSR_NT : read_data = `NUM_THREADS;
`CSR_NW : read_data = `NUM_WARPS;
`CSR_NC : read_data = `NUM_CORES * `NUM_CLUSTERS;
`CSR_CYCLE_L : read_data = num_cycles[31:0];
`CSR_CYCLE_H : read_data = num_cycles[63:32];
`CSR_INSTR_L : read_data = num_instrs[31:0];
`CSR_INSTR_H : read_data = num_instrs[63:32];
`CSR_CYCLE_L : read_data = perf_cntrs_if.total_cycles[31:0];
`CSR_CYCLE_H : read_data = perf_cntrs_if.total_cycles[63:32];
`CSR_INSTR_L : read_data = perf_cntrs_if.total_instrs[31:0];
`CSR_INSTR_H : read_data = perf_cntrs_if.total_instrs[63:32];
`CSR_VEND_ID : read_data = `VENDOR_ID;
`CSR_ARCH_ID : read_data = `ARCHITECTURE_ID;
`CSR_IMPL_ID : read_data = `IMPLEMENTATION_ID;

View File

@@ -5,14 +5,17 @@ module VX_csr_pipe #(
) (
input wire clk,
input wire reset,
VX_csr_req_if csr_req_if,
VX_csr_io_req_if csr_io_req_if,
VX_wb_if csr_wb_if,
VX_perf_cntrs_if perf_cntrs_if,
VX_csr_io_req_if csr_io_req_if,
VX_csr_io_rsp_if csr_io_rsp_if,
input wire notify_commit
VX_csr_req_if csr_req_if,
VX_commit_if csr_commit_if
);
VX_csr_req_if csr_pipe_req_if();
VX_wb_if csr_pipe_wb_if();
VX_csr_req_if csr_pipe_req_if();
VX_commit_if csr_pipe_commit_if();
VX_csr_arb csr_arb (
.clk (clk),
@@ -20,9 +23,9 @@ module VX_csr_pipe #(
.csr_core_req_if (csr_req_if),
.csr_io_req_if (csr_io_req_if),
.csr_req_if (csr_pipe_req_if),
.csr_rsp_if (csr_pipe_wb_if),
.csr_rsp_if (csr_pipe_commit_if),
.csr_io_rsp_if (csr_io_rsp_if),
.csr_wb_if (csr_wb_if)
.csr_commit_if (csr_commit_if)
);
wire [`CSR_ADDR_SIZE-1:0] csr_addr_s2;
@@ -30,24 +33,23 @@ module VX_csr_pipe #(
wire [31:0] csr_updated_data_s2;
wire [31:0] csr_read_data_unqual;
wire is_csr_s2 = (| csr_pipe_wb_if.valid);
wire is_csr_s2 = (| csr_pipe_commit_if.valid);
VX_csr_data #(
.CORE_ID(CORE_ID)
) csr_data (
.clk (clk),
.reset (reset),
.read_addr (csr_pipe_req_if.csr_addr),
.read_data (csr_read_data_unqual),
.write_enable (is_csr_s2),
.write_data (csr_updated_data_s2[`CSR_WIDTH-1:0]),
.write_addr (csr_addr_s2),
.warp_num (csr_pipe_req_if.warp_num),
.notify_commit (notify_commit)
.perf_cntrs_if (perf_cntrs_if)
);
wire csr_hazard = (csr_addr_s2 == csr_pipe_req_if.csr_addr)
&& (csr_pipe_wb_if.warp_num == csr_pipe_req_if.warp_num)
&& (csr_pipe_commit_if.warp_num == csr_pipe_req_if.warp_num)
&& is_csr_s2;
wire [31:0] csr_read_data = csr_hazard ? csr_updated_data_s2 : csr_read_data_unqual;
@@ -63,7 +65,7 @@ module VX_csr_pipe #(
endcase
end
wire stall = ~csr_pipe_wb_if.ready && (| csr_pipe_wb_if.valid);
wire stall = ~csr_pipe_commit_if.ready && (| csr_pipe_commit_if.valid);
VX_generic_register #(
.N(`NUM_THREADS + `NW_BITS + 32 + `NR_BITS + `WB_BITS + `CSR_ADDR_SIZE + 1 + 32 + 32)
@@ -73,12 +75,12 @@ module VX_csr_pipe #(
.stall (stall),
.flush (0),
.in ({csr_pipe_req_if.valid, csr_pipe_req_if.warp_num, csr_pipe_req_if.curr_PC, csr_pipe_req_if.rd, csr_pipe_req_if.wb, csr_pipe_req_if.csr_addr, csr_pipe_req_if.is_io, csr_read_data, csr_updated_data}),
.out ({csr_pipe_wb_if.valid, csr_pipe_wb_if.warp_num, csr_pipe_wb_if.curr_PC, csr_pipe_wb_if.rd, csr_pipe_wb_if.wb, csr_addr_s2, csr_pipe_wb_if.is_io, csr_read_data_s2, csr_updated_data_s2})
.out ({csr_pipe_commit_if.valid, csr_pipe_commit_if.warp_num, csr_pipe_commit_if.curr_PC, csr_pipe_commit_if.rd, csr_pipe_commit_if.wb, csr_addr_s2, csr_pipe_commit_if.is_io, csr_read_data_s2, csr_updated_data_s2})
);
genvar i;
for (i = 0; i < `NUM_THREADS; i++) begin
assign csr_pipe_wb_if.data[i] = (csr_addr_s2 == `CSR_LTID) ? i :
assign csr_pipe_commit_if.data[i] = (csr_addr_s2 == `CSR_LTID) ? i :
(csr_addr_s2 == `CSR_GTID) ? (csr_read_data_s2 * `NUM_THREADS + i) :
csr_read_data_s2;
end

View File

@@ -173,11 +173,13 @@
`define EX_BR 3'h2
`define EX_MUL 3'h3
`define EX_LSU 3'h4
`define EX_FPU 3'h5
`define EX_CSR 3'h6
`define EX_GPU 3'h7
`define EX_CSR 3'h5
`define EX_GPU 3'h6
`define EX_BITS 3
`define NUM_EXS 6
`define NE_BITS `LOG2UP(`NUM_EXS)
`define WB_NO 2'h0
`define WB_ALU 2'h1
`define WB_MEM 2'h2
@@ -374,7 +376,6 @@ task print_ex_type;
`EX_LSU: $write("LSU");
`EX_CSR: $write("CSR");
`EX_MUL: $write("MUL");
`EX_FPU: $write("FPU");
`EX_GPU: $write("GPU");
default: $write("NOP");
endcase

View File

@@ -17,43 +17,29 @@ module VX_execute #(
VX_cache_core_req_if dcache_req_if,
VX_cache_core_rsp_if dcache_rsp_if,
// inputs
VX_execute_if execute_if,
VX_wb_if writeback_if,
// perf
VX_perf_cntrs_if perf_cntrs_if,
// inputs
VX_alu_req_if alu_req_if,
VX_branch_req_if branch_req_if,
VX_lsu_req_if lsu_req_if,
VX_csr_req_if csr_req_if,
VX_mul_req_if mul_req_if,
VX_gpu_req_if gpu_req_if,
// outputs
VX_branch_rsp_if branch_rsp_if,
VX_branch_ctl_if branch_ctl_if,
VX_warp_ctl_if warp_ctl_if,
VX_wb_if alu_wb_if,
VX_wb_if branch_wb_if,
VX_wb_if lsu_wb_if,
VX_wb_if csr_wb_if,
VX_wb_if mul_wb_if,
input wire notify_commit,
VX_commit_if alu_commit_if,
VX_commit_if branch_commit_if,
VX_commit_if lsu_commit_if,
VX_commit_if csr_commit_if,
VX_commit_if mul_commit_if,
VX_commit_if gpu_commit_if,
output wire ebreak
);
VX_alu_req_if alu_req_if();
VX_branch_req_if branch_req_if();
VX_csr_req_if csr_req_if();
VX_lsu_req_if lsu_req_if();
VX_mul_req_if mul_req_if();
VX_gpu_req_if gpu_req_if();
VX_gpr_stage #(
.CORE_ID(CORE_ID)
) gpr_stage (
.clk (clk),
.reset (reset),
.writeback_if (writeback_if),
.execute_if (execute_if),
.alu_req_if (alu_req_if),
.branch_req_if (branch_req_if),
.lsu_req_if (lsu_req_if),
.csr_req_if (csr_req_if),
.mul_req_if (mul_req_if),
.gpu_req_if (gpu_req_if)
);
VX_alu_unit #(
.CORE_ID(CORE_ID)
@@ -61,7 +47,7 @@ module VX_execute #(
.clk (clk),
.reset (reset),
.alu_req_if (alu_req_if),
.alu_wb_if (alu_wb_if)
.alu_commit_if (alu_commit_if)
);
VX_branch_unit #(
@@ -70,8 +56,8 @@ module VX_execute #(
.clk (clk),
.reset (reset),
.branch_req_if (branch_req_if),
.branch_rsp_if (branch_rsp_if),
.branch_wb_if (branch_wb_if)
.branch_ctl_if (branch_ctl_if),
.branch_commit_if(branch_commit_if)
);
VX_lsu_unit #(
@@ -83,19 +69,19 @@ module VX_execute #(
.dcache_req_if (dcache_req_if),
.dcache_rsp_if (dcache_rsp_if),
.lsu_req_if (lsu_req_if),
.lsu_wb_if (lsu_wb_if)
.lsu_commit_if (lsu_commit_if)
);
VX_csr_pipe #(
.CORE_ID(CORE_ID)
) csr_pipe (
.clk (clk),
.reset (reset),
.csr_req_if (csr_req_if),
.csr_io_req_if (csr_io_req_if),
.csr_wb_if (csr_wb_if),
.reset (reset),
.perf_cntrs_if (perf_cntrs_if),
.csr_io_req_if (csr_io_req_if),
.csr_io_rsp_if (csr_io_rsp_if),
.notify_commit (notify_commit)
.csr_req_if (csr_req_if),
.csr_commit_if (csr_commit_if)
);
VX_mul_unit #(
@@ -104,14 +90,15 @@ module VX_execute #(
.clk (clk),
.reset (reset),
.mul_req_if (mul_req_if),
.mul_wb_if (mul_wb_if)
.mul_commit_if (mul_commit_if)
);
VX_gpu_unit #(
.CORE_ID(CORE_ID)
) gpu_unit (
.gpu_req_if (gpu_req_if),
.warp_ctl_if (warp_ctl_if)
.warp_ctl_if (warp_ctl_if),
.gpu_commit_if (gpu_commit_if)
);
assign ebreak = (| branch_req_if.valid) && (branch_req_if.br_op == `BR_EBREAK || branch_req_if.br_op == `BR_ECALL);

View File

@@ -13,7 +13,7 @@ module VX_fetch #(
// inputs
VX_wstall_if wstall_if,
VX_join_if join_if,
VX_branch_rsp_if branch_rsp_if,
VX_branch_ctl_if branch_ctl_if,
VX_warp_ctl_if warp_ctl_if,
// outputs
@@ -32,7 +32,7 @@ module VX_fetch #(
.warp_ctl_if (warp_ctl_if),
.wstall_if (wstall_if),
.join_if (join_if),
.branch_rsp_if (branch_rsp_if),
.branch_ctl_if (branch_ctl_if),
.ifetch_req_if (ifetch_req_if),
.ifetch_rsp_if (ifetch_rsp_if),
.busy (busy)

View File

@@ -2,7 +2,7 @@
module VX_gpr_mux (
// inputs
VX_execute_if execute_if,
VX_execute_if execute_if,
input wire [`NUM_THREADS-1:0][31:0] rs1_data,
input wire [`NUM_THREADS-1:0][31:0] rs2_data,
@@ -80,9 +80,10 @@ module VX_gpr_mux (
// GPU unit
assign gpu_req_if.valid = execute_if.valid & is_gpu;
assign gpu_req_if.warp_num = execute_if.warp_num;
assign gpu_req_if.next_PC = execute_if.next_PC;
assign gpu_req_if.curr_PC = execute_if.curr_PC;
assign gpu_req_if.gpu_op = `GPU_OP(execute_if.instr_op);
assign gpu_req_if.rs1_data = rs1_data;
assign gpu_req_if.rs2_data = rs2_data[0];
assign gpu_req_if.next_PC = execute_if.next_PC;
endmodule

View File

@@ -12,17 +12,17 @@ module VX_gpr_ram (
);
`ifndef ASIC
reg [`NUM_THREADS-1:0][3:0][7:0] ram [31:0];
reg [`NUM_THREADS-1:0][3:0][7:0] ram [`NUM_REGS-1:0];
integer i;
initial begin
// initialize r0 to 0
for (i = 0; i < `NUM_THREADS; i++) begin
ram[i][0] = 0;
ram[i][1] = 0;
ram[i][2] = 0;
ram[i][3] = 0;
ram[0][i][0] = 0;
ram[0][i][1] = 0;
ram[0][i][2] = 0;
ram[0][i][3] = 0;
end
end

View File

@@ -6,9 +6,9 @@ module VX_gpr_stage #(
input wire clk,
input wire reset,
// inputs
VX_execute_if execute_if,
// inputs
VX_wb_if writeback_if,
VX_execute_if execute_if,
// outputs
VX_alu_req_if alu_req_if,
@@ -38,7 +38,7 @@ module VX_gpr_stage #(
generate
for (i = 0; i < `NUM_WARPS; i++) begin
assign we[i] = writeback_if.valid & {`NUM_THREADS{(writeback_if.wb != 0) && (i == writeback_if.warp_num)}};
assign we[i] = writeback_if.valid & {`NUM_THREADS{(i == writeback_if.warp_num)}};
VX_gpr_ram gpr_ram (
.clk (clk),
.we (we[i]),

View File

@@ -7,7 +7,8 @@ module VX_gpu_unit #(
VX_gpu_req_if gpu_req_if,
// Output
VX_warp_ctl_if warp_ctl_if
VX_warp_ctl_if warp_ctl_if,
VX_commit_if gpu_commit_if
);
wire [`NUM_THREADS-1:0] curr_valids = gpu_req_if.valid;
wire is_wspawn = (gpu_req_if.gpu_op == `GPU_WSPAWN);
@@ -76,4 +77,10 @@ module VX_gpu_unit #(
assign gpu_req_if.ready = 1'b1; // has no stalls
// commit
assign gpu_commit_if.valid = gpu_req_if.valid;
assign gpu_commit_if.warp_num = gpu_req_if.warp_num;
assign gpu_commit_if.curr_PC = gpu_req_if.curr_PC;
assign gpu_commit_if.wb = `WB_NO;
endmodule

View File

@@ -1,6 +1,6 @@
`include "VX_define.vh"
module VX_issue #(
module VX_issue #(
parameter CORE_ID = 0
) (
input wire clk,
@@ -9,79 +9,41 @@ module VX_issue #(
VX_decode_if decode_if,
VX_wb_if writeback_if,
VX_execute_if execute_if,
output wire is_empty
VX_alu_req_if alu_req_if,
VX_branch_req_if branch_req_if,
VX_lsu_req_if lsu_req_if,
VX_csr_req_if csr_req_if,
VX_mul_req_if mul_req_if,
VX_gpu_req_if gpu_req_if
);
localparam CTVW = `CLOG2(`NUM_WARPS * 32 + 1);
VX_execute_if execute_if();
reg [31:0][`NUM_THREADS-1:0] rename_table[`NUM_WARPS-1:0];
reg [CTVW-1:0] count_valid;
wire rs1_rename = (rename_table[decode_if.warp_num][decode_if.rs1] != 0);
wire rs2_rename = (rename_table[decode_if.warp_num][decode_if.rs2] != 0);
wire rd_rename = (rename_table[decode_if.warp_num][decode_if.rd ] != 0);
VX_scheduler #(
.CORE_ID(CORE_ID)
) scheduler (
.clk (clk),
.reset (reset),
.decode_if (decode_if),
.writeback_if (writeback_if),
.execute_if (execute_if),
`UNUSED_PIN (is_empty)
);
wire rs1_rename_qual = (rs1_rename) && (decode_if.use_rs1);
wire rs2_rename_qual = (rs2_rename) && (decode_if.use_rs2);
wire rd_rename_qual = (rd_rename) && (decode_if.wb != 0);
VX_gpr_stage #(
.CORE_ID(CORE_ID)
) gpr_stage (
.clk (clk),
.reset (reset),
.execute_if (execute_if),
.writeback_if (writeback_if),
wire rename_valid = (| decode_if.valid) && (rs1_rename_qual || rs2_rename_qual || rd_rename_qual);
wire ex_stalled = (| decode_if.valid)
&& ((!execute_if.alu_ready && (decode_if.ex_type == `EX_ALU))
|| (!execute_if.br_ready && (decode_if.ex_type == `EX_BR))
|| (!execute_if.lsu_ready && (decode_if.ex_type == `EX_LSU))
|| (!execute_if.csr_ready && (decode_if.ex_type == `EX_CSR))
|| (!execute_if.mul_ready && (decode_if.ex_type == `EX_MUL))
|| (!execute_if.gpu_ready && (decode_if.ex_type == `EX_GPU)));
wire stall = rename_valid || ex_stalled;
wire acquire_rd = (| decode_if.valid) && (decode_if.wb != 0) && (decode_if.rd != 0) && ~stall;
wire release_rd = (| writeback_if.valid) && (writeback_if.wb != 0) && (writeback_if.rd != 0);
wire [`NUM_THREADS-1:0] valid_wb_new_mask = rename_table[writeback_if.warp_num][writeback_if.rd] & ~writeback_if.valid;
reg [CTVW-1:0] count_valid_next = (acquire_rd && !(release_rd && (0 == valid_wb_new_mask))) ? (count_valid + 1) :
(~acquire_rd && (release_rd && (0 == valid_wb_new_mask))) ? (count_valid - 1) :
count_valid;
integer i, w;
always @(posedge clk) begin
if (reset) begin
for (w = 0; w < `NUM_WARPS; w++) begin
for (i = 0; i < 32; i++) begin
rename_table[w][i] <= 0;
end
end
count_valid <= 0;
end else begin
if (acquire_rd) begin
rename_table[decode_if.warp_num][decode_if.rd] <= decode_if.valid;
end
if (release_rd) begin
assert(rename_table[writeback_if.warp_num][writeback_if.rd] != 0);
rename_table[writeback_if.warp_num][writeback_if.rd] <= valid_wb_new_mask;
end
count_valid <= count_valid_next;
end
end
VX_generic_register #(
.N(`NUM_THREADS + `NW_BITS + 32 + 32 + `NR_BITS + `NR_BITS + `NR_BITS + 32 + 1 + 1 + `EX_BITS + `OP_BITS + `WB_BITS),
) schedule_reg (
.clk (clk),
.reset (reset),
.stall (stall),
.flush (0),
.in ({decode_if.valid, decode_if.warp_num, decode_if.curr_PC, decode_if.next_PC, decode_if.rd, decode_if.rs1, decode_if.rs2, decode_if.imm, decode_if.rs1_is_PC, decode_if.rs2_is_imm, decode_if.ex_type, decode_if.instr_op, decode_if.wb}),
.out ({execute_if.valid, execute_if.warp_num, execute_if.curr_PC, execute_if.next_PC, execute_if.rd, execute_if.rs1, execute_if.rs2, execute_if.imm, execute_if.rs1_is_PC, execute_if.rs2_is_imm, execute_if.ex_type, execute_if.instr_op, execute_if.wb})
);
assign decode_if.ready = ~stall;
assign is_empty = (0 == count_valid);
.alu_req_if (alu_req_if),
.branch_req_if (branch_req_if),
.lsu_req_if (lsu_req_if),
.csr_req_if (csr_req_if),
.mul_req_if (mul_req_if),
.gpu_req_if (gpu_req_if)
);
endmodule

View File

@@ -16,7 +16,7 @@ module VX_lsu_unit #(
VX_lsu_req_if lsu_req_if,
// outputs
VX_wb_if lsu_wb_if
VX_commit_if lsu_commit_if
);
wire [`NUM_THREADS-1:0] use_valid;
@@ -108,7 +108,7 @@ module VX_lsu_unit #(
.full (mrq_full),
.pop (mrq_pop),
.read_addr (mrq_read_addr),
.read_data ({dbg_mrq_write_addr, lsu_wb_if.curr_PC, lsu_wb_if.wb, mem_rsp_offset, core_rsp_mem_read, lsu_wb_if.rd, lsu_wb_if.warp_num}),
.read_data ({dbg_mrq_write_addr, lsu_commit_if.curr_PC, lsu_commit_if.wb, mem_rsp_offset, core_rsp_mem_read, lsu_commit_if.rd, lsu_commit_if.warp_num}),
`UNUSED_PIN (empty)
);
@@ -151,11 +151,11 @@ module VX_lsu_unit #(
end
end
assign lsu_wb_if.valid = dcache_rsp_if.valid;
assign lsu_wb_if.data = core_rsp_data;
assign lsu_commit_if.valid = dcache_rsp_if.valid;
assign lsu_commit_if.data = core_rsp_data;
// Can accept new cache response
assign dcache_rsp_if.ready = lsu_wb_if.ready;
assign dcache_rsp_if.ready = lsu_commit_if.ready;
`SCOPE_ASSIGN(scope_dcache_req_valid, dcache_req_if.valid);
`SCOPE_ASSIGN(scope_dcache_req_warp_num, use_warp_num);
@@ -180,7 +180,7 @@ module VX_lsu_unit #(
end
if ((| dcache_rsp_if.valid) && dcache_rsp_if.ready) begin
$display("%t: D$%0d rsp: valid=%b, warp=%0d, PC=%0h, tag=%0h, rd=%0d, data=%0h",
$time, CORE_ID, lsu_wb_if.valid, lsu_wb_if.warp_num, lsu_wb_if.curr_PC, mrq_read_addr, lsu_wb_if.rd, lsu_wb_if.data);
$time, CORE_ID, lsu_commit_if.valid, lsu_commit_if.warp_num, lsu_commit_if.curr_PC, mrq_read_addr, lsu_commit_if.rd, lsu_commit_if.data);
end
end
`endif

View File

@@ -10,7 +10,7 @@ module VX_mul_unit #(
VX_mul_req_if mul_req_if,
// Outputs
VX_wb_if mul_wb_if
VX_commit_if mul_commit_if
);
wire [`NUM_THREADS-1:0][31:0] alu_result;
wire [`NUM_THREADS-1:0][63:0] mul_result;
@@ -71,7 +71,7 @@ module VX_mul_unit #(
`MUL_DIV,
`MUL_DIVU: alu_result[i] = (alu_in2[i] == 0) ? 32'hffffffff : div_result[i];
`MUL_REM,
`MUL_REMU: alu_result[i] = (alu_in2 == 0) ? alu_in1[i] : rem_result[i];
`MUL_REMU: alu_result[i] = (alu_in2[i] == 0) ? alu_in1[i] : rem_result[i];
default: alu_result[i] = alu_in1[i] + alu_in2[i]; // ADD, LUI, AUIPC, FENCE
endcase
end
@@ -104,7 +104,7 @@ module VX_mul_unit #(
wire pipeline_stall = ~result_avail && (| mul_req_if.valid);
wire stall = (~mul_wb_if.ready && (| mul_wb_if.valid))
wire stall = (~mul_commit_if.ready && (| mul_commit_if.valid))
|| pipeline_stall;
VX_generic_register #(
@@ -115,7 +115,7 @@ module VX_mul_unit #(
.stall (stall),
.flush (0),
.in ({mul_req_if.valid, mul_req_if.warp_num, mul_req_if.curr_PC, mul_req_if.rd, mul_req_if.wb, alu_result}),
.out ({mul_wb_if.valid, mul_wb_if.warp_num, mul_wb_if.curr_PC, mul_wb_if.rd, mul_wb_if.wb, mul_wb_if.data})
.out ({mul_commit_if.valid, mul_commit_if.warp_num, mul_commit_if.curr_PC, mul_commit_if.rd, mul_commit_if.wb, mul_commit_if.data})
);
assign mul_req_if.ready = ~stall;

View File

@@ -101,22 +101,27 @@ module VX_pipeline #(
assign csr_io_rsp_data = csr_io_rsp_if.data;
assign csr_io_rsp_if.ready = csr_io_rsp_ready;
VX_perf_cntrs_if perf_cntrs_if();
VX_decode_if decode_if();
VX_execute_if execute_if();
VX_branch_rsp_if branch_rsp_if();
VX_branch_ctl_if branch_ctl_if();
VX_warp_ctl_if warp_ctl_if();
VX_ifetch_rsp_if ifetch_rsp_if();
VX_alu_req_if alu_req_if();
VX_branch_req_if branch_req_if();
VX_lsu_req_if lsu_req_if();
VX_csr_req_if csr_req_if();
VX_mul_req_if mul_req_if();
VX_gpu_req_if gpu_req_if();
VX_wb_if writeback_if();
VX_wstall_if wstall_if();
VX_join_if join_if();
VX_wb_if alu_wb_if();
VX_wb_if branch_wb_if();
VX_wb_if lsu_wb_if();
VX_wb_if csr_wb_if();
VX_wb_if mul_wb_if();
VX_commit_if alu_commit_if();
VX_commit_if branch_commit_if();
VX_commit_if lsu_commit_if();
VX_commit_if csr_commit_if();
VX_commit_if mul_commit_if();
VX_commit_if gpu_commit_if();
wire notify_commit;
VX_fetch #(
.CORE_ID(CORE_ID)
) fetch (
@@ -127,7 +132,7 @@ module VX_pipeline #(
.wstall_if (wstall_if),
.join_if (join_if),
.warp_ctl_if (warp_ctl_if),
.branch_rsp_if (branch_rsp_if),
.branch_ctl_if (branch_ctl_if),
.ifetch_rsp_if (ifetch_rsp_if),
.busy (busy)
);
@@ -148,10 +153,16 @@ module VX_pipeline #(
) issue (
.clk (clk),
.reset (reset),
.decode_if (decode_if),
.writeback_if (writeback_if),
.execute_if (execute_if),
`UNUSED_PIN (is_empty)
.alu_req_if (alu_req_if),
.branch_req_if (branch_req_if),
.lsu_req_if (lsu_req_if),
.csr_req_if (csr_req_if),
.mul_req_if (mul_req_if),
.gpu_req_if (gpu_req_if)
);
VX_execute #(
@@ -160,35 +171,49 @@ module VX_pipeline #(
`SCOPE_SIGNALS_LSU_BIND
.clk (clk),
.reset (reset),
.dcache_req_if (core_dcache_req_if),
.dcache_rsp_if (core_dcache_rsp_if),
.csr_io_req_if (csr_io_req_if),
.csr_io_rsp_if (csr_io_rsp_if),
.execute_if (execute_if),
.writeback_if (writeback_if),
.csr_io_rsp_if (csr_io_rsp_if),
.perf_cntrs_if (perf_cntrs_if),
.alu_req_if (alu_req_if),
.branch_req_if (branch_req_if),
.lsu_req_if (lsu_req_if),
.csr_req_if (csr_req_if),
.mul_req_if (mul_req_if),
.gpu_req_if (gpu_req_if),
.warp_ctl_if (warp_ctl_if),
.branch_rsp_if (branch_rsp_if),
.alu_wb_if (alu_wb_if),
.branch_wb_if (branch_wb_if),
.lsu_wb_if (lsu_wb_if),
.csr_wb_if (csr_wb_if),
.mul_wb_if (mul_wb_if),
.notify_commit (notify_commit),
.branch_ctl_if (branch_ctl_if),
.alu_commit_if (alu_commit_if),
.branch_commit_if(branch_commit_if),
.lsu_commit_if (lsu_commit_if),
.csr_commit_if (csr_commit_if),
.mul_commit_if (mul_commit_if),
.gpu_commit_if (gpu_commit_if),
.ebreak (ebreak)
);
VX_writeback #(
VX_commit #(
.CORE_ID(CORE_ID)
) writeback (
) commit (
.clk (clk),
.reset (reset),
.alu_wb_if (alu_wb_if),
.branch_wb_if (branch_wb_if),
.lsu_wb_if (lsu_wb_if),
.csr_wb_if (csr_wb_if),
.mul_wb_if (mul_wb_if),
.alu_commit_if (alu_commit_if),
.branch_commit_if(branch_commit_if),
.lsu_commit_if (lsu_commit_if),
.csr_commit_if (csr_commit_if),
.mul_commit_if (mul_commit_if),
.gpu_commit_if (gpu_commit_if),
.writeback_if (writeback_if),
.notify_commit (notify_commit)
.perf_cntrs_if (perf_cntrs_if)
);
assign dcache_req_valid = core_dcache_req_if.valid;
@@ -223,12 +248,4 @@ module VX_pipeline #(
`SCOPE_ASSIGN(scope_exec_delay, exec_delay);
`SCOPE_ASSIGN(scope_gpr_stage_delay, gpr_delay);
`ifdef DBG_PRINT_PIPELINE
always @(posedge clk) begin
if ((| execute_if.valid) && (~execute_if.alu_ready || ~execute_if.br_ready || ~execute_if.lsu_ready || ~execute_if.csr_ready || ~execute_if.mul_ready || ~execute_if.gpu_ready)) begin
$display("%t: Core%0d-stall: warp=%0d, PC=%0h, alu=%b, br=%b, lsu=%b, csr=%b, mul=%b, gpu=%b", $time, CORE_ID, execute_if.warp_num, execute_if.curr_PC, ~execute_if.alu_ready, ~execute_if.br_ready, ~execute_if.lsu_ready, ~execute_if.csr_ready, ~execute_if.mul_ready, ~execute_if.gpu_ready);
end
end
`endif
endmodule

86
hw/rtl/VX_scheduler.v Normal file
View File

@@ -0,0 +1,86 @@
`include "VX_define.vh"
// Instruction scheduler.
//
// Tracks, per warp and per destination register, which threads still have a
// register write in flight (rename_table). A decoded instruction is held back
// while any register it reads (rs1/rs2 when used) or writes (rd when wb != 0)
// has pending threads, or while the execution unit selected by ex_type is not
// ready. Otherwise the instruction is latched into execute_if.
//
// Ports:
//   decode_if    - decoded instruction in; decode_if.ready is driven low on stall
//   writeback_if - completed register writes; clears pending threads
//   execute_if   - scheduled instruction out; per-unit ready flags in
//   is_empty     - high when no destination register is pending
module VX_scheduler #(
    parameter CORE_ID = 0
) (
    input wire      clk,
    input wire      reset,

    VX_decode_if    decode_if,
    VX_wb_if        writeback_if,
    VX_execute_if   execute_if,
    output wire     is_empty
);
    // Wide enough to count every register of every warp as pending.
    localparam CTVW = `CLOG2(`NUM_WARPS * 32 + 1);

    // rename_table[warp][reg] holds the mask of threads with a pending write.
    reg [31:0][`NUM_THREADS-1:0] rename_table[`NUM_WARPS-1:0];
    // Number of (warp, register) entries that currently have pending threads.
    reg [CTVW-1:0] count_valid;

    wire rs1_rename = (rename_table[decode_if.warp_num][decode_if.rs1] != 0);
    wire rs2_rename = (rename_table[decode_if.warp_num][decode_if.rs2] != 0);
    wire rd_rename  = (rename_table[decode_if.warp_num][decode_if.rd ] != 0);

    // A pending register only matters if the instruction actually uses it.
    wire rs1_rename_qual = (rs1_rename) && (decode_if.use_rs1);
    wire rs2_rename_qual = (rs2_rename) && (decode_if.use_rs2);
    wire rd_rename_qual  = (rd_rename)  && (decode_if.wb != 0);

    wire rename_valid = (| decode_if.valid) && (rs1_rename_qual || rs2_rename_qual || rd_rename_qual);

    // The execution unit targeted by the decoded instruction cannot accept it.
    wire ex_stalled = (| decode_if.valid)
                   && ((!execute_if.alu_ready && (decode_if.ex_type == `EX_ALU))
                    || (!execute_if.br_ready  && (decode_if.ex_type == `EX_BR))
                    || (!execute_if.lsu_ready && (decode_if.ex_type == `EX_LSU))
                    || (!execute_if.csr_ready && (decode_if.ex_type == `EX_CSR))
                    || (!execute_if.mul_ready && (decode_if.ex_type == `EX_MUL))
                    || (!execute_if.gpu_ready && (decode_if.ex_type == `EX_GPU)));

    wire stall = rename_valid || ex_stalled;

    // Reserve rd when a writing instruction issues; release on writeback.
    wire acquire_rd = (| decode_if.valid) && (decode_if.wb != 0) && ~stall;
    wire release_rd = (| writeback_if.valid);

    // Threads still pending on the written-back register after this writeback.
    wire [`NUM_THREADS-1:0] valid_wb_new_mask = rename_table[writeback_if.warp_num][writeback_if.rd] & ~writeback_if.valid;

    // Must be a net: a variable declared with an initializer is only assigned
    // once at time zero, which would freeze the counter instead of tracking
    // acquire/release every cycle.
    wire [CTVW-1:0] count_valid_next = (acquire_rd && !(release_rd && (0 == valid_wb_new_mask))) ? (count_valid + 1) :
                                       (~acquire_rd && (release_rd && (0 == valid_wb_new_mask))) ? (count_valid - 1) :
                                                                                                    count_valid;

    integer i, w;

    always @(posedge clk) begin
        if (reset) begin
            for (w = 0; w < `NUM_WARPS; w++) begin
                for (i = 0; i < 32; i++) begin
                    rename_table[w][i] <= 0;
                end
            end
            count_valid <= 0;
        end else begin
            if (acquire_rd) begin
                // Every active thread of the issuing instruction owes a write to rd.
                rename_table[decode_if.warp_num][decode_if.rd] <= decode_if.valid;
            end
            if (release_rd) begin
                // A writeback must target a register that was reserved.
                assert(rename_table[writeback_if.warp_num][writeback_if.rd] != 0);
                rename_table[writeback_if.warp_num][writeback_if.rd] <= valid_wb_new_mask;
            end
            count_valid <= count_valid_next;
        end
    end

    // Pipeline register between decode and execute; holds its value on stall.
    VX_generic_register #(
        .N(`NUM_THREADS + `NW_BITS + 32 + 32 + `NR_BITS + `NR_BITS + `NR_BITS + 32 + 1 + 1 + `EX_BITS + `OP_BITS + `WB_BITS)
    ) schedule_reg (
        .clk   (clk),
        .reset (reset),
        .stall (stall),
        .flush (0),
        .in    ({decode_if.valid,  decode_if.warp_num,  decode_if.curr_PC,  decode_if.next_PC,  decode_if.rd,  decode_if.rs1,  decode_if.rs2,  decode_if.imm,  decode_if.rs1_is_PC,  decode_if.rs2_is_imm,  decode_if.ex_type,  decode_if.instr_op,  decode_if.wb}),
        .out   ({execute_if.valid, execute_if.warp_num, execute_if.curr_PC, execute_if.next_PC, execute_if.rd, execute_if.rs1, execute_if.rs2, execute_if.imm, execute_if.rs1_is_PC, execute_if.rs2_is_imm, execute_if.ex_type, execute_if.instr_op, execute_if.wb})
    );

    assign decode_if.ready = ~stall;

    assign is_empty = (0 == count_valid);

endmodule

View File

@@ -9,7 +9,7 @@ module VX_warp_sched #(
VX_warp_ctl_if warp_ctl_if,
VX_wstall_if wstall_if,
VX_join_if join_if,
VX_branch_rsp_if branch_rsp_if,
VX_branch_ctl_if branch_ctl_if,
VX_ifetch_rsp_if ifetch_rsp_if,
VX_ifetch_req_if ifetch_req_if,
@@ -158,11 +158,11 @@ module VX_warp_sched #(
end
// Branch
if (branch_rsp_if.valid) begin
if (branch_rsp_if.taken) begin
warp_pcs[branch_rsp_if.warp_num] <= branch_rsp_if.dest;
if (branch_ctl_if.valid) begin
if (branch_ctl_if.taken) begin
warp_pcs[branch_ctl_if.warp_num] <= branch_ctl_if.dest;
end
warp_stalled[branch_rsp_if.warp_num] <= 0;
warp_stalled[branch_ctl_if.warp_num] <= 0;
end
// Lock/Release
@@ -230,7 +230,7 @@ module VX_warp_sched #(
);
end
wire should_bra = (branch_rsp_if.valid && branch_rsp_if.taken && (warp_to_schedule == branch_rsp_if.warp_num));
wire should_bra = (branch_ctl_if.valid && branch_ctl_if.taken && (warp_to_schedule == branch_ctl_if.warp_num));
assign hazard = should_bra && schedule;
@@ -244,7 +244,7 @@ module VX_warp_sched #(
assign warp_pc = real_use_wspawn ? use_wspawn_pc : warp_pcs[warp_to_schedule];
assign thread_mask = (global_stall) ? 0 : (real_use_wspawn ? `NUM_THREADS'b1 : thread_masks[warp_to_schedule]);
assign thread_mask = global_stall ? 0 : (real_use_wspawn ? `NUM_THREADS'(1) : thread_masks[warp_to_schedule]);
assign warp_num = warp_to_schedule;

View File

@@ -3,109 +3,84 @@
module VX_writeback #(
parameter CORE_ID = 0
) (
input wire clk,
input wire reset,
input wire clk,
input wire reset,
// inputs
VX_wb_if alu_wb_if,
VX_wb_if branch_wb_if,
VX_wb_if lsu_wb_if,
VX_wb_if mul_wb_if,
VX_wb_if csr_wb_if,
VX_commit_if alu_commit_if,
VX_commit_if branch_commit_if,
VX_commit_if lsu_commit_if,
VX_commit_if mul_commit_if,
VX_commit_if csr_commit_if,
// outputs
VX_wb_if writeback_if,
output wire notify_commit
VX_wb_if writeback_if
);
wire br_valid = (| branch_wb_if.valid);
wire lsu_valid = (| lsu_wb_if.valid);
wire mul_valid = (| mul_wb_if.valid);
wire alu_valid = (| alu_wb_if.valid);
wire csr_valid = (| csr_wb_if.valid);
wire br_valid = (| branch_commit_if.valid) && (branch_commit_if.wb != `WB_NO);
wire lsu_valid = (| lsu_commit_if.valid) && (lsu_commit_if.wb != `WB_NO);
wire mul_valid = (| mul_commit_if.valid) && (mul_commit_if.wb != `WB_NO);
wire alu_valid = (| alu_commit_if.valid) && (alu_commit_if.wb != `WB_NO);
wire csr_valid = (| csr_commit_if.valid) && (csr_commit_if.wb != `WB_NO);
VX_wb_if writeback_tmp_if();
assign writeback_tmp_if.valid = br_valid ? branch_wb_if.valid :
lsu_valid ? lsu_wb_if.valid :
mul_valid ? mul_wb_if.valid :
alu_valid ? alu_wb_if.valid :
csr_valid ? csr_wb_if.valid :
assign writeback_tmp_if.valid = br_valid ? branch_commit_if.valid :
lsu_valid ? lsu_commit_if.valid :
mul_valid ? mul_commit_if.valid :
alu_valid ? alu_commit_if.valid :
csr_valid ? csr_commit_if.valid :
0;
assign writeback_tmp_if.warp_num = br_valid ? branch_wb_if.warp_num :
lsu_valid ? lsu_wb_if.warp_num :
mul_valid ? mul_wb_if.warp_num :
alu_valid ? alu_wb_if.warp_num :
csr_valid ? csr_wb_if.warp_num :
assign writeback_tmp_if.warp_num = br_valid ? branch_commit_if.warp_num :
lsu_valid ? lsu_commit_if.warp_num :
mul_valid ? mul_commit_if.warp_num :
alu_valid ? alu_commit_if.warp_num :
csr_valid ? csr_commit_if.warp_num :
0;
assign writeback_tmp_if.curr_PC = br_valid ? branch_wb_if.curr_PC :
lsu_valid ? lsu_wb_if.curr_PC :
mul_valid ? mul_wb_if.curr_PC :
alu_valid ? alu_wb_if.curr_PC :
csr_valid ? csr_wb_if.curr_PC :
0;
assign writeback_tmp_if.data = br_valid ? branch_wb_if.data :
lsu_valid ? lsu_wb_if.data :
mul_valid ? mul_wb_if.data :
alu_valid ? alu_wb_if.data :
csr_valid ? csr_wb_if.data :
0;
assign writeback_tmp_if.rd = br_valid ? branch_wb_if.rd :
lsu_valid ? lsu_wb_if.rd :
mul_valid ? mul_wb_if.rd :
alu_valid ? alu_wb_if.rd :
csr_valid ? csr_wb_if.rd :
0;
assign writeback_tmp_if.wb = br_valid ? branch_wb_if.wb :
lsu_valid ? lsu_wb_if.wb :
alu_valid ? alu_wb_if.wb :
csr_valid ? csr_wb_if.wb :
mul_valid ? mul_wb_if.wb :
0;
assign writeback_tmp_if.data = br_valid ? branch_commit_if.data :
lsu_valid ? lsu_commit_if.data :
mul_valid ? mul_commit_if.data :
alu_valid ? alu_commit_if.data :
csr_valid ? csr_commit_if.data :
0;
assign writeback_tmp_if.rd = br_valid ? branch_commit_if.rd :
lsu_valid ? lsu_commit_if.rd :
mul_valid ? mul_commit_if.rd :
alu_valid ? alu_commit_if.rd :
csr_valid ? csr_commit_if.rd :
0;
wire stall = ~writeback_if.ready && (| writeback_if.valid);
VX_generic_register #(
.N(`NUM_THREADS + `NW_BITS + 32 + `NR_BITS + (`NUM_THREADS * 32) + `WB_BITS)
.N(`NUM_THREADS + `NW_BITS + `NR_BITS + (`NUM_THREADS * 32))
) wb_reg (
.clk (clk),
.reset (reset),
.stall (stall),
.flush (0),
.in ({writeback_tmp_if.valid, writeback_tmp_if.warp_num, writeback_tmp_if.curr_PC, writeback_tmp_if.rd, writeback_tmp_if.data, writeback_tmp_if.wb}),
.out ({writeback_if.valid, writeback_if.warp_num, writeback_if.curr_PC, writeback_if.rd, writeback_if.data, writeback_if.wb})
.in ({writeback_tmp_if.valid, writeback_tmp_if.warp_num, writeback_tmp_if.rd, writeback_tmp_if.data}),
.out ({writeback_if.valid, writeback_if.warp_num, writeback_if.rd, writeback_if.data})
);
assign branch_wb_if.ready = !stall;
assign lsu_wb_if.ready = !stall && !br_valid;
assign mul_wb_if.ready = !stall && !br_valid && !lsu_valid;
assign alu_wb_if.ready = !stall && !br_valid && !lsu_valid && !mul_valid;
assign csr_wb_if.ready = !stall && !br_valid && !lsu_valid && !mul_valid && !alu_valid;
assign branch_commit_if.ready = !stall;
assign lsu_commit_if.ready = !stall && !br_valid;
assign mul_commit_if.ready = !stall && !br_valid && !lsu_valid;
assign alu_commit_if.ready = !stall && !br_valid && !lsu_valid && !mul_valid;
assign csr_commit_if.ready = !stall && !br_valid && !lsu_valid && !mul_valid && !alu_valid;
assign notify_commit = (| writeback_tmp_if.valid) && ~stall;
// special workaround to control RISC-V benchmarks termination on Verilator
reg [31:0] last_data_wb /* verilator public */;
always @(posedge clk) begin
if (notify_commit && (writeback_tmp_if.wb != 0) && (writeback_tmp_if.rd == 28)) begin
if ((| writeback_tmp_if.valid) && ~stall && (writeback_tmp_if.rd == 28)) begin
last_data_wb <= writeback_tmp_if.data[0];
end
end
`ifdef DBG_PRINT_PIPELINE
always @(posedge clk) begin
if ((| writeback_tmp_if.valid) && ~stall) begin
$display("%t: Core%0d-WB: warp=%0d, PC=%0h, rd=%0d, wb=%0d, data=%0h", $time, CORE_ID, writeback_tmp_if.warp_num, writeback_tmp_if.curr_PC, writeback_tmp_if.rd, writeback_tmp_if.wb, writeback_tmp_if.data);
end
end
`endif
endmodule

View File

@@ -46,7 +46,7 @@ module VX_cache_core_rsp_merge #(
reg [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_unqual;
reg [NUM_BANKS-1:0] core_rsp_bank_select;
wire stall = ~core_rsp_ready;
wire stall = ~core_rsp_ready && (| core_rsp_valid);
integer i;

View File

@@ -0,0 +1,15 @@
`ifndef VX_BRANCH_CTL_IF
`define VX_BRANCH_CTL_IF

`include "VX_define.vh"

// Branch control signals. The include guard is named after this interface
// so that it cannot collide with a guard belonging to another file.
interface VX_branch_ctl_if ();

    // Qualifies the other signals of this interface.
    wire                valid;

    // Warp the branch outcome applies to.
    wire [`NW_BITS-1:0] warp_num;

    // Set when the branch is taken; `dest` is then the warp's new PC.
    wire                taken;

    // 32-bit branch destination.
    wire [31:0]         dest;

endinterface

`endif

View File

@@ -0,0 +1,19 @@
`ifndef VX_COMMIT_IF
`define VX_COMMIT_IF
`include "VX_define.vh"
// Commit interface: bundles a result (valid mask, warp, PC, data, destination
// register, write-back type) with a `ready` signal.
interface VX_commit_if ();
// NUM_THREADS-bit valid mask; consumers OR-reduce it to detect a pending result.
wire [`NUM_THREADS-1:0] valid;
// Warp that produced the result.
wire [`NW_BITS-1:0] warp_num;
// 32-bit PC carried along with the result.
wire [31:0] curr_PC;
// NUM_THREADS entries of 32-bit result data.
wire [`NUM_THREADS-1:0][31:0] data;
// Destination register index.
wire [`NR_BITS-1:0] rd;
// Write-back type; the writeback stage treats `WB_NO as "no register write".
wire [`WB_BITS-1:0] wb;
// NOTE(review): presumably flags an I/O access -- confirm against the producer.
wire is_io;
// Back-pressure signal; producers stall while a valid result is pending and ready is low.
wire ready;
endinterface
`endif

View File

@@ -3,7 +3,7 @@
`include "VX_define.vh"
interface VX_execute_if();
interface VX_execute_if ();
wire [`NUM_THREADS-1:0] valid;
wire [`NW_BITS-1:0] warp_num;

View File

@@ -7,12 +7,13 @@ interface VX_gpu_req_if();
wire [`NUM_THREADS-1:0] valid;
wire [`NW_BITS-1:0] warp_num;
wire [31:0] next_PC;
wire [31:0] curr_PC;
wire [`GPU_BITS-1:0] gpu_op;
wire [`NUM_THREADS-1:0][31:0] rs1_data;
wire [31:0] rs2_data;
wire [31:0] next_PC;
wire ready;

View File

@@ -0,0 +1,13 @@
`ifndef VX_PERF_CNTRS_IF
`define VX_PERF_CNTRS_IF
`include "VX_define.vh"
// Performance counters interface: two 64-bit values.
interface VX_perf_cntrs_if ();
// NOTE(review): presumably the number of elapsed clock cycles -- confirm against the driver.
wire [63:0] total_cycles;
// NOTE(review): presumably the number of committed instructions -- confirm against the driver.
wire [63:0] total_instrs;
endinterface
`endif

View File

@@ -6,12 +6,9 @@
interface VX_wb_if ();
wire [`NUM_THREADS-1:0] valid;
wire [`NW_BITS-1:0] warp_num;
wire [31:0] curr_PC;
wire [`NW_BITS-1:0] warp_num;
wire [`NUM_THREADS-1:0][31:0] data;
wire [`NR_BITS-1:0] rd;
wire [`WB_BITS-1:0] wb;
wire is_io;
wire ready;
endinterface

View File

@@ -257,12 +257,12 @@ bool Simulator::run() {
// check riscv-tests PASSED/FAILED status
#if (NUM_CLUSTERS == 1 && NUM_CORES == 1)
int status = (int)vortex_->Vortex->genblk1__DOT__cluster->genblk1__BRA__0__KET____DOT__core->pipeline->writeback->last_data_wb & 0xf;
int status = (int)vortex_->Vortex->genblk1__DOT__cluster->genblk1__BRA__0__KET____DOT__core->pipeline->commit->writeback->last_data_wb & 0xf;
#else
#if (NUM_CLUSTERS == 1)
int status = (int)vortex_->Vortex->genblk1__DOT__cluster->genblk1__BRA__0__KET____DOT__core->pipeline->writeback->last_data_wb & 0xf;
int status = (int)vortex_->Vortex->genblk1__DOT__cluster->genblk1__BRA__0__KET____DOT__core->pipeline->commit->writeback->last_data_wb & 0xf;
#else
int status = (int)vortex_->Vortex->genblk2__DOT__genblk1__BRA__0__KET____DOT__cluster->genblk1__BRA__0__KET____DOT__core->pipeline->writeback->last_data_wb & 0xf;
int status = (int)vortex_->Vortex->genblk2__DOT__genblk1__BRA__0__KET____DOT__cluster->genblk1__BRA__0__KET____DOT__core->pipeline->commit->writeback->last_data_wb & 0xf;
#endif
#endif

View File

@@ -22,7 +22,7 @@ int main(int argc, char **argv)
"../../../benchmarks/riscv_tests/rv32ui-p-bltu.hex",
"../../../benchmarks/riscv_tests/rv32ui-p-bne.hex",
"../../../benchmarks/riscv_tests/rv32ui-p-jal.hex",
"../../../benchmarks/riscv_tests/rv32ui-p-jalr.hex",
"../../../benchmarks/riscv_tests/rv32ui-p-jalr.hex",
"../../../benchmarks/riscv_tests/rv32ui-p-lb.hex",
"../../../benchmarks/riscv_tests/rv32ui-p-lbu.hex",
"../../../benchmarks/riscv_tests/rv32ui-p-lh.hex",