Re-added IPDOM stack; SPLIT/JOIN tested
This commit is contained in:
@@ -8,8 +8,9 @@ EXE=--exe ./simulate/test_bench.cpp
|
||||
|
||||
COMP=--compiler gcc
|
||||
|
||||
WNO=-Wno-fatal -Wno-UNOPTFLAT -Wno-UNDRIVEN -Wno-UNSIGNED -Wno-lint --Wno-PINMISSING -Wno-STMTDLY
|
||||
WNO=-Wno-UNOPTFLAT -Wno-UNDRIVEN --Wno-PINMISSING -Wno-STMTDLY -Wno-WIDTH
|
||||
|
||||
LIGHTW=-Wno-UNOPTFLAT --Wno-PINMISSING -Wno-WIDTH -Wno-STMTDLY
|
||||
# LIB=-LDFLAGS '-L/usr/local/systemc/'
|
||||
LIB=
|
||||
|
||||
@@ -23,7 +24,7 @@ MAKECPP=(cd obj_dir && make -j -f VVortex.mk)
|
||||
# -LDFLAGS '-lsystemc'
|
||||
VERILATOR:
|
||||
echo "#define VCD_OFF" > simulate/tb_debug.h
|
||||
verilator $(COMP) -cc $(FILE) $(INCLUDE) $(EXE) $(LIB) $(CF)
|
||||
verilator $(COMP) -cc $(FILE) $(INCLUDE) $(EXE) $(LIB) $(CF) $(LIGHTW)
|
||||
|
||||
VERILATORnoWarnings:
|
||||
echo "#define VCD_OFF" > simulate/tb_debug.h
|
||||
|
||||
@@ -186,23 +186,23 @@ module VX_decode(
|
||||
case(curr_opcode)
|
||||
`JAL_INST:
|
||||
begin
|
||||
temp_jal = 1'b1 && in_valid[0];
|
||||
temp_jal = 1'b1 && (|in_valid);
|
||||
temp_jal_offset = jal_1_offset;
|
||||
end
|
||||
`JALR_INST:
|
||||
begin
|
||||
temp_jal = 1'b1 && in_valid[0];
|
||||
temp_jal = 1'b1 && (|in_valid);
|
||||
temp_jal_offset = jal_2_offset;
|
||||
end
|
||||
`SYS_INST:
|
||||
begin
|
||||
// $display("SYS EBREAK %h", (jal_sys_jal && in_valid[0]) );
|
||||
temp_jal = jal_sys_jal && in_valid[0];
|
||||
// $display("SYS EBREAK %h", (jal_sys_jal && (|in_valid)) );
|
||||
temp_jal = jal_sys_jal && (|in_valid);
|
||||
temp_jal_offset = jal_sys_off;
|
||||
end
|
||||
default:
|
||||
begin
|
||||
temp_jal = 1'b0 && in_valid[0];
|
||||
temp_jal = 1'b0 && (|in_valid);
|
||||
temp_jal_offset = 32'hdeadbeef;
|
||||
end
|
||||
endcase
|
||||
@@ -216,7 +216,7 @@ module VX_decode(
|
||||
|
||||
|
||||
// assign is_ebreak = is_e_inst;
|
||||
wire ebreak = (curr_opcode == `SYS_INST) && (jal_sys_jal && in_valid[0]);
|
||||
wire ebreak = (curr_opcode == `SYS_INST) && (jal_sys_jal && (|in_valid));
|
||||
assign VX_frE_to_bckE_req.ebreak = ebreak;
|
||||
assign out_ebreak = ebreak;
|
||||
|
||||
@@ -257,7 +257,7 @@ module VX_decode(
|
||||
`B_INST:
|
||||
begin
|
||||
// $display("BRANCH IN DECODE");
|
||||
temp_branch_stall = 1'b1 && in_valid[0];
|
||||
temp_branch_stall = 1'b1 && (|in_valid);
|
||||
case(func3)
|
||||
3'h0: temp_branch_type = `BEQ;
|
||||
3'h1: temp_branch_type = `BNE;
|
||||
@@ -272,17 +272,17 @@ module VX_decode(
|
||||
`JAL_INST:
|
||||
begin
|
||||
temp_branch_type = `NO_BRANCH;
|
||||
temp_branch_stall = 1'b1 && in_valid[0];
|
||||
temp_branch_stall = 1'b1 && (|in_valid);
|
||||
end
|
||||
`JALR_INST:
|
||||
begin
|
||||
temp_branch_type = `NO_BRANCH;
|
||||
temp_branch_stall = 1'b1 && in_valid[0];
|
||||
temp_branch_stall = 1'b1 && (|in_valid);
|
||||
end
|
||||
default:
|
||||
begin
|
||||
temp_branch_type = `NO_BRANCH;
|
||||
temp_branch_stall = 1'b0 && in_valid[0];
|
||||
temp_branch_stall = 1'b0 && (|in_valid);
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
@@ -58,16 +58,26 @@ module VX_execute_unit (
|
||||
endgenerate
|
||||
|
||||
|
||||
wire [`NW_M1:0] branch_use_index;
|
||||
wire branch_found_valid;
|
||||
VX_priority_encoder choose_alu_result(
|
||||
.valids(VX_exec_unit_req.valid),
|
||||
.index (branch_use_index),
|
||||
.found (branch_found_valid)
|
||||
);
|
||||
|
||||
wire[31:0] branch_use_alu_result = alu_result[branch_use_index];
|
||||
|
||||
reg temp_branch_dir;
|
||||
always @(*)
|
||||
begin
|
||||
case(VX_exec_unit_req.branch_type)
|
||||
`BEQ: temp_branch_dir = (alu_result[0] == 0) ? `TAKEN : `NOT_TAKEN;
|
||||
`BNE: temp_branch_dir = (alu_result[0] == 0) ? `NOT_TAKEN : `TAKEN;
|
||||
`BLT: temp_branch_dir = (alu_result[0][31] == 0) ? `NOT_TAKEN : `TAKEN;
|
||||
`BGT: temp_branch_dir = (alu_result[0][31] == 0) ? `TAKEN : `NOT_TAKEN;
|
||||
`BLTU: temp_branch_dir = (alu_result[0][31] == 0) ? `NOT_TAKEN : `TAKEN;
|
||||
`BGTU: temp_branch_dir = (alu_result[0][31] == 0) ? `TAKEN : `NOT_TAKEN;
|
||||
`BEQ: temp_branch_dir = (branch_use_alu_result == 0) ? `TAKEN : `NOT_TAKEN;
|
||||
`BNE: temp_branch_dir = (branch_use_alu_result == 0) ? `NOT_TAKEN : `TAKEN;
|
||||
`BLT: temp_branch_dir = (branch_use_alu_result[31] == 0) ? `NOT_TAKEN : `TAKEN;
|
||||
`BGT: temp_branch_dir = (branch_use_alu_result[31] == 0) ? `TAKEN : `NOT_TAKEN;
|
||||
`BLTU: temp_branch_dir = (branch_use_alu_result[31] == 0) ? `NOT_TAKEN : `TAKEN;
|
||||
`BGTU: temp_branch_dir = (branch_use_alu_result[31] == 0) ? `TAKEN : `NOT_TAKEN;
|
||||
`NO_BRANCH: temp_branch_dir = `NOT_TAKEN;
|
||||
default: temp_branch_dir = `NOT_TAKEN;
|
||||
endcase // in_branch_type
|
||||
|
||||
@@ -22,13 +22,12 @@ module VX_generic_stack
|
||||
if (reset) begin
|
||||
ptr <= 0;
|
||||
for (i = 0; i < (1 << DEPTH); i=i+1) stack[i] <= 0;
|
||||
end else if (push)
|
||||
ptr <= ptr + 2;
|
||||
else if (pop) begin
|
||||
ptr <= ptr - 1;
|
||||
end else if (push) begin
|
||||
stack[ptr] <= q1;
|
||||
stack[ptr+1] <= q2;
|
||||
ptr <= ptr + 2;
|
||||
end else if (pop) begin
|
||||
ptr <= ptr - 1;
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
@@ -7,6 +7,9 @@ module VX_gpgpu_inst (
|
||||
);
|
||||
|
||||
|
||||
wire[`NT_M1:0] curr_valids = VX_gpu_inst_req.valid;
|
||||
wire is_split = (VX_gpu_inst_req.is_split);
|
||||
|
||||
wire[`NT_M1:0] tmc_new_mask;
|
||||
genvar curr_t;
|
||||
for (curr_t = 0; curr_t < `NT; curr_t=curr_t+1)
|
||||
@@ -14,7 +17,7 @@ module VX_gpgpu_inst (
|
||||
assign tmc_new_mask[curr_t] = curr_t < VX_gpu_inst_req.a_reg_data[0];
|
||||
end
|
||||
|
||||
wire valid_inst = (|VX_gpu_inst_req.valid);
|
||||
wire valid_inst = (|curr_valids);
|
||||
|
||||
assign VX_warp_ctl.warp_num = VX_gpu_inst_req.warp_num;
|
||||
assign VX_warp_ctl.change_mask = (VX_gpu_inst_req.is_tmc || VX_gpu_inst_req.is_split) && valid_inst;
|
||||
@@ -34,20 +37,19 @@ module VX_gpgpu_inst (
|
||||
for (curr_s_t = 0; curr_s_t < `NT; curr_s_t=curr_s_t+1) begin
|
||||
wire curr_bool = (VX_gpu_inst_req.a_reg_data[curr_s_t] == 32'b1);
|
||||
|
||||
assign split_new_use_mask[curr_s_t] = VX_gpu_inst_req.valid[curr_s_t] & (curr_bool);
|
||||
assign split_new_later_mask[curr_s_t] = VX_gpu_inst_req.valid[curr_s_t] & (!curr_bool);
|
||||
assign split_new_use_mask[curr_s_t] = curr_valids[curr_s_t] & (curr_bool);
|
||||
assign split_new_later_mask[curr_s_t] = curr_valids[curr_s_t] & (!curr_bool);
|
||||
end
|
||||
|
||||
reg[$clog2(`NT)-1:0] num_valids;
|
||||
integer z;
|
||||
always @(*) begin
|
||||
num_valids = 0;
|
||||
for (z = 0; z < `NT; z=z+1) begin
|
||||
if (VX_gpu_inst_req.valid[z]) num_valids = num_valids + 1;
|
||||
end
|
||||
end
|
||||
|
||||
wire[`NW_M1:0] num_valids;
|
||||
VX_one_counter one_counter(
|
||||
.valids (curr_valids),
|
||||
.ones_found(num_valids)
|
||||
);
|
||||
|
||||
|
||||
assign VX_warp_ctl.is_split = (VX_gpu_inst_req.is_split) && (num_valids > 1);
|
||||
assign VX_warp_ctl.is_split = is_split && (num_valids > 1);
|
||||
assign VX_warp_ctl.split_new_mask = split_new_use_mask;
|
||||
assign VX_warp_ctl.split_later_mask = split_new_later_mask;
|
||||
assign VX_warp_ctl.split_save_pc = VX_gpu_inst_req.pc_next;
|
||||
|
||||
196
rtl/VX_gpr.v
196
rtl/VX_gpr.v
@@ -15,112 +15,112 @@ module VX_gpr (
|
||||
|
||||
assign write_enable = valid_write_request && ((VX_writeback_inter.wb != 0) && (VX_writeback_inter.rd != 5'h0));
|
||||
|
||||
// byte_enabled_simple_dual_port_ram first_ram(
|
||||
// .we (write_enable),
|
||||
// .clk (clk),
|
||||
// .waddr (VX_writeback_inter.rd),
|
||||
// .raddr1(VX_gpr_read.rs1),
|
||||
// .raddr2(VX_gpr_read.rs2),
|
||||
// .be (VX_writeback_inter.wb_valid),
|
||||
// .wdata (VX_writeback_inter.write_data),
|
||||
// .q1 (out_a_reg_data),
|
||||
// .q2 (out_b_reg_data)
|
||||
// );
|
||||
byte_enabled_simple_dual_port_ram first_ram(
|
||||
.we (write_enable),
|
||||
.clk (clk),
|
||||
.waddr (VX_writeback_inter.rd),
|
||||
.raddr1(VX_gpr_read.rs1),
|
||||
.raddr2(VX_gpr_read.rs2),
|
||||
.be (VX_writeback_inter.wb_valid),
|
||||
.wdata (VX_writeback_inter.write_data),
|
||||
.q1 (out_a_reg_data),
|
||||
.q2 (out_b_reg_data)
|
||||
);
|
||||
|
||||
|
||||
wire[`NT_M1:0][31:0] write_bit_mask;
|
||||
// wire[`NT_M1:0][31:0] write_bit_mask;
|
||||
|
||||
genvar curr_t;
|
||||
for (curr_t = 0; curr_t < `NT; curr_t=curr_t+1) begin
|
||||
wire local_write = write_enable & VX_writeback_inter.wb_valid[curr_t];
|
||||
assign write_bit_mask[curr_t] = {32{~local_write}};
|
||||
end
|
||||
// genvar curr_t;
|
||||
// for (curr_t = 0; curr_t < `NT; curr_t=curr_t+1) begin
|
||||
// wire local_write = write_enable & VX_writeback_inter.wb_valid[curr_t];
|
||||
// assign write_bit_mask[curr_t] = {32{~local_write}};
|
||||
// end
|
||||
|
||||
wire going_to_write = write_enable & (|VX_writeback_inter.wb_valid);
|
||||
// wire going_to_write = write_enable & (|VX_writeback_inter.wb_valid);
|
||||
|
||||
|
||||
wire cenb = !going_to_write;
|
||||
// wire cenb = !going_to_write;
|
||||
|
||||
wire cena_1 = (VX_gpr_read.rs1 == 0);
|
||||
wire cena_2 = (VX_gpr_read.rs2 == 0);
|
||||
// wire cena_1 = (VX_gpr_read.rs1 == 0);
|
||||
// wire cena_2 = (VX_gpr_read.rs2 == 0);
|
||||
|
||||
// wire[127:0] write_bit_mask = {{32{~(VX_writeback_inter.wb_valid[3])}}, {32{~(VX_writeback_inter.wb_valid[2])}}, {32{~(VX_writeback_inter.wb_valid[1])}}, {32{~(VX_writeback_inter.wb_valid[0])}}};
|
||||
/* verilator lint_off PINCONNECTEMPTY */
|
||||
rf2_32x128_wm1 first_ram (
|
||||
.CENYA(),
|
||||
.AYA(),
|
||||
.CENYB(),
|
||||
.WENYB(),
|
||||
.AYB(),
|
||||
.QA(out_a_reg_data),
|
||||
.SOA(),
|
||||
.SOB(),
|
||||
.CLKA(clk),
|
||||
.CENA(cena_1),
|
||||
.AA(VX_gpr_read.rs1),
|
||||
.CLKB(clk),
|
||||
.CENB(cenb),
|
||||
.WENB(write_bit_mask),
|
||||
.AB(VX_writeback_inter.rd),
|
||||
.DB(VX_writeback_inter.write_data),
|
||||
.EMAA(3'b011),
|
||||
.EMASA(1'b0),
|
||||
.EMAB(3'b011),
|
||||
.TENA(1'b1),
|
||||
.TCENA(1'b0),
|
||||
.TAA(5'b0),
|
||||
.TENB(1'b1),
|
||||
.TCENB(1'b0),
|
||||
.TWENB(128'b0),
|
||||
.TAB(5'b0),
|
||||
.TDB(128'b0),
|
||||
.RET1N(1'b1),
|
||||
.SIA(2'b0),
|
||||
.SEA(1'b0),
|
||||
.DFTRAMBYP(1'b0),
|
||||
.SIB(2'b0),
|
||||
.SEB(1'b0),
|
||||
.COLLDISN(1'b1)
|
||||
);
|
||||
/* verilator lint_on PINCONNECTEMPTY */
|
||||
// // wire[127:0] write_bit_mask = {{32{~(VX_writeback_inter.wb_valid[3])}}, {32{~(VX_writeback_inter.wb_valid[2])}}, {32{~(VX_writeback_inter.wb_valid[1])}}, {32{~(VX_writeback_inter.wb_valid[0])}}};
|
||||
// /* verilator lint_off PINCONNECTEMPTY */
|
||||
// rf2_32x128_wm1 first_ram (
|
||||
// .CENYA(),
|
||||
// .AYA(),
|
||||
// .CENYB(),
|
||||
// .WENYB(),
|
||||
// .AYB(),
|
||||
// .QA(out_a_reg_data),
|
||||
// .SOA(),
|
||||
// .SOB(),
|
||||
// .CLKA(clk),
|
||||
// .CENA(cena_1),
|
||||
// .AA(VX_gpr_read.rs1),
|
||||
// .CLKB(clk),
|
||||
// .CENB(cenb),
|
||||
// .WENB(write_bit_mask),
|
||||
// .AB(VX_writeback_inter.rd),
|
||||
// .DB(VX_writeback_inter.write_data),
|
||||
// .EMAA(3'b011),
|
||||
// .EMASA(1'b0),
|
||||
// .EMAB(3'b011),
|
||||
// .TENA(1'b1),
|
||||
// .TCENA(1'b0),
|
||||
// .TAA(5'b0),
|
||||
// .TENB(1'b1),
|
||||
// .TCENB(1'b0),
|
||||
// .TWENB(128'b0),
|
||||
// .TAB(5'b0),
|
||||
// .TDB(128'b0),
|
||||
// .RET1N(1'b1),
|
||||
// .SIA(2'b0),
|
||||
// .SEA(1'b0),
|
||||
// .DFTRAMBYP(1'b0),
|
||||
// .SIB(2'b0),
|
||||
// .SEB(1'b0),
|
||||
// .COLLDISN(1'b1)
|
||||
// );
|
||||
// /* verilator lint_on PINCONNECTEMPTY */
|
||||
|
||||
/* verilator lint_off PINCONNECTEMPTY */
|
||||
rf2_32x128_wm1 second_ram (
|
||||
.CENYA(),
|
||||
.AYA(),
|
||||
.CENYB(),
|
||||
.WENYB(),
|
||||
.AYB(),
|
||||
.QA(out_b_reg_data),
|
||||
.SOA(),
|
||||
.SOB(),
|
||||
.CLKA(clk),
|
||||
.CENA(cena_2),
|
||||
.AA(VX_gpr_read.rs2),
|
||||
.CLKB(clk),
|
||||
.CENB(cenb),
|
||||
.WENB(write_bit_mask),
|
||||
.AB(VX_writeback_inter.rd),
|
||||
.DB(VX_writeback_inter.write_data),
|
||||
.EMAA(3'b011),
|
||||
.EMASA(1'b0),
|
||||
.EMAB(3'b011),
|
||||
.TENA(1'b1),
|
||||
.TCENA(1'b0),
|
||||
.TAA(5'b0),
|
||||
.TENB(1'b1),
|
||||
.TCENB(1'b0),
|
||||
.TWENB(128'b0),
|
||||
.TAB(5'b0),
|
||||
.TDB(128'b0),
|
||||
.RET1N(1'b1),
|
||||
.SIA(2'b0),
|
||||
.SEA(1'b0),
|
||||
.DFTRAMBYP(1'b0),
|
||||
.SIB(2'b0),
|
||||
.SEB(1'b0),
|
||||
.COLLDISN(1'b1)
|
||||
);
|
||||
/* verilator lint_on PINCONNECTEMPTY */
|
||||
// /* verilator lint_off PINCONNECTEMPTY */
|
||||
// rf2_32x128_wm1 second_ram (
|
||||
// .CENYA(),
|
||||
// .AYA(),
|
||||
// .CENYB(),
|
||||
// .WENYB(),
|
||||
// .AYB(),
|
||||
// .QA(out_b_reg_data),
|
||||
// .SOA(),
|
||||
// .SOB(),
|
||||
// .CLKA(clk),
|
||||
// .CENA(cena_2),
|
||||
// .AA(VX_gpr_read.rs2),
|
||||
// .CLKB(clk),
|
||||
// .CENB(cenb),
|
||||
// .WENB(write_bit_mask),
|
||||
// .AB(VX_writeback_inter.rd),
|
||||
// .DB(VX_writeback_inter.write_data),
|
||||
// .EMAA(3'b011),
|
||||
// .EMASA(1'b0),
|
||||
// .EMAB(3'b011),
|
||||
// .TENA(1'b1),
|
||||
// .TCENA(1'b0),
|
||||
// .TAA(5'b0),
|
||||
// .TENB(1'b1),
|
||||
// .TCENB(1'b0),
|
||||
// .TWENB(128'b0),
|
||||
// .TAB(5'b0),
|
||||
// .TDB(128'b0),
|
||||
// .RET1N(1'b1),
|
||||
// .SIA(2'b0),
|
||||
// .SEA(1'b0),
|
||||
// .DFTRAMBYP(1'b0),
|
||||
// .SIB(2'b0),
|
||||
// .SEB(1'b0),
|
||||
// .COLLDISN(1'b1)
|
||||
// );
|
||||
// /* verilator lint_on PINCONNECTEMPTY */
|
||||
|
||||
endmodule
|
||||
|
||||
@@ -42,7 +42,7 @@ module VX_scheduler (
|
||||
always @(posedge clk or posedge reset) begin
|
||||
|
||||
if (reset) begin
|
||||
for (i = 0; i < 32; i = i + 1) rename_table[i] = 0;
|
||||
for (i = 0; i < 32; i = i + 1) rename_table[i] <= 0;
|
||||
end else begin
|
||||
if (valid_wb ) rename_table[VX_writeback_inter.rd] <= 0;
|
||||
if (!schedule_delay && wb_inc) rename_table[VX_bckE_req.rd] <= 1;
|
||||
|
||||
@@ -50,7 +50,7 @@ module VX_warp_scheduler (
|
||||
|
||||
);
|
||||
|
||||
wire[(1+32+`NT_M1):0] d;
|
||||
wire[(1+32+`NT_M1):0] d[`NW-1:0];
|
||||
|
||||
wire join_fall;
|
||||
wire[31:0] join_pc;
|
||||
@@ -182,11 +182,11 @@ module VX_warp_scheduler (
|
||||
end
|
||||
end
|
||||
|
||||
wire[(1+32+`NT_M1):0] q1 = {1'b1, warp_pcs[split_warp_num], thread_masks[split_warp_num]};
|
||||
wire[(1+32+`NT_M1):0] q1 = {1'b1, 32'b0 , thread_masks[split_warp_num]};
|
||||
wire[(1+32+`NT_M1):0] q2 = {1'b0, split_save_pc , split_later_mask};
|
||||
|
||||
|
||||
assign {join_fall, join_pc, join_tm} = d;
|
||||
assign {join_fall, join_pc, join_tm} = d[join_warp_num];
|
||||
|
||||
|
||||
|
||||
@@ -202,7 +202,7 @@ module VX_warp_scheduler (
|
||||
.reset(reset),
|
||||
.push (push),
|
||||
.pop (pop),
|
||||
.d (d),
|
||||
.d (d[curr_warp]),
|
||||
.q1 (q1),
|
||||
.q2 (q2)
|
||||
);
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
# Dynamic Instructions: 12
|
||||
# of total cycles: 25
|
||||
# Dynamic Instructions: 36
|
||||
# of total cycles: 50
|
||||
# of forwarding stalls: 0
|
||||
# of branch stalls: 0
|
||||
# CPI: 2.08333
|
||||
# time to simulate: 0 milliseconds
|
||||
# CPI: 1.38889
|
||||
# time to simulate: 6.95313e-310 milliseconds
|
||||
# GRADE: Failed on test: 4294967295
|
||||
|
||||
@@ -1 +1 @@
|
||||
#define VCD_OFF
|
||||
#define VCD_OUTPUT
|
||||
|
||||
@@ -373,8 +373,14 @@ bool Vortex::simulate(std::string file_to_simulate)
|
||||
bool cont = false;
|
||||
// for (int i = 0; i < 500; i++)
|
||||
|
||||
vortex->clk = 0;
|
||||
vortex->reset = 1;
|
||||
vortex->clk = 0;
|
||||
vortex->eval();
|
||||
vortex->reset = 1;
|
||||
vortex->clk = 1;
|
||||
vortex->eval();
|
||||
vortex->reset = 0;
|
||||
vortex->clk = 0;
|
||||
|
||||
// unsigned cycles;
|
||||
counter = 0;
|
||||
|
||||
Reference in New Issue
Block a user