Re-added IPDOM stack + SPLIT/JOIN tested

This commit is contained in:
felsabbagh3
2019-10-21 21:24:49 -04:00
parent 0672389edc
commit b6375e76de
15 changed files with 2204 additions and 2165 deletions

View File

@@ -8,8 +8,9 @@ EXE=--exe ./simulate/test_bench.cpp
COMP=--compiler gcc
WNO=-Wno-fatal -Wno-UNOPTFLAT -Wno-UNDRIVEN -Wno-UNSIGNED -Wno-lint --Wno-PINMISSING -Wno-STMTDLY
WNO=-Wno-UNOPTFLAT -Wno-UNDRIVEN --Wno-PINMISSING -Wno-STMTDLY -Wno-WIDTH
LIGHTW=-Wno-UNOPTFLAT --Wno-PINMISSING -Wno-WIDTH -Wno-STMTDLY
# LIB=-LDFLAGS '-L/usr/local/systemc/'
LIB=
@@ -23,7 +24,7 @@ MAKECPP=(cd obj_dir && make -j -f VVortex.mk)
# -LDFLAGS '-lsystemc'
VERILATOR:
echo "#define VCD_OFF" > simulate/tb_debug.h
verilator $(COMP) -cc $(FILE) $(INCLUDE) $(EXE) $(LIB) $(CF)
verilator $(COMP) -cc $(FILE) $(INCLUDE) $(EXE) $(LIB) $(CF) $(LIGHTW)
VERILATORnoWarnings:
echo "#define VCD_OFF" > simulate/tb_debug.h

View File

@@ -186,23 +186,23 @@ module VX_decode(
case(curr_opcode)
`JAL_INST:
begin
temp_jal = 1'b1 && in_valid[0];
temp_jal = 1'b1 && (|in_valid);
temp_jal_offset = jal_1_offset;
end
`JALR_INST:
begin
temp_jal = 1'b1 && in_valid[0];
temp_jal = 1'b1 && (|in_valid);
temp_jal_offset = jal_2_offset;
end
`SYS_INST:
begin
// $display("SYS EBREAK %h", (jal_sys_jal && in_valid[0]) );
temp_jal = jal_sys_jal && in_valid[0];
// $display("SYS EBREAK %h", (jal_sys_jal && (|in_valid)) );
temp_jal = jal_sys_jal && (|in_valid);
temp_jal_offset = jal_sys_off;
end
default:
begin
temp_jal = 1'b0 && in_valid[0];
temp_jal = 1'b0 && (|in_valid);
temp_jal_offset = 32'hdeadbeef;
end
endcase
@@ -216,7 +216,7 @@ module VX_decode(
// assign is_ebreak = is_e_inst;
wire ebreak = (curr_opcode == `SYS_INST) && (jal_sys_jal && in_valid[0]);
wire ebreak = (curr_opcode == `SYS_INST) && (jal_sys_jal && (|in_valid));
assign VX_frE_to_bckE_req.ebreak = ebreak;
assign out_ebreak = ebreak;
@@ -257,7 +257,7 @@ module VX_decode(
`B_INST:
begin
// $display("BRANCH IN DECODE");
temp_branch_stall = 1'b1 && in_valid[0];
temp_branch_stall = 1'b1 && (|in_valid);
case(func3)
3'h0: temp_branch_type = `BEQ;
3'h1: temp_branch_type = `BNE;
@@ -272,17 +272,17 @@ module VX_decode(
`JAL_INST:
begin
temp_branch_type = `NO_BRANCH;
temp_branch_stall = 1'b1 && in_valid[0];
temp_branch_stall = 1'b1 && (|in_valid);
end
`JALR_INST:
begin
temp_branch_type = `NO_BRANCH;
temp_branch_stall = 1'b1 && in_valid[0];
temp_branch_stall = 1'b1 && (|in_valid);
end
default:
begin
temp_branch_type = `NO_BRANCH;
temp_branch_stall = 1'b0 && in_valid[0];
temp_branch_stall = 1'b0 && (|in_valid);
end
endcase
end

View File

@@ -58,16 +58,26 @@ module VX_execute_unit (
endgenerate
wire [`NW_M1:0] branch_use_index;
wire branch_found_valid;
VX_priority_encoder choose_alu_result(
.valids(VX_exec_unit_req.valid),
.index (branch_use_index),
.found (branch_found_valid)
);
wire[31:0] branch_use_alu_result = alu_result[branch_use_index];
reg temp_branch_dir;
always @(*)
begin
case(VX_exec_unit_req.branch_type)
`BEQ: temp_branch_dir = (alu_result[0] == 0) ? `TAKEN : `NOT_TAKEN;
`BNE: temp_branch_dir = (alu_result[0] == 0) ? `NOT_TAKEN : `TAKEN;
`BLT: temp_branch_dir = (alu_result[0][31] == 0) ? `NOT_TAKEN : `TAKEN;
`BGT: temp_branch_dir = (alu_result[0][31] == 0) ? `TAKEN : `NOT_TAKEN;
`BLTU: temp_branch_dir = (alu_result[0][31] == 0) ? `NOT_TAKEN : `TAKEN;
`BGTU: temp_branch_dir = (alu_result[0][31] == 0) ? `TAKEN : `NOT_TAKEN;
`BEQ: temp_branch_dir = (branch_use_alu_result == 0) ? `TAKEN : `NOT_TAKEN;
`BNE: temp_branch_dir = (branch_use_alu_result == 0) ? `NOT_TAKEN : `TAKEN;
`BLT: temp_branch_dir = (branch_use_alu_result[31] == 0) ? `NOT_TAKEN : `TAKEN;
`BGT: temp_branch_dir = (branch_use_alu_result[31] == 0) ? `TAKEN : `NOT_TAKEN;
`BLTU: temp_branch_dir = (branch_use_alu_result[31] == 0) ? `NOT_TAKEN : `TAKEN;
`BGTU: temp_branch_dir = (branch_use_alu_result[31] == 0) ? `TAKEN : `NOT_TAKEN;
`NO_BRANCH: temp_branch_dir = `NOT_TAKEN;
default: temp_branch_dir = `NOT_TAKEN;
endcase // in_branch_type

View File

@@ -22,13 +22,12 @@ module VX_generic_stack
if (reset) begin
ptr <= 0;
for (i = 0; i < (1 << DEPTH); i=i+1) stack[i] <= 0;
end else if (push)
ptr <= ptr + 2;
else if (pop) begin
ptr <= ptr - 1;
end else if (push) begin
stack[ptr] <= q1;
stack[ptr+1] <= q2;
ptr <= ptr + 2;
end else if (pop) begin
ptr <= ptr - 1;
end
end

View File

@@ -7,6 +7,9 @@ module VX_gpgpu_inst (
);
wire[`NT_M1:0] curr_valids = VX_gpu_inst_req.valid;
wire is_split = (VX_gpu_inst_req.is_split);
wire[`NT_M1:0] tmc_new_mask;
genvar curr_t;
for (curr_t = 0; curr_t < `NT; curr_t=curr_t+1)
@@ -14,7 +17,7 @@ module VX_gpgpu_inst (
assign tmc_new_mask[curr_t] = curr_t < VX_gpu_inst_req.a_reg_data[0];
end
wire valid_inst = (|VX_gpu_inst_req.valid);
wire valid_inst = (|curr_valids);
assign VX_warp_ctl.warp_num = VX_gpu_inst_req.warp_num;
assign VX_warp_ctl.change_mask = (VX_gpu_inst_req.is_tmc || VX_gpu_inst_req.is_split) && valid_inst;
@@ -34,20 +37,19 @@ module VX_gpgpu_inst (
for (curr_s_t = 0; curr_s_t < `NT; curr_s_t=curr_s_t+1) begin
wire curr_bool = (VX_gpu_inst_req.a_reg_data[curr_s_t] == 32'b1);
assign split_new_use_mask[curr_s_t] = VX_gpu_inst_req.valid[curr_s_t] & (curr_bool);
assign split_new_later_mask[curr_s_t] = VX_gpu_inst_req.valid[curr_s_t] & (!curr_bool);
assign split_new_use_mask[curr_s_t] = curr_valids[curr_s_t] & (curr_bool);
assign split_new_later_mask[curr_s_t] = curr_valids[curr_s_t] & (!curr_bool);
end
reg[$clog2(`NT)-1:0] num_valids;
integer z;
always @(*) begin
num_valids = 0;
for (z = 0; z < `NT; z=z+1) begin
if (VX_gpu_inst_req.valid[z]) num_valids = num_valids + 1;
end
end
wire[`NW_M1:0] num_valids;
VX_one_counter one_counter(
.valids (curr_valids),
.ones_found(num_valids)
);
assign VX_warp_ctl.is_split = (VX_gpu_inst_req.is_split) && (num_valids > 1);
assign VX_warp_ctl.is_split = is_split && (num_valids > 1);
assign VX_warp_ctl.split_new_mask = split_new_use_mask;
assign VX_warp_ctl.split_later_mask = split_new_later_mask;
assign VX_warp_ctl.split_save_pc = VX_gpu_inst_req.pc_next;

View File

@@ -15,112 +15,112 @@ module VX_gpr (
assign write_enable = valid_write_request && ((VX_writeback_inter.wb != 0) && (VX_writeback_inter.rd != 5'h0));
// byte_enabled_simple_dual_port_ram first_ram(
// .we (write_enable),
// .clk (clk),
// .waddr (VX_writeback_inter.rd),
// .raddr1(VX_gpr_read.rs1),
// .raddr2(VX_gpr_read.rs2),
// .be (VX_writeback_inter.wb_valid),
// .wdata (VX_writeback_inter.write_data),
// .q1 (out_a_reg_data),
// .q2 (out_b_reg_data)
// );
byte_enabled_simple_dual_port_ram first_ram(
.we (write_enable),
.clk (clk),
.waddr (VX_writeback_inter.rd),
.raddr1(VX_gpr_read.rs1),
.raddr2(VX_gpr_read.rs2),
.be (VX_writeback_inter.wb_valid),
.wdata (VX_writeback_inter.write_data),
.q1 (out_a_reg_data),
.q2 (out_b_reg_data)
);
wire[`NT_M1:0][31:0] write_bit_mask;
// wire[`NT_M1:0][31:0] write_bit_mask;
genvar curr_t;
for (curr_t = 0; curr_t < `NT; curr_t=curr_t+1) begin
wire local_write = write_enable & VX_writeback_inter.wb_valid[curr_t];
assign write_bit_mask[curr_t] = {32{~local_write}};
end
// genvar curr_t;
// for (curr_t = 0; curr_t < `NT; curr_t=curr_t+1) begin
// wire local_write = write_enable & VX_writeback_inter.wb_valid[curr_t];
// assign write_bit_mask[curr_t] = {32{~local_write}};
// end
wire going_to_write = write_enable & (|VX_writeback_inter.wb_valid);
// wire going_to_write = write_enable & (|VX_writeback_inter.wb_valid);
wire cenb = !going_to_write;
// wire cenb = !going_to_write;
wire cena_1 = (VX_gpr_read.rs1 == 0);
wire cena_2 = (VX_gpr_read.rs2 == 0);
// wire cena_1 = (VX_gpr_read.rs1 == 0);
// wire cena_2 = (VX_gpr_read.rs2 == 0);
// wire[127:0] write_bit_mask = {{32{~(VX_writeback_inter.wb_valid[3])}}, {32{~(VX_writeback_inter.wb_valid[2])}}, {32{~(VX_writeback_inter.wb_valid[1])}}, {32{~(VX_writeback_inter.wb_valid[0])}}};
/* verilator lint_off PINCONNECTEMPTY */
rf2_32x128_wm1 first_ram (
.CENYA(),
.AYA(),
.CENYB(),
.WENYB(),
.AYB(),
.QA(out_a_reg_data),
.SOA(),
.SOB(),
.CLKA(clk),
.CENA(cena_1),
.AA(VX_gpr_read.rs1),
.CLKB(clk),
.CENB(cenb),
.WENB(write_bit_mask),
.AB(VX_writeback_inter.rd),
.DB(VX_writeback_inter.write_data),
.EMAA(3'b011),
.EMASA(1'b0),
.EMAB(3'b011),
.TENA(1'b1),
.TCENA(1'b0),
.TAA(5'b0),
.TENB(1'b1),
.TCENB(1'b0),
.TWENB(128'b0),
.TAB(5'b0),
.TDB(128'b0),
.RET1N(1'b1),
.SIA(2'b0),
.SEA(1'b0),
.DFTRAMBYP(1'b0),
.SIB(2'b0),
.SEB(1'b0),
.COLLDISN(1'b1)
);
/* verilator lint_on PINCONNECTEMPTY */
// // wire[127:0] write_bit_mask = {{32{~(VX_writeback_inter.wb_valid[3])}}, {32{~(VX_writeback_inter.wb_valid[2])}}, {32{~(VX_writeback_inter.wb_valid[1])}}, {32{~(VX_writeback_inter.wb_valid[0])}}};
// /* verilator lint_off PINCONNECTEMPTY */
// rf2_32x128_wm1 first_ram (
// .CENYA(),
// .AYA(),
// .CENYB(),
// .WENYB(),
// .AYB(),
// .QA(out_a_reg_data),
// .SOA(),
// .SOB(),
// .CLKA(clk),
// .CENA(cena_1),
// .AA(VX_gpr_read.rs1),
// .CLKB(clk),
// .CENB(cenb),
// .WENB(write_bit_mask),
// .AB(VX_writeback_inter.rd),
// .DB(VX_writeback_inter.write_data),
// .EMAA(3'b011),
// .EMASA(1'b0),
// .EMAB(3'b011),
// .TENA(1'b1),
// .TCENA(1'b0),
// .TAA(5'b0),
// .TENB(1'b1),
// .TCENB(1'b0),
// .TWENB(128'b0),
// .TAB(5'b0),
// .TDB(128'b0),
// .RET1N(1'b1),
// .SIA(2'b0),
// .SEA(1'b0),
// .DFTRAMBYP(1'b0),
// .SIB(2'b0),
// .SEB(1'b0),
// .COLLDISN(1'b1)
// );
// /* verilator lint_on PINCONNECTEMPTY */
/* verilator lint_off PINCONNECTEMPTY */
rf2_32x128_wm1 second_ram (
.CENYA(),
.AYA(),
.CENYB(),
.WENYB(),
.AYB(),
.QA(out_b_reg_data),
.SOA(),
.SOB(),
.CLKA(clk),
.CENA(cena_2),
.AA(VX_gpr_read.rs2),
.CLKB(clk),
.CENB(cenb),
.WENB(write_bit_mask),
.AB(VX_writeback_inter.rd),
.DB(VX_writeback_inter.write_data),
.EMAA(3'b011),
.EMASA(1'b0),
.EMAB(3'b011),
.TENA(1'b1),
.TCENA(1'b0),
.TAA(5'b0),
.TENB(1'b1),
.TCENB(1'b0),
.TWENB(128'b0),
.TAB(5'b0),
.TDB(128'b0),
.RET1N(1'b1),
.SIA(2'b0),
.SEA(1'b0),
.DFTRAMBYP(1'b0),
.SIB(2'b0),
.SEB(1'b0),
.COLLDISN(1'b1)
);
/* verilator lint_on PINCONNECTEMPTY */
// /* verilator lint_off PINCONNECTEMPTY */
// rf2_32x128_wm1 second_ram (
// .CENYA(),
// .AYA(),
// .CENYB(),
// .WENYB(),
// .AYB(),
// .QA(out_b_reg_data),
// .SOA(),
// .SOB(),
// .CLKA(clk),
// .CENA(cena_2),
// .AA(VX_gpr_read.rs2),
// .CLKB(clk),
// .CENB(cenb),
// .WENB(write_bit_mask),
// .AB(VX_writeback_inter.rd),
// .DB(VX_writeback_inter.write_data),
// .EMAA(3'b011),
// .EMASA(1'b0),
// .EMAB(3'b011),
// .TENA(1'b1),
// .TCENA(1'b0),
// .TAA(5'b0),
// .TENB(1'b1),
// .TCENB(1'b0),
// .TWENB(128'b0),
// .TAB(5'b0),
// .TDB(128'b0),
// .RET1N(1'b1),
// .SIA(2'b0),
// .SEA(1'b0),
// .DFTRAMBYP(1'b0),
// .SIB(2'b0),
// .SEB(1'b0),
// .COLLDISN(1'b1)
// );
// /* verilator lint_on PINCONNECTEMPTY */
endmodule

View File

@@ -42,7 +42,7 @@ module VX_scheduler (
always @(posedge clk or posedge reset) begin
if (reset) begin
for (i = 0; i < 32; i = i + 1) rename_table[i] = 0;
for (i = 0; i < 32; i = i + 1) rename_table[i] <= 0;
end else begin
if (valid_wb ) rename_table[VX_writeback_inter.rd] <= 0;
if (!schedule_delay && wb_inc) rename_table[VX_bckE_req.rd] <= 1;

View File

@@ -50,7 +50,7 @@ module VX_warp_scheduler (
);
wire[(1+32+`NT_M1):0] d;
wire[(1+32+`NT_M1):0] d[`NW-1:0];
wire join_fall;
wire[31:0] join_pc;
@@ -182,11 +182,11 @@ module VX_warp_scheduler (
end
end
wire[(1+32+`NT_M1):0] q1 = {1'b1, warp_pcs[split_warp_num], thread_masks[split_warp_num]};
wire[(1+32+`NT_M1):0] q1 = {1'b1, 32'b0 , thread_masks[split_warp_num]};
wire[(1+32+`NT_M1):0] q2 = {1'b0, split_save_pc , split_later_mask};
assign {join_fall, join_pc, join_tm} = d;
assign {join_fall, join_pc, join_tm} = d[join_warp_num];
@@ -202,7 +202,7 @@ module VX_warp_scheduler (
.reset(reset),
.push (push),
.pop (pop),
.d (d),
.d (d[curr_warp]),
.q1 (q1),
.q2 (q2)
);

View File

@@ -1,7 +1,7 @@
# Dynamic Instructions: 12
# of total cycles: 25
# Dynamic Instructions: 36
# of total cycles: 50
# of forwarding stalls: 0
# of branch stalls: 0
# CPI: 2.08333
# time to simulate: 0 milliseconds
# CPI: 1.38889
# time to simulate: 6.95313e-310 milliseconds
# GRADE: Failed on test: 4294967295

View File

@@ -1 +1 @@
#define VCD_OFF
#define VCD_OUTPUT

View File

@@ -373,8 +373,14 @@ bool Vortex::simulate(std::string file_to_simulate)
bool cont = false;
// for (int i = 0; i < 500; i++)
vortex->clk = 0;
vortex->reset = 1;
vortex->clk = 0;
vortex->eval();
vortex->reset = 1;
vortex->clk = 1;
vortex->eval();
vortex->reset = 0;
vortex->clk = 0;
// unsigned cycles;
counter = 0;