Added ISA2 infrastructure with bugs

This commit is contained in:
felsabbagh3
2019-10-18 05:21:32 -04:00
parent 629ed3f8f9
commit f7b55427b4
11 changed files with 124 additions and 86 deletions

View File

@@ -52,12 +52,15 @@ VX_inst_mem_wb_inter VX_mem_wb();
VX_exec_unit_req_inter VX_exec_unit_req();
VX_inst_exec_wb_inter VX_inst_exec_wb();
// GPU unit input
VX_gpu_inst_req_inter VX_gpu_inst_req();
VX_gpr_stage VX_gpr_stage(
.clk (clk),
.schedule_delay (schedule_delay),
.VX_writeback_inter(VX_writeback_inter),
.VX_bckE_req (VX_bckE_req),
.VX_warp_ctl (VX_warp_ctl),
.VX_bckE_req_out (VX_bckE_req_out),
.VX_gpr_data (VX_gpr_data)
);
@@ -67,7 +70,8 @@ VX_inst_multiplex VX_inst_mult(
.VX_bckE_req (VX_bckE_req_out),
.VX_gpr_data (VX_gpr_data),
.VX_exec_unit_req(VX_exec_unit_req),
.VX_lsu_req (VX_lsu_req)
.VX_lsu_req (VX_lsu_req),
.VX_gpu_inst_req (VX_gpu_inst_req)
);
@@ -94,6 +98,12 @@ VX_execute_unit VX_execUnit(
.out_csr_result (VX_csr_w_req.csr_result)
);
VX_gpgpu_inst VX_gpgpu_inst(
.VX_gpu_inst_req(VX_gpu_inst_req),
.VX_warp_ctl (VX_warp_ctl)
);
VX_writeback VX_wb(
.VX_mem_wb (VX_mem_wb),
.VX_inst_exec_wb (VX_inst_exec_wb),

View File

@@ -36,10 +36,11 @@ module VX_decode(
wire is_e_inst;
wire is_gpgpu;
// wire is_clone;
wire is_jalrs;
wire is_jmprt;
wire is_wspawn;
wire is_tmc;
wire is_split;
wire is_join;
wire is_barrier;
wire[2:0] func3;
wire[6:0] func7;
@@ -110,38 +111,26 @@ module VX_decode(
assign is_e_inst = (curr_opcode == `SYS_INST) && (func3 == 0);
assign is_gpgpu = (curr_opcode == `GPGPU_INST);
// assign is_clone = is_gpgpu && (func3 == 5);
assign is_jalrs = is_gpgpu && (func3 == 6);
assign is_jmprt = is_gpgpu && (func3 == 4);
assign is_wspawn = is_gpgpu && (func3 == 0);
assign is_tmc = is_gpgpu && (func3 == 0); // Goes to BE
assign is_wspawn = is_gpgpu && (func3 == 1); // Goes to BE
assign is_barrier = is_gpgpu && (func3 == 4); // Goes to BE
assign is_split = is_gpgpu && (func3 == 2); // Goes to BE
assign is_join = is_gpgpu && (func3 == 3); // Doesn't go to BE
assign VX_frE_to_bckE_req.is_wspawn = is_wspawn;
assign VX_frE_to_bckE_req.is_tmc = is_tmc;
assign VX_frE_to_bckE_req.is_split = is_split;
assign VX_frE_to_bckE_req.is_barrier = is_barrier;
assign VX_frE_to_bckE_req.csr_immed = is_csr_immed;
assign VX_frE_to_bckE_req.wspawn = is_wspawn;
// wire[`NT_M1:0] jalrs_thread_mask = 0;
// wire[`NT_M1:0] jmprt_thread_mask;
// genvar tm_i;
// generate
// for (tm_i = 0; tm_i < `NT; tm_i = tm_i + 1) begin
// assign jalrs_thread_mask[tm_i] = $signed(tm_i) <= $signed(VX_frE_to_bckE_req.b_reg_data[0]);
// end
// endgenerate
// genvar tm_ji;
// generate
// assign jmprt_thread_mask[0] = 1;
// for (tm_ji = 1; tm_ji < `NT; tm_ji = tm_ji + 1) begin
// assign jmprt_thread_mask[tm_ji] = 0;
// end
// endgenerate
assign VX_frE_to_bckE_req.wb = (is_jal || is_jalr || is_jalrs || is_e_inst) ? `WB_JAL :
assign VX_frE_to_bckE_req.wb = (is_jal || is_jalr || is_e_inst) ? `WB_JAL :
is_linst ? `WB_MEM :
(is_itype || is_rtype || is_lui || is_auipc || is_csr) ? `WB_ALU :
`NO_WB;
@@ -200,14 +189,6 @@ module VX_decode(
temp_jal = 1'b1 && in_valid[0];
temp_jal_offset = jal_2_offset;
end
`GPGPU_INST:
begin
if (is_jalrs || is_jmprt)
begin
temp_jal = 1'b1 && in_valid[0];
temp_jal_offset = 32'h0;
end
end
`SYS_INST:
begin
// $display("SYS EBREAK %h", (jal_sys_jal && in_valid[0]) );
@@ -293,14 +274,6 @@ module VX_decode(
temp_branch_type = `NO_BRANCH;
temp_branch_stall = 1'b1 && in_valid[0];
end
`GPGPU_INST:
begin
if (is_jalrs || is_jmprt)
begin
temp_branch_type = `NO_BRANCH;
temp_branch_stall = 1'b1 && in_valid[0];
end
end
default:
begin
temp_branch_type = `NO_BRANCH;
@@ -311,7 +284,7 @@ module VX_decode(
assign VX_frE_to_bckE_req.branch_type = temp_branch_type;
assign VX_wstall.wstall = temp_branch_stall && in_valid[0];
assign VX_wstall.wstall = (temp_branch_stall || is_tmc || is_split || is_join || is_barrier) && (|in_valid);
assign VX_wstall.warp_num = in_warp_num;
always @(*) begin

32
rtl/VX_gpgpu_inst.v Normal file
View File

@@ -0,0 +1,32 @@
// VX_gpgpu_inst: turns a GPU-instruction request into warp-control signals.
//
// Ports:
//   VX_gpu_inst_req - (input)  decoded GPU instruction flags (is_tmc, is_split, ...),
//                     per-thread valid bits, warp number and rs1 register data.
//   VX_warp_ctl     - (output) warp number, thread-mask update, wspawn and ebreak
//                     controls driven by this module.
//
// Only the TMC thread-mask path is implemented here; wspawn is tied off and
// split/barrier have no dedicated logic yet.
module VX_gpgpu_inst (
	// Input
	VX_gpu_inst_req_inter VX_gpu_inst_req,
	// Output
	VX_warp_ctl_inter     VX_warp_ctl
);

	// At least one thread of the request is valid.
	wire valid_inst = (|VX_gpu_inst_req.valid);

	// A TMC instruction is actually present in this stage.
	wire tmc_fire   = VX_gpu_inst_req.is_tmc && valid_inst;

	// New thread mask for TMC: thread i is enabled when i < rs1 value of thread 0.
	wire[`NT_M1:0] tmc_new_mask;

	genvar curr_t;
	generate
		for (curr_t = 0; curr_t < `NT; curr_t = curr_t + 1) begin : tmc_new_mask_gen
			assign tmc_new_mask[curr_t] = curr_t < VX_gpu_inst_req.a_reg_data[0];
		end
	endgenerate

	assign VX_warp_ctl.warp_num    = VX_gpu_inst_req.warp_num;

	// NOTE(review): for is_split this asserts change_mask while thread_mask is 0,
	// which would clear the warp's thread mask - confirm intended split behaviour.
	assign VX_warp_ctl.change_mask = (VX_gpu_inst_req.is_tmc || VX_gpu_inst_req.is_split) && valid_inst;
	assign VX_warp_ctl.thread_mask = VX_gpu_inst_req.is_tmc ? tmc_new_mask : 0;

	// ebreak must only fire for a valid TMC instruction requesting zero threads.
	// Without the tmc_fire qualifier it would assert for ANY instruction (or a
	// bubble) whose thread-0 rs1 data happens to be zero.
	assign VX_warp_ctl.ebreak      = tmc_fire && (VX_gpu_inst_req.a_reg_data[0] == 0);

	// wspawn is not implemented yet: tie off.
	assign VX_warp_ctl.wspawn      = 0;
	assign VX_warp_ctl.wspawn_pc   = 0;

	// Not yet consumed by any logic in this module:
	// VX_gpu_inst_req.is_wspawn
	// VX_gpu_inst_req.is_split
	// VX_gpu_inst_req.is_barrier

endmodule

View File

@@ -11,8 +11,6 @@ module VX_gpr_stage (
// Outputs
// Warp Control
VX_warp_ctl_inter VX_warp_ctl,
// Original Request 1 cycle later
VX_frE_to_bckE_req_inter VX_bckE_req_out,
// Data Read
@@ -75,22 +73,4 @@ module VX_gpr_stage (
.VX_bckE_req (VX_bckE_req_out)
);
// assign VX_warp_ctl.warp_num = VX_bckE_req_out.warp_num;
// assign VX_warp_ctl.wspawn = VX_bckE_req_out.wspawn;
// assign VX_warp_ctl.wspawn_pc = VX_bckE_req_out.a_reg_data[0];
// assign VX_warp_ctl.thread_mask = is_jalrs ? jalrs_thread_mask : jmprt_thread_mask;
// assign VX_warp_ctl.change_mask = is_jalrs || is_jmprt;
// assign VX_warp_ctl.ebreak = VX_bckE_req_out.ebreak;
assign VX_warp_ctl.warp_num = 0;
assign VX_warp_ctl.wspawn = 0;
assign VX_warp_ctl.wspawn_pc = 0;
assign VX_warp_ctl.thread_mask = 0;
assign VX_warp_ctl.change_mask = 0;
assign VX_warp_ctl.ebreak = 0;
endmodule

View File

@@ -5,16 +5,23 @@ module VX_inst_multiplex (
// Outputs
VX_exec_unit_req_inter VX_exec_unit_req,
VX_lsu_req_inter VX_lsu_req
VX_lsu_req_inter VX_lsu_req,
VX_gpu_inst_req_inter VX_gpu_inst_req
);
wire[`NT_M1:0] is_mem_mask;
wire is_mem = (VX_bckE_req.mem_write != `NO_MEM_WRITE) || (VX_bckE_req.mem_read != `NO_MEM_READ);
wire[`NT_M1:0] is_gpu_mask;
wire is_mem = (VX_bckE_req.mem_write != `NO_MEM_WRITE) || (VX_bckE_req.mem_read != `NO_MEM_READ);
// wire is_gpu = (VX_bckE_req.is_wspawn || VX_bckE_req.is_tmc || VX_bckE_req.is_barrier || VX_bckE_req.is_split);
wire is_gpu = 0;
genvar currT;
for (currT = 0; currT < `NT; currT = currT + 1) assign is_mem_mask[currT] = is_mem;
for (currT = 0; currT < `NT; currT = currT + 1) begin
assign is_mem_mask[currT] = is_mem;
assign is_gpu_mask[currT] = is_gpu;
end
// LSU Unit
assign VX_lsu_req.valid = VX_bckE_req.valid & is_mem_mask;
@@ -31,7 +38,7 @@ module VX_inst_multiplex (
// Execute Unit
assign VX_exec_unit_req.valid = VX_bckE_req.valid & (~is_mem_mask);
assign VX_exec_unit_req.valid = VX_bckE_req.valid & (~is_mem_mask & ~is_gpu_mask);
assign VX_exec_unit_req.warp_num = VX_bckE_req.warp_num;
assign VX_exec_unit_req.curr_PC = VX_bckE_req.curr_PC;
assign VX_exec_unit_req.PC_next = VX_bckE_req.PC_next;
@@ -49,7 +56,6 @@ module VX_inst_multiplex (
assign VX_exec_unit_req.jalQual = VX_bckE_req.jalQual;
assign VX_exec_unit_req.jal = VX_bckE_req.jal;
assign VX_exec_unit_req.jal_offset = VX_bckE_req.jal_offset;
assign VX_exec_unit_req.wspawn = VX_bckE_req.wspawn;
assign VX_exec_unit_req.ebreak = VX_bckE_req.ebreak;
assign VX_exec_unit_req.is_csr = VX_bckE_req.is_csr;
assign VX_exec_unit_req.csr_address = VX_bckE_req.csr_address;
@@ -57,4 +63,18 @@ module VX_inst_multiplex (
assign VX_exec_unit_req.csr_mask = VX_bckE_req.csr_mask;
endmodule
// GPU Req
assign VX_gpu_inst_req.valid = VX_bckE_req.valid & is_gpu_mask;
assign VX_gpu_inst_req.warp_num = VX_bckE_req.warp_num;
assign VX_gpu_inst_req.is_wspawn = VX_bckE_req.is_wspawn;
assign VX_gpu_inst_req.is_tmc = VX_bckE_req.is_tmc;
assign VX_gpu_inst_req.is_split = VX_bckE_req.is_split;
assign VX_gpu_inst_req.is_barrier = VX_bckE_req.is_barrier;
assign VX_gpu_inst_req.a_reg_data = VX_gpr_data.a_reg_data;
assign VX_gpu_inst_req.rd2 = VX_gpr_data.b_reg_data[0];
endmodule

View File

@@ -25,7 +25,6 @@ interface VX_frE_to_bckE_req_inter ();
wire[31:0] curr_PC;
/* verilator lint_off UNUSED */
wire ebreak;
wire wspawn;
/* verilator lint_on UNUSED */
wire jalQual;
wire jal;
@@ -34,6 +33,12 @@ interface VX_frE_to_bckE_req_inter ();
wire[`NT_M1:0] valid;
wire[`NW_M1:0] warp_num;
// GPGPU stuff
wire is_wspawn;
wire is_tmc;
wire is_split;
wire is_barrier;
endinterface

View File

@@ -0,0 +1,24 @@
`include "../VX_define.v"
`ifndef VX_GPU_INST_REQ_IN
`define VX_GPU_INST_REQ_IN
// Request bundle carrying a GPU (warp-control) instruction and its operands.
interface VX_gpu_inst_req_inter ();

	// Which threads/warp the request belongs to
	// (assumes `NT_M1 / `NW_M1 are thread-count-1 / warp-count-1 - TODO confirm in VX_define.v)
	wire [`NT_M1:0]       valid;      // one valid bit per thread
	wire [`NW_M1:0]       warp_num;   // issuing warp index

	// Decoded GPU instruction kind, one flag per instruction
	wire                  is_wspawn, is_tmc, is_split, is_barrier;

	// Operands
	wire [`NT_M1:0][31:0] a_reg_data; // 32-bit source value per thread
	wire [31:0]           rd2;        // single 32-bit second operand

endinterface
`endif

View File

@@ -10,10 +10,11 @@ interface VX_warp_ctl_inter ();
wire[`NW_M1:0] warp_num;
wire change_mask;
wire[`NT_M1:0] thread_mask;
wire wspawn;
wire[31:0] wspawn_pc;
wire ebreak;
wire ebreak;
endinterface

View File

@@ -18,14 +18,14 @@ module VX_d_e_reg (
wire flush = (in_branch_stall == `STALL);
VX_generic_register #(.N(237)) d_e_reg
VX_generic_register #(.N(240)) d_e_reg
(
.clk (clk),
.reset(reset),
.stall(stall),
.flush(flush),
.in ({VX_frE_to_bckE_req.csr_address, VX_frE_to_bckE_req.jalQual, VX_frE_to_bckE_req.ebreak, VX_frE_to_bckE_req.wspawn, VX_frE_to_bckE_req.is_csr, VX_frE_to_bckE_req.csr_immed, VX_frE_to_bckE_req.csr_mask, VX_frE_to_bckE_req.rd, VX_frE_to_bckE_req.rs1, VX_frE_to_bckE_req.rs2, VX_frE_to_bckE_req.alu_op, VX_frE_to_bckE_req.wb, VX_frE_to_bckE_req.rs2_src, VX_frE_to_bckE_req.itype_immed, VX_frE_to_bckE_req.mem_read, VX_frE_to_bckE_req.mem_write, VX_frE_to_bckE_req.branch_type, VX_frE_to_bckE_req.upper_immed, VX_frE_to_bckE_req.curr_PC, VX_frE_to_bckE_req.jal, VX_frE_to_bckE_req.jal_offset, VX_frE_to_bckE_req.PC_next, VX_frE_to_bckE_req.valid, VX_frE_to_bckE_req.warp_num}),
.out ({VX_bckE_req.csr_address , VX_bckE_req.jalQual , VX_bckE_req.ebreak , VX_bckE_req.wspawn ,VX_bckE_req.is_csr , VX_bckE_req.csr_immed , VX_bckE_req.csr_mask , VX_bckE_req.rd , VX_bckE_req.rs1 , VX_bckE_req.rs2 , VX_bckE_req.alu_op , VX_bckE_req.wb , VX_bckE_req.rs2_src , VX_bckE_req.itype_immed , VX_bckE_req.mem_read , VX_bckE_req.mem_write , VX_bckE_req.branch_type , VX_bckE_req.upper_immed , VX_bckE_req.curr_PC , VX_bckE_req.jal , VX_bckE_req.jal_offset , VX_bckE_req.PC_next , VX_bckE_req.valid , VX_bckE_req.warp_num})
.in ({VX_frE_to_bckE_req.csr_address, VX_frE_to_bckE_req.jalQual, VX_frE_to_bckE_req.ebreak, VX_frE_to_bckE_req.is_csr, VX_frE_to_bckE_req.csr_immed, VX_frE_to_bckE_req.csr_mask, VX_frE_to_bckE_req.rd, VX_frE_to_bckE_req.rs1, VX_frE_to_bckE_req.rs2, VX_frE_to_bckE_req.alu_op, VX_frE_to_bckE_req.wb, VX_frE_to_bckE_req.rs2_src, VX_frE_to_bckE_req.itype_immed, VX_frE_to_bckE_req.mem_read, VX_frE_to_bckE_req.mem_write, VX_frE_to_bckE_req.branch_type, VX_frE_to_bckE_req.upper_immed, VX_frE_to_bckE_req.curr_PC, VX_frE_to_bckE_req.jal, VX_frE_to_bckE_req.jal_offset, VX_frE_to_bckE_req.PC_next, VX_frE_to_bckE_req.valid, VX_frE_to_bckE_req.warp_num, VX_frE_to_bckE_req.is_wspawn, VX_frE_to_bckE_req.is_tmc, VX_frE_to_bckE_req.is_split, VX_frE_to_bckE_req.is_barrier}),
.out ({VX_bckE_req.csr_address , VX_bckE_req.jalQual , VX_bckE_req.ebreak ,VX_bckE_req.is_csr , VX_bckE_req.csr_immed , VX_bckE_req.csr_mask , VX_bckE_req.rd , VX_bckE_req.rs1 , VX_bckE_req.rs2 , VX_bckE_req.alu_op , VX_bckE_req.wb , VX_bckE_req.rs2_src , VX_bckE_req.itype_immed , VX_bckE_req.mem_read , VX_bckE_req.mem_write , VX_bckE_req.branch_type , VX_bckE_req.upper_immed , VX_bckE_req.curr_PC , VX_bckE_req.jal , VX_bckE_req.jal_offset , VX_bckE_req.PC_next , VX_bckE_req.valid , VX_bckE_req.warp_num , VX_bckE_req.is_wspawn , VX_bckE_req.is_tmc , VX_bckE_req.is_split , VX_bckE_req.is_barrier })
);

View File

@@ -1,7 +0,0 @@
# Dynamic Instructions: 52683
# of total cycles: 52699
# of forwarding stalls: 0
# of branch stalls: 0
# CPI: 1.0003
# time to simulate: 0 milliseconds
# GRADE: Failed on test: 4294967295

View File

@@ -3,7 +3,7 @@ set link_library [concat * sc12mc_cln28hpm_base_ulvt_c35_ssg_typical_max_0p81v_
set symbol_library {}
set target_library [concat sc12mc_cln28hpm_base_ulvt_c35_ssg_typical_max_0p81v_m40c.db]
set verilog_files [ list VX_wstall_inter.v VX_inst_exec_wb_inter.v VX_lsu.v VX_execute_unit.v VX_lsu_addr_gen.v VX_inst_multiplex.v VX_exec_unit_req_inter.v VX_lsu_req_inter.v VX_alu.v VX_back_end.v VX_gpr_stage.v VX_gpr_data_inter.v VX_csr_handler.v VX_decode.v VX_define.v VX_scheduler.v VX_fetch.v VX_front_end.v VX_generic_register.v VX_gpr.v VX_gpr_wrapper.v VX_one_counter.v VX_priority_encoder.v VX_warp.v VX_warp_scheduler.v VX_writeback.v Vortex.v byte_enabled_simple_dual_port_ram.v VX_branch_response_inter.v VX_csr_write_request_inter.v VX_dcache_request_inter.v VX_dcache_response_inter.v VX_frE_to_bckE_req_inter.v VX_gpr_clone_inter.v VX_gpr_jal_inter.v VX_gpr_read_inter.v VX_gpr_wspawn_inter.v VX_icache_request_inter.v VX_icache_response_inter.v VX_inst_mem_wb_inter.v VX_inst_meta_inter.v VX_jal_response_inter.v VX_mem_req_inter.v VX_mw_wb_inter.v VX_warp_ctl_inter.v VX_wb_inter.v VX_d_e_reg.v VX_f_d_reg.v \
set verilog_files [ list VX_gpgpu_inst.v VX_gpu_inst_req_inter.v VX_wstall_inter.v VX_inst_exec_wb_inter.v VX_lsu.v VX_execute_unit.v VX_lsu_addr_gen.v VX_inst_multiplex.v VX_exec_unit_req_inter.v VX_lsu_req_inter.v VX_alu.v VX_back_end.v VX_gpr_stage.v VX_gpr_data_inter.v VX_csr_handler.v VX_decode.v VX_define.v VX_scheduler.v VX_fetch.v VX_front_end.v VX_generic_register.v VX_gpr.v VX_gpr_wrapper.v VX_one_counter.v VX_priority_encoder.v VX_warp.v VX_warp_scheduler.v VX_writeback.v Vortex.v byte_enabled_simple_dual_port_ram.v VX_branch_response_inter.v VX_csr_write_request_inter.v VX_dcache_request_inter.v VX_dcache_response_inter.v VX_frE_to_bckE_req_inter.v VX_gpr_clone_inter.v VX_gpr_jal_inter.v VX_gpr_read_inter.v VX_gpr_wspawn_inter.v VX_icache_request_inter.v VX_icache_response_inter.v VX_inst_mem_wb_inter.v VX_inst_meta_inter.v VX_jal_response_inter.v VX_mem_req_inter.v VX_mw_wb_inter.v VX_warp_ctl_inter.v VX_wb_inter.v VX_d_e_reg.v VX_f_d_reg.v \
]
analyze -format sverilog $verilog_files