Inefficient context-aware design

This commit is contained in:
felsabbagh3
2019-05-08 15:55:06 -07:00
parent 79356c7ab1
commit a6c13bc38c
21 changed files with 639 additions and 464 deletions

View File

@@ -3,6 +3,7 @@
module VX_context (
input wire clk,
input wire in_warp,
input wire in_valid[`NT_M1:0],
input wire in_write_register,
input wire[4:0] in_rd,
@@ -30,6 +31,7 @@ module VX_context (
VX_register_file vx_register_file_master(
.clk (clk),
.in_warp (in_warp),
.in_valid (in_valid[0]),
.in_write_register (in_write_register),
.in_rd (in_rd),
@@ -49,6 +51,7 @@ module VX_context (
assign to_clone = (index == rd1_register[0]) && (state_stall == 1);
VX_register_file_slave vx_register_file_slave(
.clk (clk),
.in_warp (in_warp),
.in_valid (in_valid[index]),
.in_write_register (in_write_register),
.in_rd (in_rd),

View File

@@ -3,54 +3,56 @@
`include "VX_define.v"
module VX_d_e_reg (
input wire clk,
input wire[4:0] in_rd,
input wire[4:0] in_rs1,
input wire[4:0] in_rs2,
input wire[31:0] in_a_reg_data[`NT_M1:0],
input wire[31:0] in_b_reg_data[`NT_M1:0],
input wire[4:0] in_alu_op,
input wire[1:0] in_wb,
input wire in_rs2_src, // NEW
input wire[31:0] in_itype_immed, // new
input wire[2:0] in_mem_read, // NEW
input wire[2:0] in_mem_write,
input wire[31:0] in_PC_next,
input wire[2:0] in_branch_type,
input wire in_fwd_stall,
input wire in_branch_stall,
input wire[19:0] in_upper_immed,
input wire[11:0] in_csr_address, // done
input wire in_is_csr, // done
input wire[31:0] in_csr_mask, // done
input wire[31:0] in_curr_PC,
input wire in_jal,
input wire[31:0] in_jal_offset,
input wire in_freeze,
input wire in_clone_stall,
input wire in_valid[`NT_M1:0],
input wire clk,
input wire[4:0] in_rd,
input wire[4:0] in_rs1,
input wire[4:0] in_rs2,
input wire[31:0] in_a_reg_data[`NT_M1:0],
input wire[31:0] in_b_reg_data[`NT_M1:0],
input wire[4:0] in_alu_op,
input wire[1:0] in_wb,
input wire in_rs2_src, // NEW
input wire[31:0] in_itype_immed, // new
input wire[2:0] in_mem_read, // NEW
input wire[2:0] in_mem_write,
input wire[31:0] in_PC_next,
input wire[2:0] in_branch_type,
input wire in_fwd_stall,
input wire in_branch_stall,
input wire[19:0] in_upper_immed,
input wire[11:0] in_csr_address, // done
input wire in_is_csr, // done
input wire[31:0] in_csr_mask, // done
input wire[31:0] in_curr_PC,
input wire in_jal,
input wire[31:0] in_jal_offset,
input wire in_freeze,
input wire in_clone_stall,
input wire in_valid[`NT_M1:0],
input wire[`NW_M1:0] in_warp_num,
output wire[11:0] out_csr_address, // done
output wire out_is_csr, // done
output wire[31:0] out_csr_mask, // done
output wire[4:0] out_rd,
output wire[4:0] out_rs1,
output wire[4:0] out_rs2,
output wire[31:0] out_a_reg_data[`NT_M1:0],
output wire[31:0] out_b_reg_data[`NT_M1:0],
output wire[4:0] out_alu_op,
output wire[1:0] out_wb,
output wire out_rs2_src, // NEW
output wire[31:0] out_itype_immed, // new
output wire[2:0] out_mem_read,
output wire[2:0] out_mem_write,
output wire[2:0] out_branch_type,
output wire[19:0] out_upper_immed,
output wire[31:0] out_curr_PC,
output wire out_jal,
output wire[31:0] out_jal_offset,
output wire[31:0] out_PC_next,
output wire out_valid[`NT_M1:0]
output wire[11:0] out_csr_address, // done
output wire out_is_csr, // done
output wire[31:0] out_csr_mask, // done
output wire[4:0] out_rd,
output wire[4:0] out_rs1,
output wire[4:0] out_rs2,
output wire[31:0] out_a_reg_data[`NT_M1:0],
output wire[31:0] out_b_reg_data[`NT_M1:0],
output wire[4:0] out_alu_op,
output wire[1:0] out_wb,
output wire out_rs2_src, // NEW
output wire[31:0] out_itype_immed, // new
output wire[2:0] out_mem_read,
output wire[2:0] out_mem_write,
output wire[2:0] out_branch_type,
output wire[19:0] out_upper_immed,
output wire[31:0] out_curr_PC,
output wire out_jal,
output wire[31:0] out_jal_offset,
output wire[31:0] out_PC_next,
output wire out_valid[`NT_M1:0],
output wire[`NW_M1:0] out_warp_num
);
@@ -79,6 +81,8 @@ module VX_d_e_reg (
reg[31:0] reg_data_z[`NT_M1:0];
reg valid_z[`NT_M1:0];
reg[`NW_M1:0] warp_num;
integer ini_reg;
initial begin
rd = 0;
@@ -107,6 +111,7 @@ module VX_d_e_reg (
curr_PC = 0;
jal = `NO_JUMP;
jal_offset = 0;
warp_num = 0;
end
wire stalling;
@@ -134,6 +139,7 @@ module VX_d_e_reg (
assign out_jal_offset = jal_offset;
assign out_curr_PC = curr_PC;
assign out_valid = valid;
assign out_warp_num = warp_num;
always @(posedge clk) begin
@@ -159,6 +165,7 @@ module VX_d_e_reg (
jal_offset <= stalling ? 32'h0 : in_jal_offset;
curr_PC <= stalling ? 32'h0 : in_curr_PC;
valid <= stalling ? valid_z : in_valid;
warp_num <= stalling ? 0 : in_warp_num;
end
end

View File

@@ -12,6 +12,7 @@ module VX_decode(
input wire[4:0] in_rd,
input wire[1:0] in_wb,
input wire in_wb_valid[`NT_M1:0],
input wire[`NW_M1:0] in_wb_warp_num,
// FORWARDING INPUTS
input wire in_src1_fwd,
@@ -19,10 +20,15 @@ module VX_decode(
input wire in_src2_fwd,
input wire[31:0] in_src2_fwd_data[`NT_M1:0],
input wire[`NW_M1:0] in_warp_num,
output wire[11:0] out_csr_address,
output wire out_is_csr,
output wire[31:0] out_csr_mask,
// Outputs
output wire[4:0] out_rd,
output wire[4:0] out_rs1,
@@ -44,7 +50,8 @@ module VX_decode(
output reg out_clone_stall,
output wire out_change_mask,
output wire out_thread_mask[`NT_M1:0],
output wire out_valid[`NT_M1:0]
output wire out_valid[`NT_M1:0],
output wire[`NW_M1:0] out_warp_num
);
wire[6:0] curr_opcode;
@@ -103,8 +110,11 @@ module VX_decode(
reg[4:0] alu_op;
reg[4:0] mul_alu;
wire context_zero_valid = (in_wb_warp_num == 0);
VX_context VX_Context(
.clk (clk),
.in_warp (context_zero_valid),
.in_valid (in_wb_valid),
.in_rd (in_rd),
.in_src1 (out_rs1),
@@ -123,7 +133,7 @@ module VX_decode(
.out_clone_stall (out_clone_stall)
);
assign out_warp_num = in_warp_num;
assign out_valid = in_valid;
assign write_register = (in_wb != 2'h0) ? (1'b1) : (1'b0);

View File

@@ -2,6 +2,8 @@
`define NT 2
`define NT_M1 1
`define NW_M1 1
`define R_INST 7'd51
`define L_INST 7'd3

View File

@@ -25,6 +25,7 @@ module VX_e_m_reg (
input wire[31:0] in_jal_dest,
input wire in_freeze,
input wire in_valid[`NT_M1:0],
input wire[`NW_M1:0] in_warp_num,
output wire[11:0] out_csr_address,
output wire out_is_csr,
@@ -44,7 +45,8 @@ module VX_e_m_reg (
output wire out_jal,
output wire[31:0] out_jal_dest,
output wire[31:0] out_PC_next,
output wire out_valid[`NT_M1:0]
output wire out_valid[`NT_M1:0],
output wire[`NW_M1:0] out_warp_num
);
@@ -67,7 +69,7 @@ module VX_e_m_reg (
reg jal;
reg[31:0] jal_dest;
reg valid[`NT_M1:0];
reg[`NW_M1:0] warp_num;
// reg[31:0] reg_data_z[`NT_T2_M1:0];
// reg[`NT_M1:0] valid_z;
// reg[31:0] alu_result_z[`NT_M1:0];
@@ -90,7 +92,7 @@ module VX_e_m_reg (
branch_type = 0;
jal = `NO_JUMP;
jal_dest = 0;
warp_num = 0;
for (ini_reg = 0; ini_reg < `NT; ini_reg = ini_reg + 1)
begin
a_reg_data[ini_reg] = 0;
@@ -121,7 +123,7 @@ module VX_e_m_reg (
assign out_jal = jal;
assign out_jal_dest = jal_dest;
assign out_valid = valid;
assign out_warp_num = warp_num;
always @(posedge clk) begin
if(in_freeze == 1'b0) begin
@@ -144,6 +146,7 @@ module VX_e_m_reg (
jal <= in_jal;
jal_dest <= in_jal_dest;
valid <= in_valid;
warp_num <= in_warp_num;
end
end

View File

@@ -24,6 +24,7 @@ module VX_execute (
input wire[31:0] in_jal_offset,
input wire[31:0] in_curr_PC,
input wire in_valid[`NT_M1:0],
input [`NW_M1:0] in_warp_num,
output wire[11:0] out_csr_address,
output wire out_is_csr,
@@ -42,7 +43,8 @@ module VX_execute (
output wire[31:0] out_branch_offset,
output wire out_branch_stall,
output wire[31:0] out_PC_next,
output wire out_valid[`NT_M1:0]
output wire out_valid[`NT_M1:0],
output wire[`NW_M1:0] out_warp_num
);
@@ -101,6 +103,7 @@ module VX_execute (
assign out_csr_address = in_csr_address;
assign out_branch_offset = in_itype_immed;
assign out_valid = in_valid;
assign out_warp_num = in_warp_num;
endmodule // VX_execute

View File

@@ -10,10 +10,12 @@ module VX_f_d_reg (
input wire in_fwd_stall,
input wire in_freeze,
input wire in_clone_stall,
input wire[`NW_M1:0] in_warp_num,
output wire[31:0] out_instruction,
output wire[31:0] out_curr_PC,
output wire out_valid[`NT_M1:0]
output wire out_valid[`NT_M1:0],
output wire[`NW_M1:0] out_warp_num
);
// always @(posedge clk) begin
@@ -23,6 +25,7 @@ module VX_f_d_reg (
reg[31:0] instruction;
reg[31:0] curr_PC;
reg valid[`NT_M1:0];
reg[`NW_M1:0] warp_num;
integer reset_cur_thread = 0;
@@ -34,6 +37,7 @@ module VX_f_d_reg (
if(reset) begin
instruction <= 32'h0;
curr_PC <= 32'h0;
warp_num <= 0;
for (reset_cur_thread = 0; reset_cur_thread < `NT; reset_cur_thread = reset_cur_thread + 1)
valid[reset_cur_thread] <= 1'b0;
@@ -45,6 +49,7 @@ module VX_f_d_reg (
instruction <= in_instruction;
valid <= in_valid;
curr_PC <= in_curr_PC;
warp_num <= in_warp_num;
end
end
@@ -55,6 +60,7 @@ module VX_f_d_reg (
assign out_instruction = instruction;
assign out_curr_PC = curr_PC;
assign out_valid = valid;
assign out_warp_num = warp_num;

View File

@@ -21,7 +21,7 @@ module VX_fetch (
output wire[31:0] out_instruction,
output wire out_delay,
// output wire[1:0] out_warp_num,
output wire[`NW_M1:0] out_warp_num,
output wire[31:0] out_curr_PC,
output wire out_valid[`NT_M1:0]
);
@@ -29,13 +29,22 @@ module VX_fetch (
reg stall;
reg[31:0] out_PC;
// reg[1:0] warp_num;
reg[`NW_M1:0] warp_num;
reg[`NW_M1:0] warp_state;
// initial begin
// warp_num = 0;
// end
initial begin
warp_num = 0;
warp_state = 0;
end
// Warp selector counter.
// Advances warp_num by one on every rising clock edge and wraps back to 0
// once it has reached warp_state (presumably the index of the last active
// warp -- confirm against whatever drives warp_state).
//
// The asynchronous reset is kept in its own branch so the block matches the
// standard async-reset flip-flop template: only `reset` may gate the
// asynchronous path, while the wrap-around comparison is a purely
// synchronous condition evaluated on the clock edge.
always @(posedge clk or posedge reset) begin
if (reset) begin
// Asynchronous reset: restart from warp 0.
warp_num <= 0;
end else if (warp_num == warp_state) begin
// Synchronous wrap-around after the last warp has been issued.
warp_num <= 0;
end else begin
warp_num <= warp_num + 1;
end
end
assign stall = in_clone_stall || in_branch_stall || in_fwd_stall || in_branch_stall_exe || in_interrupt || in_freeze || in_debug;
@@ -68,7 +77,7 @@ module VX_fetch (
assign out_curr_PC = out_PC;
assign out_valid = warp_valid;
// assign out_warp_num = warp_num;
assign out_warp_num = warp_num;
assign out_delay = 0;
assign out_instruction = stall ? 32'b0 : in_instruction;

View File

@@ -3,34 +3,39 @@
module VX_forwarding (
// INFO FROM DECODE
input wire[4:0] in_decode_src1,
input wire[4:0] in_decode_src2,
input wire[11:0] in_decode_csr_address,
input wire[4:0] in_decode_src1,
input wire[4:0] in_decode_src2,
input wire[11:0] in_decode_csr_address,
input wire[`NW_M1:0] in_decode_warp_num,
// INFO FROM EXE
input wire[4:0] in_execute_dest,
input wire[1:0] in_execute_wb,
input wire[31:0] in_execute_alu_result[`NT_M1:0],
input wire[31:0] in_execute_PC_next,
input wire in_execute_is_csr,
input wire[11:0] in_execute_csr_address,
input wire[4:0] in_execute_dest,
input wire[1:0] in_execute_wb,
input wire[31:0] in_execute_alu_result[`NT_M1:0],
input wire[31:0] in_execute_PC_next,
input wire in_execute_is_csr,
input wire[11:0] in_execute_csr_address,
input wire[`NW_M1:0] in_execute_warp_num,
// INFO FROM MEM
input wire[4:0] in_memory_dest,
input wire[1:0] in_memory_wb,
input wire[31:0] in_memory_alu_result[`NT_M1:0],
input wire[31:0] in_memory_mem_data[`NT_M1:0],
input wire[31:0] in_memory_PC_next,
input wire in_memory_is_csr,
input wire[11:0] in_memory_csr_address,
input wire[31:0] in_memory_csr_result,
input wire[4:0] in_memory_dest,
input wire[1:0] in_memory_wb,
input wire[31:0] in_memory_alu_result[`NT_M1:0],
input wire[31:0] in_memory_mem_data[`NT_M1:0],
input wire[31:0] in_memory_PC_next,
input wire in_memory_is_csr,
input wire[11:0] in_memory_csr_address,
input wire[31:0] in_memory_csr_result,
input wire[`NW_M1:0] in_memory_warp_num,
// INFO FROM WB
input wire[4:0] in_writeback_dest,
input wire[1:0] in_writeback_wb,
input wire[31:0] in_writeback_alu_result[`NT_M1:0],
input wire[31:0] in_writeback_mem_data[`NT_M1:0],
input wire[31:0] in_writeback_PC_next,
input wire[4:0] in_writeback_dest,
input wire[1:0] in_writeback_wb,
input wire[31:0] in_writeback_alu_result[`NT_M1:0],
input wire[31:0] in_writeback_mem_data[`NT_M1:0],
input wire[31:0] in_writeback_PC_next,
input wire[`NW_M1:0] in_writeback_warp_num,
// OUT SIGNALS
output wire out_src1_fwd,
@@ -92,16 +97,19 @@ module VX_forwarding (
// SRC1
assign src1_exe_fwd = ((in_decode_src1 == in_execute_dest) &&
(in_decode_src1 != `ZERO_REG) &&
(in_execute_wb != `NO_WB));
(in_execute_wb != `NO_WB)) &&
(in_decode_warp_num == in_execute_warp_num);
assign src1_mem_fwd = ((in_decode_src1 == in_memory_dest) &&
(in_decode_src1 != `ZERO_REG) &&
(in_memory_wb != `NO_WB) &&
(!src1_exe_fwd));
(!src1_exe_fwd)) &&
(in_decode_warp_num == in_memory_warp_num);
assign src1_wb_fwd = ((in_decode_src1 == in_writeback_dest) &&
(in_decode_src1 != `ZERO_REG) &&
(in_writeback_wb != `NO_WB) &&
(in_writeback_warp_num == in_decode_warp_num) &&
(!src1_exe_fwd) &&
(!src1_mem_fwd));
@@ -115,18 +123,21 @@ module VX_forwarding (
// SRC2
assign src2_exe_fwd = ((in_decode_src2 == in_execute_dest) &&
(in_decode_src2 != `ZERO_REG) &&
(in_execute_wb != `NO_WB));
(in_execute_wb != `NO_WB)) &&
(in_decode_warp_num == in_execute_warp_num);
assign src2_mem_fwd = ((in_decode_src2 == in_memory_dest) &&
(in_decode_src2 != `ZERO_REG) &&
(in_memory_wb != `NO_WB) &&
(!src2_exe_fwd));
(!src2_exe_fwd)) &&
(in_decode_warp_num == in_memory_warp_num);
assign src2_wb_fwd = ((in_decode_src2 == in_writeback_dest) &&
(in_decode_src2 != `ZERO_REG) &&
(in_writeback_wb != `NO_WB) &&
(!src2_exe_fwd) &&
(!src2_mem_fwd));
(!src2_mem_fwd)) &&
(in_writeback_warp_num == in_decode_warp_num);
assign out_src2_fwd = src2_exe_fwd || src2_mem_fwd || src2_wb_fwd; // COMMENT

View File

@@ -13,6 +13,7 @@ module VX_m_w_reg (
input wire[31:0] in_PC_next,
input wire in_freeze,
input wire in_valid[`NT_M1:0],
input wire[`NW_M1:0] in_warp_num,
output wire[31:0] out_alu_result[`NT_M1:0],
output wire[31:0] out_mem_result[`NT_M1:0], // NEW
@@ -21,7 +22,8 @@ module VX_m_w_reg (
output wire[4:0] out_rs1,
output wire[4:0] out_rs2,
output wire[31:0] out_PC_next,
output wire out_valid[`NT_M1:0]
output wire out_valid[`NT_M1:0],
output wire[`NW_M1:0] out_warp_num
);
@@ -34,7 +36,7 @@ module VX_m_w_reg (
reg[1:0] wb;
reg[31:0] PC_next;
reg valid[`NT_M1:0];
reg[`NW_M1:0] warp_num;
initial begin
// alu_result = 0;
@@ -44,6 +46,7 @@ module VX_m_w_reg (
rs2 = 0;
wb = 0;
PC_next = 0;
warp_num = 0;
// valid = 0;
end
@@ -55,7 +58,7 @@ module VX_m_w_reg (
assign out_wb = wb;
assign out_PC_next = PC_next;
assign out_valid = valid;
assign out_warp_num = warp_num;
always @(posedge clk) begin
if(in_freeze == 1'b0) begin
@@ -67,6 +70,7 @@ module VX_m_w_reg (
wb <= in_wb;
PC_next <= in_PC_next;
valid <= in_valid;
warp_num <= in_warp_num;
end
end

View File

@@ -20,6 +20,7 @@ module VX_memory (
input wire[2:0] in_branch_type,
input wire in_valid[`NT_M1:0],
input wire[31:0] in_cache_driver_out_data[`NT_M1:0],
input wire[`NW_M1:0] in_warp_num,
output wire[31:0] out_alu_result[`NT_M1:0],
output wire[31:0] out_mem_result[`NT_M1:0],
@@ -36,7 +37,8 @@ module VX_memory (
output wire[2:0] out_cache_driver_in_mem_read,
output wire[2:0] out_cache_driver_in_mem_write,
output wire out_cache_driver_in_valid[`NT_M1:0],
output wire[31:0] out_cache_driver_in_data[`NT_M1:0]
output wire[31:0] out_cache_driver_in_data[`NT_M1:0],
output wire[`NW_M1:0] out_warp_num
);
// always @(in_mem_read, in_cache_driver_out_data) begin
@@ -88,6 +90,7 @@ module VX_memory (
assign out_rs2 = in_rs2;
assign out_PC_next = in_PC_next;
assign out_valid = in_valid;
assign out_warp_num = in_warp_num;
// always @(*) begin

View File

@@ -2,6 +2,7 @@
module VX_register_file (
input wire clk,
input wire in_warp,
input wire in_valid,
input wire in_write_register,
input wire[4:0] in_rd,
@@ -37,7 +38,7 @@ module VX_register_file (
assign write_enable = (in_write_register && (in_rd != 5'h0)) && in_valid;
always @(posedge clk) begin
if(write_enable) begin
if(write_enable && in_warp) begin
// $display("RF: Writing %h to %d",write_data, write_register);
registers[write_register] <= write_data;
end

View File

@@ -5,6 +5,7 @@
module VX_register_file_slave (
input wire clk,
input wire in_warp,
input wire in_valid,
input wire in_write_register,
input wire[4:0] in_rd,
@@ -42,7 +43,7 @@ module VX_register_file_slave (
assign write_enable = (in_write_register && (in_rd != 5'h0)) && in_valid;
always @(posedge clk) begin
if(write_enable && !in_clone) begin
if(write_enable && !in_clone && in_warp) begin
// $display("RF: Writing %h to %d",write_data, write_register);
registers[write_register] <= write_data;
end else if (in_clone && in_to_clone) begin

View File

@@ -14,10 +14,12 @@ module VX_writeback (
/* verilator lint_off UNUSED */
input wire in_valid[`NT_M1:0],
/* verilator lint_on UNUSED */
input wire [`NW_M1:0] in_warp_num,
output wire[31:0] out_write_data[`NT_M1:0],
output wire[4:0] out_rd,
output wire[1:0] out_wb
output wire[1:0] out_wb,
output wire[`NW_M1:0] out_warp_num
);
wire is_jal;
@@ -60,6 +62,7 @@ module VX_writeback (
assign out_rd = in_rd;
assign out_wb = in_wb;
assign out_warp_num = in_warp_num;
endmodule // VX_writeback

View File

@@ -25,15 +25,17 @@ module Vortex(
assign curr_PC = fetch_curr_PC;
// From fetch
wire[31:0] fetch_instruction;
wire fetch_delay;
wire[31:0] fetch_curr_PC;
wire fetch_valid[`NT_M1:0];
wire[31:0] fetch_instruction;
wire fetch_delay;
wire[31:0] fetch_curr_PC;
wire fetch_valid[`NT_M1:0];
wire[`NW_M1:0] fetch_warp_num;
// From f_d_register
wire[31:0] f_d_instruction;
wire[31:0] f_d_curr_PC;
wire f_d_valid[`NT_M1:0];
wire[31:0] f_d_instruction;
wire[31:0] f_d_curr_PC;
wire f_d_valid[`NT_M1:0];
wire[`NW_M1:0] f_d_warp_num;
// From decode
wire decode_branch_stall;
@@ -60,6 +62,7 @@ wire decode_valid[`NT_M1:0];
wire decode_clone_stall;
wire decode_change_mask;
wire decode_thread_mask[`NT_M1:0];
wire[`NW_M1:0] decode_warp_num;
// From d_e_register
wire[11:0] d_e_csr_address;
@@ -82,7 +85,8 @@ wire[31:0] d_e_curr_PC;
wire d_e_jal;
wire[31:0] d_e_jal_offset;
wire[31:0] d_e_PC_next;
wire d_e_valid[`NT_M1:0];
wire d_e_valid[`NT_M1:0];
wire[`NW_M1:0] d_e_warp_num;
// From execute
@@ -104,6 +108,7 @@ wire[31:0] execute_jal_dest;
wire[31:0] execute_branch_offset;
wire[31:0] execute_PC_next;
wire execute_valid[`NT_M1:0];
wire[`NW_M1:0] execute_warp_num;
// From e_m_register
@@ -128,6 +133,7 @@ wire[31:0] e_m_branch_offset;
wire[2:0] e_m_branch_type;
wire[31:0] e_m_PC_next;
wire e_m_valid[`NT_M1:0];
wire[`NW_M1:0] e_m_warp_num;
// From memory
@@ -142,6 +148,7 @@ wire[4:0] memory_rs1;
wire[4:0] memory_rs2;
wire[31:0] memory_PC_next;
wire memory_valid[`NT_M1:0];
wire[`NW_M1:0] memory_warp_num;
// From m_w_register
wire[31:0] m_w_alu_result[`NT_M1:0];
@@ -153,27 +160,29 @@ wire[4:0] m_w_rs1;
wire[4:0] m_w_rs2;
/* verilator lint_on UNUSED */
wire[31:0] m_w_PC_next;
wire m_w_valid[`NT_M1:0];
wire m_w_valid[`NT_M1:0];
wire[`NW_M1:0] m_w_warp_num;
// From writeback
wire[31:0] writeback_write_data[`NT_M1:0];
wire[4:0] writeback_rd;
wire[1:0] writeback_wb;
wire[31:0] writeback_write_data[`NT_M1:0];
wire[4:0] writeback_rd;
wire[1:0] writeback_wb;
wire[`NW_M1:0] writeback_warp_num;
// From csr handler
wire[31:0] csr_decode_csr_data;
wire[31:0] csr_decode_csr_data;
// From forwarding
wire forwarding_fwd_stall;
wire forwarding_src1_fwd;
wire forwarding_src2_fwd;
wire forwarding_fwd_stall;
wire forwarding_src1_fwd;
wire forwarding_src2_fwd;
/* verilator lint_off UNUSED */
wire forwarding_csr_fwd;
wire[31:0] forwarding_csr_fwd_data;
wire forwarding_csr_fwd;
wire[31:0] forwarding_csr_fwd_data;
/* verilator lint_on UNUSED */
wire[31:0] forwarding_src1_fwd_data[`NT_M1:0];
wire[31:0] forwarding_src2_fwd_data[`NT_M1:0];
wire[31:0] forwarding_src1_fwd_data[`NT_M1:0];
wire[31:0] forwarding_src2_fwd_data[`NT_M1:0];
// Internal
@@ -207,6 +216,7 @@ VX_fetch vx_fetch(
.out_instruction (fetch_instruction),
.out_delay (fetch_delay),
.out_curr_PC (fetch_curr_PC),
.out_warp_num (fetch_warp_num),
.out_valid (fetch_valid)
);
@@ -220,9 +230,11 @@ VX_f_d_reg vx_f_d_reg(
.in_fwd_stall (forwarding_fwd_stall),
.in_freeze (total_freeze),
.in_clone_stall (decode_clone_stall),
.in_warp_num (fetch_warp_num),
.out_instruction(f_d_instruction),
.out_curr_PC (f_d_curr_PC),
.out_valid (f_d_valid)
.out_valid (f_d_valid),
.out_warp_num (f_d_warp_num)
);
@@ -234,16 +246,17 @@ VX_decode vx_decode(
.in_write_data (writeback_write_data),
.in_rd (writeback_rd),
.in_wb (writeback_wb),
.in_wb_warp_num (writeback_warp_num),
.in_wb_valid (m_w_valid),
.in_src1_fwd (forwarding_src1_fwd),
.in_src1_fwd_data(forwarding_src1_fwd_data),
.in_src2_fwd (forwarding_src2_fwd),
.in_src2_fwd_data(forwarding_src2_fwd_data),
.in_warp_num (f_d_warp_num),
.out_csr_address (decode_csr_address),
.out_is_csr (decode_is_csr),
.out_csr_mask (decode_csr_mask),
.out_rd (decode_rd),
.out_rs1 (decode_rs1),
.out_rs2 (decode_rs2),
@@ -264,7 +277,8 @@ VX_decode vx_decode(
.out_valid (decode_valid),
.out_clone_stall (decode_clone_stall),
.out_change_mask (decode_change_mask),
.out_thread_mask (decode_thread_mask)
.out_thread_mask (decode_thread_mask),
.out_warp_num (decode_warp_num)
);
@@ -295,6 +309,7 @@ VX_d_e_reg vx_d_e_reg(
.in_freeze (total_freeze),
.in_valid (decode_valid),
.in_clone_stall (decode_clone_stall),
.in_warp_num (decode_warp_num),
.out_csr_address(d_e_csr_address),
.out_is_csr (d_e_is_csr),
@@ -316,7 +331,8 @@ VX_d_e_reg vx_d_e_reg(
.out_jal (d_e_jal),
.out_jal_offset (d_e_jal_offset),
.out_PC_next (d_e_PC_next),
.out_valid (d_e_valid)
.out_valid (d_e_valid),
.out_warp_num (d_e_warp_num)
);
VX_execute vx_execute(
@@ -342,6 +358,7 @@ VX_execute vx_execute(
.in_jal_offset (d_e_jal_offset),
.in_curr_PC (d_e_curr_PC),
.in_valid (d_e_valid),
.in_warp_num (d_e_warp_num),
.out_csr_address (execute_csr_address),
.out_is_csr (execute_is_csr),
@@ -360,7 +377,8 @@ VX_execute vx_execute(
.out_branch_offset(execute_branch_offset),
.out_branch_stall (execute_branch_stall),
.out_PC_next (execute_PC_next),
.out_valid (execute_valid)
.out_valid (execute_valid),
.out_warp_num (execute_warp_num)
);
VX_e_m_reg vx_e_m_reg(
@@ -385,6 +403,7 @@ VX_e_m_reg vx_e_m_reg(
.in_jal_dest (execute_jal_dest),
.in_freeze (total_freeze),
.in_valid (execute_valid),
.in_warp_num (execute_warp_num),
.out_csr_address (e_m_csr_address),
.out_is_csr (e_m_is_csr),
@@ -404,7 +423,8 @@ VX_e_m_reg vx_e_m_reg(
.out_jal (e_m_jal),
.out_jal_dest (e_m_jal_dest),
.out_PC_next (e_m_PC_next),
.out_valid (e_m_valid)
.out_valid (e_m_valid),
.out_warp_num (e_m_warp_num)
);
// wire[31:0] use_rd2[`NT_M1:0];
@@ -428,6 +448,7 @@ VX_memory vx_memory(
.in_branch_type (e_m_branch_type),
.in_valid (e_m_valid),
.in_cache_driver_out_data (in_cache_driver_out_data),
.in_warp_num (e_m_warp_num),
.out_alu_result (memory_alu_result),
.out_mem_result (memory_mem_result),
@@ -440,6 +461,7 @@ VX_memory vx_memory(
.out_delay (memory_delay),
.out_PC_next (memory_PC_next),
.out_valid (memory_valid),
.out_warp_num (memory_warp_num),
.out_cache_driver_in_address (out_cache_driver_in_address),
.out_cache_driver_in_mem_read (out_cache_driver_in_mem_read),
.out_cache_driver_in_mem_write(out_cache_driver_in_mem_write),
@@ -458,6 +480,7 @@ VX_m_w_reg vx_m_w_reg(
.in_PC_next (memory_PC_next),
.in_freeze (total_freeze),
.in_valid (memory_valid),
.in_warp_num (memory_warp_num),
.out_alu_result(m_w_alu_result),
.out_mem_result(m_w_mem_result),
@@ -466,7 +489,8 @@ VX_m_w_reg vx_m_w_reg(
.out_rs1 (m_w_rs1),
.out_rs2 (m_w_rs2),
.out_PC_next (m_w_PC_next),
.out_valid (m_w_valid)
.out_valid (m_w_valid),
.out_warp_num (m_w_warp_num)
);
@@ -478,10 +502,12 @@ VX_writeback vx_writeback(
.in_wb (m_w_wb),
.in_PC_next (m_w_PC_next),
.in_valid (m_w_valid),
.in_warp_num (m_w_warp_num),
.out_write_data(writeback_write_data),
.out_rd (writeback_rd),
.out_wb (writeback_wb)
.out_wb (writeback_wb),
.out_warp_num (writeback_warp_num)
);
@@ -489,6 +515,7 @@ VX_forwarding vx_forwarding(
.in_decode_src1 (decode_rs1),
.in_decode_src2 (decode_rs2),
.in_decode_csr_address (decode_csr_address),
.in_decode_warp_num (decode_warp_num),
.in_execute_dest (execute_rd),
.in_execute_wb (execute_wb),
@@ -496,6 +523,7 @@ VX_forwarding vx_forwarding(
.in_execute_PC_next (execute_PC_next),
.in_execute_is_csr (execute_is_csr),
.in_execute_csr_address (execute_csr_address),
.in_execute_warp_num (execute_warp_num),
.in_memory_dest (memory_rd),
.in_memory_wb (memory_wb),
@@ -505,12 +533,14 @@ VX_forwarding vx_forwarding(
.in_memory_is_csr (e_m_is_csr),
.in_memory_csr_address (e_m_csr_address),
.in_memory_csr_result (e_m_csr_result),
.in_memory_warp_num (memory_warp_num),
.in_writeback_dest (m_w_rd),
.in_writeback_wb (m_w_wb),
.in_writeback_alu_result(m_w_alu_result),
.in_writeback_mem_data (m_w_mem_result),
.in_writeback_PC_next (m_w_PC_next),
.in_writeback_warp_num (writeback_warp_num),
.out_src1_fwd (forwarding_src1_fwd),
.out_src2_fwd (forwarding_src2_fwd),

Binary file not shown.

File diff suppressed because it is too large Load Diff

View File

@@ -47,6 +47,9 @@ VL_MODULE(VVortex) {
VL_SIG8(Vortex__DOT__forwarding_src1_fwd,0,0);
VL_SIG8(Vortex__DOT__forwarding_src2_fwd,0,0);
VL_SIG8(Vortex__DOT__vx_fetch__DOT__stall,0,0);
VL_SIG8(Vortex__DOT__vx_fetch__DOT__warp_num,1,0);
VL_SIG8(Vortex__DOT__vx_fetch__DOT__warp_state,1,0);
VL_SIG8(Vortex__DOT__vx_f_d_reg__DOT__warp_num,1,0);
VL_SIG8(Vortex__DOT__vx_decode__DOT__is_itype,0,0);
VL_SIG8(Vortex__DOT__vx_decode__DOT__is_csr,0,0);
VL_SIG8(Vortex__DOT__vx_decode__DOT__is_clone,0,0);
@@ -64,6 +67,7 @@ VL_MODULE(VVortex) {
VL_SIG8(Vortex__DOT__vx_d_e_reg__DOT__branch_type,2,0);
VL_SIG8(Vortex__DOT__vx_d_e_reg__DOT__is_csr,0,0);
VL_SIG8(Vortex__DOT__vx_d_e_reg__DOT__jal,0,0);
VL_SIG8(Vortex__DOT__vx_d_e_reg__DOT__warp_num,1,0);
VL_SIG8(Vortex__DOT__vx_d_e_reg__DOT__stalling,0,0);
VL_SIG8(Vortex__DOT__vx_e_m_reg__DOT__rd,4,0);
VL_SIG8(Vortex__DOT__vx_e_m_reg__DOT__wb,1,0);
@@ -72,8 +76,10 @@ VL_MODULE(VVortex) {
VL_SIG8(Vortex__DOT__vx_e_m_reg__DOT__is_csr,0,0);
VL_SIG8(Vortex__DOT__vx_e_m_reg__DOT__branch_type,2,0);
VL_SIG8(Vortex__DOT__vx_e_m_reg__DOT__jal,0,0);
VL_SIG8(Vortex__DOT__vx_e_m_reg__DOT__warp_num,1,0);
VL_SIG8(Vortex__DOT__vx_m_w_reg__DOT__rd,4,0);
VL_SIG8(Vortex__DOT__vx_m_w_reg__DOT__wb,1,0);
VL_SIG8(Vortex__DOT__vx_m_w_reg__DOT__warp_num,1,0);
VL_SIG8(Vortex__DOT__vx_forwarding__DOT__src1_exe_fwd,0,0);
VL_SIG8(Vortex__DOT__vx_forwarding__DOT__src1_mem_fwd,0,0);
VL_SIG8(Vortex__DOT__vx_forwarding__DOT__src1_wb_fwd,0,0);
@@ -95,14 +101,14 @@ VL_MODULE(VVortex) {
VL_SIG(Vortex__DOT__vx_d_e_reg__DOT__PC_next_out,31,0);
VL_SIG(Vortex__DOT__vx_d_e_reg__DOT__itype_immed,31,0);
VL_SIG(Vortex__DOT__vx_d_e_reg__DOT__upper_immed,19,0);
};
struct {
VL_SIG(Vortex__DOT__vx_d_e_reg__DOT__csr_mask,31,0);
VL_SIG(Vortex__DOT__vx_d_e_reg__DOT__curr_PC,31,0);
VL_SIG(Vortex__DOT__vx_d_e_reg__DOT__jal_offset,31,0);
VL_SIG(Vortex__DOT__vx_execute__DOT__genblk1__BRA__0__KET____DOT__vx_alu__DOT__ALU_in2,31,0);
VL_SIG(Vortex__DOT__vx_execute__DOT__genblk1__BRA__1__KET____DOT__vx_alu__DOT__ALU_in2,31,0);
VL_SIG(Vortex__DOT__vx_e_m_reg__DOT__PC_next,31,0);
};
struct {
VL_SIG(Vortex__DOT__vx_e_m_reg__DOT__csr_result,31,0);
VL_SIG(Vortex__DOT__vx_e_m_reg__DOT__curr_PC,31,0);
VL_SIG(Vortex__DOT__vx_e_m_reg__DOT__branch_offset,31,0);
@@ -161,6 +167,8 @@ VL_MODULE(VVortex) {
VL_SIG(Vortex__DOT__vx_writeback__DOT__out_pc_data[2],31,0);
VL_SIG(Vortex__DOT__vx_forwarding__DOT__use_execute_PC_next[2],31,0);
VL_SIG(Vortex__DOT__vx_forwarding__DOT__use_memory_PC_next[2],31,0);
};
struct {
VL_SIG(Vortex__DOT__vx_forwarding__DOT__use_writeback_PC_next[2],31,0);
VL_SIG16(Vortex__DOT__vx_csr_handler__DOT__csr[4096],11,0);
};
@@ -171,6 +179,7 @@ VL_MODULE(VVortex) {
struct {
// Begin mtask footprint all:
VL_SIG8(__Vtableidx1,2,0);
VL_SIG8(__Vdly__Vortex__DOT__vx_fetch__DOT__warp_num,1,0);
VL_SIG8(__Vclklast__TOP__clk,0,0);
VL_SIG8(__Vclklast__TOP__reset,0,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context__DOT____Vcellout__vx_register_file_master__out_src2_data,31,0);
@@ -233,9 +242,9 @@ VL_MODULE(VVortex) {
VL_SIG(Vortex__DOT____Vcellout__vx_forwarding__out_src1_fwd_data[2],31,0);
VL_SIG(Vortex__DOT____Vcellinp__vx_forwarding__in_writeback_mem_data[2],31,0);
VL_SIG(Vortex__DOT____Vcellinp__vx_forwarding__in_writeback_alu_result[2],31,0);
VL_SIG(Vortex__DOT____Vcellinp__vx_forwarding__in_memory_mem_data[2],31,0);
};
struct {
VL_SIG(Vortex__DOT____Vcellinp__vx_forwarding__in_memory_mem_data[2],31,0);
VL_SIG(Vortex__DOT____Vcellinp__vx_forwarding__in_memory_alu_result[2],31,0);
VL_SIG(Vortex__DOT____Vcellinp__vx_forwarding__in_execute_alu_result[2],31,0);
VL_SIG8(Vortex__DOT__vx_fetch__DOT____Vcellout__VX_Warp__out_valid[2],0,0);

Binary file not shown.

Binary file not shown.

View File

@@ -2,28 +2,28 @@
C "-Wall -cc Vortex.v --exe test_bench.cpp"
S 4608404 12889046060 1553037052 0 1548678579 0 "/usr/local/Cellar/verilator/4.010/bin/verilator_bin"
S 2785 12889457986 1554064009 0 1554064009 0 "VX_alu.v"
S 3192 12890338917 1557297615 0 1557297615 0 "VX_context.v"
S 3288 12890338917 1557354788 0 1557354788 0 "VX_context.v"
S 1495 12889457987 1554023089 0 1554023089 0 "VX_csr_handler.v"
S 5105 12889457988 1554023089 0 1554023089 0 "VX_d_e_reg.v"
S 11838 12890307904 1557297599 0 1557297599 0 "VX_decode.v"
S 1557 12890307906 1557297794 0 1557297794 0 "VX_define.v"
S 4077 12889457992 1554023089 0 1554023089 0 "VX_e_m_reg.v"
S 3288 12889457993 1554023938 0 1554023938 0 "VX_execute.v"
S 1558 12889457994 1554064040 0 1554064040 0 "VX_f_d_reg.v"
S 1816 12890309989 1557267615 0 1557267615 0 "VX_fetch.v"
S 5632 12889457996 1554023089 0 1554023089 0 "VX_forwarding.v"
S 1677 12889457997 1554023089 0 1554023089 0 "VX_m_w_reg.v"
S 3732 12890309990 1557110604 0 1557110604 0 "VX_memory.v"
S 1078 12889457999 1554023928 0 1554023928 0 "VX_register_file.v"
S 1387 12889458000 1554023933 0 1554023933 0 "VX_register_file_slave.v"
S 5512 12889457988 1557345046 0 1557345046 0 "VX_d_e_reg.v"
S 12085 12890307904 1557354665 0 1557354665 0 "VX_decode.v"
S 1574 12890307906 1557343909 0 1557343909 0 "VX_define.v"
S 4267 12889457992 1557345117 0 1557345117 0 "VX_e_m_reg.v"
S 3405 12889457993 1557348460 0 1557348460 0 "VX_execute.v"
S 1751 12889457994 1557344924 0 1557344924 0 "VX_f_d_reg.v"
S 2030 12890309989 1557348839 0 1557348839 0 "VX_fetch.v"
S 6293 12889457996 1557348346 0 1557348346 0 "VX_forwarding.v"
S 1866 12889457997 1557348551 0 1557348551 0 "VX_m_w_reg.v"
S 3847 12890309990 1557348518 0 1557348518 0 "VX_memory.v"
S 1118 12889457999 1557354753 0 1557354753 0 "VX_register_file.v"
S 1428 12889458000 1557354772 0 1557354772 0 "VX_register_file_slave.v"
S 1499 12890308905 1557267602 0 1557267602 0 "VX_warp.v"
S 1454 12890307909 1557104321 0 1557104321 0 "VX_writeback.v"
S 16949 12890307910 1557104321 0 1557104321 0 "Vortex.v"
T 272889 12890339974 1557297809 0 1557297809 0 "obj_dir/VVortex.cpp"
T 16351 12890339973 1557297809 0 1557297809 0 "obj_dir/VVortex.h"
T 1800 12890339976 1557297809 0 1557297809 0 "obj_dir/VVortex.mk"
T 530 12890339972 1557297809 0 1557297809 0 "obj_dir/VVortex__Syms.cpp"
T 711 12890339971 1557297809 0 1557297809 0 "obj_dir/VVortex__Syms.h"
T 512 12890339977 1557297809 0 1557297809 0 "obj_dir/VVortex__ver.d"
T 0 0 1557297809 0 1557297809 0 "obj_dir/VVortex__verFiles.dat"
T 1159 12890339975 1557297809 0 1557297809 0 "obj_dir/VVortex_classes.mk"
S 1568 12890307909 1557348531 0 1557348531 0 "VX_writeback.v"
S 18162 12890307910 1557354587 0 1557354587 0 "Vortex.v"
T 276843 12890339974 1557354791 0 1557354791 0 "obj_dir/VVortex.cpp"
T 16753 12890339973 1557354791 0 1557354791 0 "obj_dir/VVortex.h"
T 1800 12890339976 1557354791 0 1557354791 0 "obj_dir/VVortex.mk"
T 530 12890339972 1557354791 0 1557354791 0 "obj_dir/VVortex__Syms.cpp"
T 711 12890339971 1557354791 0 1557354791 0 "obj_dir/VVortex__Syms.h"
T 512 12890339977 1557354791 0 1557354791 0 "obj_dir/VVortex__ver.d"
T 0 0 1557354791 0 1557354791 0 "obj_dir/VVortex__verFiles.dat"
T 1159 12890339975 1557354791 0 1557354791 0 "obj_dir/VVortex_classes.mk"