From 95047fcadcb4b035b18d5abcb242ac5d3bca2464 Mon Sep 17 00:00:00 2001 From: felsabbagh3 Date: Thu, 17 Oct 2019 00:48:54 -0400 Subject: [PATCH] Rename Stage that removes the need for forwarding --- rtl/Makefile | 2 +- rtl/VX_back_end.v | 4 +- rtl/VX_fetch.v | 3 +- rtl/VX_forwarding.v | 15 ++- rtl/VX_front_end.v | 5 +- rtl/VX_gpr.v | 253 ++++++++++++++++---------------------------- rtl/VX_gpr_stage.v | 9 +- rtl/VX_scheduler.v | 45 +++++++- rtl/Vortex.v | 10 +- rtl/results.txt | 8 +- rtl/tb_debug.h | 2 +- rtl/test_bench.h | 4 +- 12 files changed, 177 insertions(+), 183 deletions(-) diff --git a/rtl/Makefile b/rtl/Makefile index 0fe3c215..07b2a5be 100644 --- a/rtl/Makefile +++ b/rtl/Makefile @@ -3,7 +3,7 @@ all: RUNFILE # -LDFLAGS '-lsystemc' VERILATOR: echo "#define VCD_OFF" > tb_debug.h - verilator --compiler gcc -Wno-fatal -Wno-UNOPTFLAT -Wno-UNDRIVEN -Wno-UNSIGNED -Wno-lint -cc Vortex.v -I. -I../models/memory/cln28hpc/rf2_32x128_wm1/ -I/usr/local/systemc/ -Iinterfaces/ -Ipipe_regs/ --exe test_bench.cpp -CFLAGS '-std=c++11 -O3' -LDFLAGS '-L/usr/local/systemc/' + verilator --compiler gcc --Wno-PINMISSING -cc Vortex.v -I. -I../models/memory/cln28hpc/rf2_32x128_wm1/ -I/usr/local/systemc/ -Iinterfaces/ -Ipipe_regs/ --exe test_bench.cpp -CFLAGS '-std=c++11 -O3' -LDFLAGS '-L/usr/local/systemc/' compdebug: echo "#define VCD_OUTPUT" > tb_debug.h diff --git a/rtl/VX_back_end.v b/rtl/VX_back_end.v index eb461fd9..55e5a62e 100644 --- a/rtl/VX_back_end.v +++ b/rtl/VX_back_end.v @@ -2,8 +2,9 @@ module VX_back_end ( input wire clk, input wire reset, input wire fetch_delay, + input wire schedule_delay, - input wire[31:0] csr_decode_csr_data, + input wire[31:0] csr_decode_csr_data, output wire execute_branch_stall, input wire in_fwd_stall, @@ -64,6 +65,7 @@ VX_frE_to_bckE_req_inter VX_bckE_req_out(); VX_gpr_stage VX_gpr_stage( .clk (clk), + .schedule_delay (schedule_delay), .VX_writeback_inter(VX_writeback_inter), .VX_fwd_rsp (VX_fwd_rsp), .in_fwd_stall (in_fwd_stall), diff --git a/rtl/VX_fetch.v b/rtl/VX_fetch.v index fe0e0678..2a726e92 100644 --- a/rtl/VX_fetch.v +++ b/rtl/VX_fetch.v @@ -8,6 +8,7 @@ module VX_fetch ( input wire in_fwd_stall, input wire in_branch_stall_exe, input wire in_gpr_stall, + input wire schedule_delay, VX_icache_response_inter icache_response, VX_icache_request_inter icache_request, @@ -28,7 +29,7 @@ module VX_fetch ( wire warp_stall; - assign pipe_stall = in_gpr_stall || in_fwd_stall || in_freeze; + assign pipe_stall = in_gpr_stall || in_fwd_stall || in_freeze || schedule_delay; assign warp_stall = in_branch_stall || (in_branch_stall_exe && 0); diff --git a/rtl/VX_forwarding.v b/rtl/VX_forwarding.v index 987b6f6f..3d84e7f6 100644 --- a/rtl/VX_forwarding.v +++ b/rtl/VX_forwarding.v @@ -111,7 +111,8 @@ module VX_forwarding ( (!src1_mem_fwd)); - assign out_src1_fwd = src1_exe_fwd || src1_mem_fwd || (src1_wb_fwd && 0); + // assign out_src1_fwd = src1_exe_fwd || src1_mem_fwd || (src1_wb_fwd && 0); + assign out_src1_fwd = 0; @@ -137,15 +138,19 @@ module VX_forwarding ( (in_writeback_warp_num == in_decode_warp_num); - assign out_src2_fwd = src2_exe_fwd || src2_mem_fwd || (src2_wb_fwd && 0); + // assign out_src2_fwd = src2_exe_fwd || src2_mem_fwd || (src2_wb_fwd && 0); + assign out_src2_fwd = 0; - wire exe_mem_read_stall = ((src1_exe_fwd || src2_exe_fwd) && exe_mem_read) ? `STALL : `NO_STALL; - wire mem_mem_read_stall = ((src1_mem_fwd || src2_mem_fwd) && mem_mem_read) ? `STALL : `NO_STALL; + // wire exe_mem_read_stall = ((src1_exe_fwd || src2_exe_fwd) && exe_mem_read) ? `STALL : `NO_STALL; + // wire mem_mem_read_stall = ((src1_mem_fwd || src2_mem_fwd) && mem_mem_read) ? `STALL : `NO_STALL; + wire exe_mem_read_stall = `NO_STALL; + wire mem_mem_read_stall = `NO_STALL; - assign out_fwd_stall = exe_mem_read_stall || mem_mem_read_stall; + // assign out_fwd_stall = exe_mem_read_stall || mem_mem_read_stall; + assign out_fwd_stall = 0; // always @(*) begin // if (out_fwd_stall) $display("FWD STALL"); diff --git a/rtl/VX_front_end.v b/rtl/VX_front_end.v index b678a4ce..6f5ec581 100644 --- a/rtl/VX_front_end.v +++ b/rtl/VX_front_end.v @@ -9,6 +9,7 @@ module VX_front_end ( input wire execute_branch_stall, input wire in_gpr_stall, + input wire schedule_delay, VX_warp_ctl_inter VX_warp_ctl, @@ -18,7 +19,6 @@ module VX_front_end ( VX_jal_response_inter VX_jal_rsp, VX_branch_response_inter VX_branch_rsp, - VX_wb_inter VX_writeback_inter, VX_frE_to_bckE_req_inter VX_bckE_req, @@ -38,7 +38,7 @@ wire decode_branch_stall; wire decode_gpr_stall; -wire total_freeze = memory_delay || fetch_delay || in_gpr_stall; +wire total_freeze = memory_delay || fetch_delay || in_gpr_stall || schedule_delay; /* verilator lint_off UNUSED */ wire real_fetch_ebreak; @@ -49,6 +49,7 @@ VX_fetch vx_fetch( .in_memory_delay (memory_delay), .in_branch_stall (decode_branch_stall), .in_fwd_stall (forwarding_fwd_stall), + .schedule_delay (schedule_delay), .in_branch_stall_exe(execute_branch_stall), .in_gpr_stall (decode_gpr_stall), .VX_jal_rsp (VX_jal_rsp), diff --git a/rtl/VX_gpr.v b/rtl/VX_gpr.v index 25bfe6b3..00502256 100644 --- a/rtl/VX_gpr.v +++ b/rtl/VX_gpr.v @@ -15,13 +15,6 @@ module VX_gpr ( assign write_enable = valid_write_request && ((VX_writeback_inter.wb != 0) && (VX_writeback_inter.rd != 5'h0)); -// <<<<<<< HEAD - - - // always @(*) begin - // if(write_enable) $display("Writing to %d: %d = %h",VX_writeback_inter.wb_warp_num, VX_writeback_inter.rd, VX_writeback_inter.write_data[0][31:0]); - // end - // byte_enabled_simple_dual_port_ram first_ram( // .we (write_enable), // .clk (clk), @@ -35,160 +28,102 @@ module VX_gpr ( // ); // ======= - // byte_enabled_simple_dual_port_ram first_ram( - // .we (write_enable), - // .clk (clk), - // .waddr (VX_writeback_inter.rd), - // .raddr1(VX_gpr_read.rs1), - // .be (VX_writeback_inter.wb_valid), - // .wdata (VX_writeback_inter.write_data), - // .q1 (out_a_reg_data) - // ); + byte_enabled_simple_dual_port_ram first_ram( + .we (write_enable), + .clk (clk), + .waddr (VX_writeback_inter.rd), + .raddr1(VX_gpr_read.rs1), + .be (VX_writeback_inter.wb_valid), + .wdata (VX_writeback_inter.write_data), + .q1 (out_a_reg_data) + ); - // byte_enabled_simple_dual_port_ram first_ram( - // .we (write_enable), - // .clk (clk), - // .waddr (VX_writeback_inter.rd), - // .raddr1(VX_gpr_read.rs2), - // .be (VX_writeback_inter.wb_valid), - // .wdata (VX_writeback_inter.write_data), - // .q1 (out_b_reg_data) - // ); + byte_enabled_simple_dual_port_ram second_ram( + .we (write_enable), + .clk (clk), + .waddr (VX_writeback_inter.rd), + .raddr1(VX_gpr_read.rs2), + .be (VX_writeback_inter.wb_valid), + .wdata (VX_writeback_inter.write_data), + .q1 (out_b_reg_data) + ); - wire[127:0] write_bit_mask = {{32{~(VX_writeback_inter.wb_valid[3])}}, {32{~(VX_writeback_inter.wb_valid[2])}}, {32{~(VX_writeback_inter.wb_valid[1])}}, {32{~(VX_writeback_inter.wb_valid[0])}}}; - - // Port A is a read port, Port B is a write port - - /* verilator lint_off PINCONNECTEMPTY */ - rf2_32x128_wm1 first_ram ( - .CENYA(), - .AYA(), - .CENYB(), - .WENYB(), - .AYB(), - .QA(out_a_reg_data), - .SOA(), - .SOB(), - .CLKA(clk), - .CENA(1'b0), - .AA(VX_gpr_read.rs1), - .CLKB(clk), - .CENB(1'b0), - .WENB(write_bit_mask), - .AB(VX_writeback_inter.rd), - .DB(VX_writeback_inter.write_data), - .EMAA(3'b011), - .EMASA(1'b0), - .EMAB(3'b011), - .TENA(1'b1), - .TCENA(1'b0), - .TAA(5'b0), - .TENB(1'b1), - .TCENB(1'b0), - .TWENB(128'b0), - .TAB(5'b0), - .TDB(128'b0), - .RET1N(1'b1), - .SIA(2'b0), - .SEA(1'b0), - .DFTRAMBYP(1'b0), - .SIB(2'b0), - .SEB(1'b0), - .COLLDISN(1'b1) - ); - /* verilator lint_on PINCONNECTEMPTY */ - - /* verilator lint_off PINCONNECTEMPTY */ - rf2_32x128_wm1 second_ram ( - .CENYA(), - .AYA(), - .CENYB(), - .WENYB(), - .AYB(), - .QA(out_b_reg_data), - .SOA(), - .SOB(), - .CLKA(clk), - .CENA(1'b0), - .AA(VX_gpr_read.rs2), - .CLKB(clk), - .CENB(1'b0), - .WENB(write_bit_mask), - .AB(VX_writeback_inter.rd), - .DB(VX_writeback_inter.write_data), - .EMAA(3'b011), - .EMASA(1'b0), - .EMAB(3'b011), - .TENA(1'b1), - .TCENA(1'b0), - .TAA(5'b0), - .TENB(1'b1), - .TCENB(1'b0), - .TWENB(128'b0), - .TAB(5'b0), - .TDB(128'b0), - .RET1N(1'b1), - .SIA(2'b0), - .SEA(1'b0), - .DFTRAMBYP(1'b0), - .SIB(2'b0), - .SEB(1'b0), - .COLLDISN(1'b1) - ); - /* verilator lint_on PINCONNECTEMPTY */ -// >>>>>>> 5680b997b599ce2900997cab976681fe3881e880 - - - - - - - // // USING RAM blocks - // // First RAM - // byte_enabled_simple_dual_port_ram first_ram( - // .we (write_enable), - // .clk (clk), - // .waddr(VX_writeback_inter.rd), - // .raddr(VX_gpr_read.rs1), - // .be (VX_writeback_inter.wb_valid), - // .wdata(VX_writeback_inter.write_data), - // .q (out_a_reg_data) - // ); - - // // Second RAM block - // byte_enabled_simple_dual_port_ram second_ram( - // .we (write_enable), - // .clk (clk), - // .waddr(VX_writeback_inter.rd), - // .raddr(VX_gpr_read.rs2), - // .be (VX_writeback_inter.wb_valid), - // .wdata(VX_writeback_inter.write_data), - // .q (out_b_reg_data) - // ); - - - - // logic[`NT_M1:0][31:0] gpr[31:0]; // gpr[register_number][thread_number][data_bits] - - // wire write_enable; - - // assign write_enable = valid_write_request && ((VX_writeback_inter.wb != 0) && (VX_writeback_inter.rd != 5'h0)); - // assign read_enable = valid_request; - - // // Using Registers - // integer thread_index; - // always_ff@(posedge clk) - // begin - // if (write_enable) begin - // for (thread_index = 0; thread_index <= `NT_M1; thread_index = thread_index + 1) begin - // if (VX_writeback_inter.wb_valid[thread_index]) begin - // gpr[VX_writeback_inter.rd][thread_index] <= VX_writeback_inter.write_data[thread_index]; - // end - // end - // end - // out_a_reg_data <= gpr[VX_gpr_read.rs1]; - // out_b_reg_data <= gpr[VX_gpr_read.rs2]; - // end + // wire[127:0] write_bit_mask = {{32{~(VX_writeback_inter.wb_valid[3])}}, {32{~(VX_writeback_inter.wb_valid[2])}}, {32{~(VX_writeback_inter.wb_valid[1])}}, {32{~(VX_writeback_inter.wb_valid[0])}}}; + // /* verilator lint_off PINCONNECTEMPTY */ + // rf2_32x128_wm1 first_ram ( + // .CENYA(), + // .AYA(), + // .CENYB(), + // .WENYB(), + // .AYB(), + // .QA(out_a_reg_data), + // .SOA(), + // .SOB(), + // .CLKA(clk), + // .CENA(1'b0), + // .AA(VX_gpr_read.rs1), + // .CLKB(clk), + // .CENB(1'b0), + // .WENB(write_bit_mask), + // .AB(VX_writeback_inter.rd), + // .DB(VX_writeback_inter.write_data), + // .EMAA(3'b011), + // .EMASA(1'b0), + // .EMAB(3'b011), + // .TENA(1'b1), + // .TCENA(1'b0), + // .TAA(5'b0), + // .TENB(1'b1), + // .TCENB(1'b0), + // .TWENB(128'b0), + // .TAB(5'b0), + // .TDB(128'b0), + // .RET1N(1'b1), + // .SIA(2'b0), + // .SEA(1'b0), + // .DFTRAMBYP(1'b0), + // .SIB(2'b0), + // .SEB(1'b0), + // .COLLDISN(1'b1) + // ); + // /* verilator lint_on PINCONNECTEMPTY */ + // /* verilator lint_off PINCONNECTEMPTY */ + // rf2_32x128_wm1 second_ram ( + // .CENYA(), + // .AYA(), + // .CENYB(), + // .WENYB(), + // .AYB(), + // .QA(out_b_reg_data), + // .SOA(), + // .SOB(), + // .CLKA(clk), + // .CENA(1'b0), + // .AA(VX_gpr_read.rs2), + // .CLKB(clk), + // .CENB(1'b0), + // .WENB(write_bit_mask), + // .AB(VX_writeback_inter.rd), + // .DB(VX_writeback_inter.write_data), + // .EMAA(3'b011), + // .EMASA(1'b0), + // .EMAB(3'b011), + // .TENA(1'b1), + // .TCENA(1'b0), + // .TAA(5'b0), + // .TENB(1'b1), + // .TCENB(1'b0), + // .TWENB(128'b0), + // .TAB(5'b0), + // .TDB(128'b0), + // .RET1N(1'b1), + // .SIA(2'b0), + // .SEA(1'b0), + // .DFTRAMBYP(1'b0), + // .SIB(2'b0), + // .SEB(1'b0), + // .COLLDISN(1'b1) + // ); endmodule diff --git a/rtl/VX_gpr_stage.v b/rtl/VX_gpr_stage.v index d1fa8e58..f11b5d00 100644 --- a/rtl/VX_gpr_stage.v +++ b/rtl/VX_gpr_stage.v @@ -1,6 +1,7 @@ module VX_gpr_stage ( input wire clk, input wire in_fwd_stall, + input wire schedule_delay, // inputs // Instruction Information VX_frE_to_bckE_req_inter VX_bckE_req, @@ -62,7 +63,7 @@ module VX_gpr_stage ( // assign VX_bckE_req_out.csr_mask = (VX_bckE_req.sr_immed == 1'b1) ? {27'h0, VX_bckE_req.rs1} : VX_gpr_data.a_reg_data[0]; VX_gpr_data_inter VX_gpr_datf; - VX_generic_register #(.N(256)) d_e_reg + VX_generic_register #(.N(256)) reg_data ( .clk (clk), .reset(0), @@ -72,10 +73,12 @@ module VX_gpr_stage ( .out ({VX_gpr_data.a_reg_data, VX_gpr_data.b_reg_data}) ); - VX_d_e_reg vx_d_e_reg( + wire stall = in_fwd_stall || schedule_delay; + + VX_d_e_reg gpr_stage_reg( .clk (clk), .reset (0), - .in_fwd_stall (in_fwd_stall), + .in_fwd_stall (stall), .in_branch_stall (0), .in_freeze (0), .in_gpr_stall (out_gpr_stall), diff --git a/rtl/VX_scheduler.v b/rtl/VX_scheduler.v index b713986e..938b7b9c 100644 --- a/rtl/VX_scheduler.v +++ b/rtl/VX_scheduler.v @@ -1,11 +1,50 @@ - +`include "VX_define.v" module VX_scheduler ( - input clk, - input + input wire clk, + VX_frE_to_bckE_req_inter VX_bckE_req, + VX_wb_inter VX_writeback_inter, + + output wire schedule_delay ); + + + reg rename_table[31:0]; + + initial begin + integer i; + for (i = 0; i < 32; i = i + 1) rename_table[i] = 0; + end + + wire valid_wb = (VX_writeback_inter.wb != 0) && (|VX_writeback_inter.wb_valid) && (VX_writeback_inter.rd != 0); + wire wb_inc = (VX_bckE_req.wb != 0) && (VX_bckE_req.rd != 0); + + + // wire pass_through = ((VX_bckE_req.rs1 == VX_writeback_inter.rd) || (VX_bckE_req.rs2 == VX_writeback_inter.rd)) && valid_wb; + // wire pass_through = 0; + + wire rs1_rename = rename_table[VX_bckE_req.rs1]; + wire rs2_rename = rename_table[VX_bckE_req.rs2]; + + wire is_store = (VX_bckE_req.mem_write != `NO_MEM_WRITE); + + wire rs1_rename_qual = (rs1_rename && (VX_bckE_req.rs1 != 0)); + wire rs2_rename_qual = (rs2_rename && (VX_bckE_req.rs2 != 0) && ((VX_bckE_req.rs2_src == `RS2_REG) || is_store)); + + wire rename_valid = rs1_rename_qual || rs2_rename_qual ; + + + assign schedule_delay = (rename_valid) && (|VX_bckE_req.valid); + + + always @(posedge clk) begin + if (valid_wb ) rename_table[VX_writeback_inter.rd] <= 0; + if (!schedule_delay && wb_inc) rename_table[VX_bckE_req.rd] <= 1; + end + + endmodule \ No newline at end of file diff --git a/rtl/Vortex.v b/rtl/Vortex.v index 9621ed02..73281b14 100644 --- a/rtl/Vortex.v +++ b/rtl/Vortex.v @@ -73,6 +73,7 @@ VX_warp_ctl_inter VX_warp_ctl(); wire out_gpr_stall; +wire schedule_delay; VX_front_end vx_front_end( @@ -81,11 +82,11 @@ VX_front_end vx_front_end( .VX_warp_ctl (VX_warp_ctl), .forwarding_fwd_stall(forwarding_fwd_stall), .execute_branch_stall(execute_branch_stall), - .VX_writeback_inter (VX_writeback_inter), .VX_bckE_req (VX_bckE_req), .decode_csr_address (decode_csr_address), .memory_delay (memory_delay), .fetch_delay (fetch_delay), + .schedule_delay (schedule_delay), .icache_response_fe (icache_response_fe), .icache_request_fe (icache_request_fe), .VX_jal_rsp (VX_jal_rsp), @@ -94,10 +95,17 @@ VX_front_end vx_front_end( .in_gpr_stall (out_gpr_stall) ); +VX_scheduler schedule( + .clk (clk), + .VX_bckE_req (VX_bckE_req), + .VX_writeback_inter(VX_writeback_inter), + .schedule_delay (schedule_delay) + ); VX_back_end vx_back_end( .clk (clk), .reset (reset), + .schedule_delay (schedule_delay), .fetch_delay (fetch_delay), .in_fwd_stall (forwarding_fwd_stall), .VX_fwd_req_de (VX_fwd_req_de), diff --git a/rtl/results.txt b/rtl/results.txt index d7d36007..1f44e536 100644 --- a/rtl/results.txt +++ b/rtl/results.txt @@ -1,7 +1,7 @@ -# Dynamic Instructions: 13 -# of total cycles: 24 +# Dynamic Instructions: 67875 +# of total cycles: 67891 # of forwarding stalls: 0 # of branch stalls: 0 -# CPI: 1.84615 -# time to simulate: 6.95312e-310 milliseconds +# CPI: 1.00024 +# time to simulate: 0 milliseconds # GRADE: Failed on test: 4294967295 diff --git a/rtl/tb_debug.h b/rtl/tb_debug.h index 6aae22b3..711663cc 100644 --- a/rtl/tb_debug.h +++ b/rtl/tb_debug.h @@ -1 +1 @@ -#define VCD_OUTPUT +#define VCD_OFF diff --git a/rtl/test_bench.h b/rtl/test_bench.h index 95ad0559..f42d1765 100644 --- a/rtl/test_bench.h +++ b/rtl/test_bench.h @@ -372,11 +372,11 @@ bool Vortex::simulate(std::string file_to_simulate) // unsigned cycles; counter = 0; this->stats_total_cycles = 10; - while (this->stop && ((counter < 5))) + while (this->stop && ((counter < 6))) // while (this->stats_total_cycles < 10) { // std::cout << "Counter: " << counter << "\n"; - if ((this->stats_total_cycles) % 5000 == 0) std::cout << "************* Cycle: " << (this->stats_total_cycles) << "\n"; + // if ((this->stats_total_cycles) % 5000 == 0) std::cout << "************* Cycle: " << (this->stats_total_cycles) << "\n"; // dstop = !dbus_driver(); vortex->clk = 1;