diff --git a/kernel/vortex_test.dump b/kernel/vortex_test.dump index db0311f7..179caf03 100644 --- a/kernel/vortex_test.dump +++ b/kernel/vortex_test.dump @@ -5,8 +5,8 @@ vortex_test.elf: file format elf32-littleriscv Disassembly of section .text: 80000000 <_start>: -80000000: 00400593 li a1,4 -80000004: 00b58633 add a2,a1,a1 +80000000: 7ffff137 lui sp,0x7ffff +80000004: 041010ef jal ra,80001844
80000008: 00000513 li a0,0 8000000c: 0005006b 0x5006b @@ -23,7 +23,7 @@ Disassembly of section .text: 80000024: 00755c63 bge a0,t2,8000003c 80000028 : -80000028: 80010113 addi sp,sp,-2048 +80000028: 80010113 addi sp,sp,-2048 # 7fffe800 8000002c: 00050313 mv t1,a0 80000030: 0003506b 0x3506b diff --git a/kernel/vortex_test.elf b/kernel/vortex_test.elf index 7bb309b6..f3408690 100755 Binary files a/kernel/vortex_test.elf and b/kernel/vortex_test.elf differ diff --git a/kernel/vortex_test.hex b/kernel/vortex_test.hex index 2f2c2f12..f40c918e 100644 --- a/kernel/vortex_test.hex +++ b/kernel/vortex_test.hex @@ -1,5 +1,5 @@ :0200000480007A -:10000000930540003386B500130500006B00050022 +:1000000037F1FF7FEF101004130500006B000500AF :10001000938B0600130D0700130F010093030500D7 :1000200013051000635C75001301018013030500C4 :100030006B500300130515006FF0DFFE1300000086 diff --git a/kernel/vx_os/vx_back/vx_back.s b/kernel/vx_os/vx_back/vx_back.s index 3c8fc894..1ff8b571 100644 --- a/kernel/vx_os/vx_back/vx_back.s +++ b/kernel/vx_os/vx_back/vx_back.s @@ -6,10 +6,10 @@ .type _start, @function .global _start _start: - li a1, 4 - add a2, a1, a1 - li a0, 0 - .word 0x0005006b # tmc a0 + # li a1, 4 + # add a2, a1, a1 + # li a0, 0 + # .word 0x0005006b # tmc a0 ########################### # la a0, 0x10000000 # li a1, 7 @@ -19,9 +19,9 @@ _start: # # li a1, 3 # # sw a1, 0(a0) - # la a0, 0x80000000 - # li a1, 9 - # sw a1, 0(a0) + # # la a0, 0x80000000 + # # li a1, 9 + # # sw a1, 0(a0) # # la a0, 0x80000008 # # li a1, 8 @@ -72,11 +72,11 @@ _start: # .word 0x0000306b #join # ecall ############################ - # lui sp, 0x7ffff - # # jal vx_before_main - # jal main - # li a0, 0 - # .word 0x0005006b # tmc a0 + lui sp, 0x7ffff + # jal vx_before_main + jal main + li a0, 0 + .word 0x0005006b # tmc a0 # Hi: # li a2, 7 diff --git a/models/memory/cln28hpm/rf2_128x128_wm1/rf2_128x128_wm1.v b/models/memory/cln28hpm/rf2_128x128_wm1/rf2_128x128_wm1.v index c2ec3c53..7113b498 100644 --- a/models/memory/cln28hpm/rf2_128x128_wm1/rf2_128x128_wm1.v +++ b/models/memory/cln28hpm/rf2_128x128_wm1/rf2_128x128_wm1.v @@ -47,6 +47,7 @@ // // Known Work Arounds: N/A // +`define ARM_UD_MODEL `timescale 1 ns/1 ps `define ARM_MEM_PROP 1.000 `define ARM_MEM_RETAIN 1.000 diff --git a/models/memory/cln28hpm/rf2_256x128_wm1/rf2_256x128_wm1.v b/models/memory/cln28hpm/rf2_256x128_wm1/rf2_256x128_wm1.v index 615e23d8..3e89b619 100644 --- a/models/memory/cln28hpm/rf2_256x128_wm1/rf2_256x128_wm1.v +++ b/models/memory/cln28hpm/rf2_256x128_wm1/rf2_256x128_wm1.v @@ -47,6 +47,8 @@ // // Known Work Arounds: N/A // +`define ARM_UD_MODEL + `timescale 1 ns/1 ps `define ARM_MEM_PROP 1.000 `define ARM_MEM_RETAIN 1.000 diff --git a/models/memory/cln28hpm/rf2_256x19_wm0/rf2_256x19_wm0.v b/models/memory/cln28hpm/rf2_256x19_wm0/rf2_256x19_wm0.v index f35efc03..3a9d05fa 100644 --- a/models/memory/cln28hpm/rf2_256x19_wm0/rf2_256x19_wm0.v +++ b/models/memory/cln28hpm/rf2_256x19_wm0/rf2_256x19_wm0.v @@ -47,6 +47,8 @@ // // Known Work Arounds: N/A // + +`define ARM_UD_MODEL `timescale 1 ns/1 ps `define ARM_MEM_PROP 1.000 `define ARM_MEM_RETAIN 1.000 diff --git a/models/memory/cln28hpm/rf2_32x128_wm1/rf2_32x128_wm1.v b/models/memory/cln28hpm/rf2_32x128_wm1/rf2_32x128_wm1.v index bd543e5a..58f6b141 100644 --- a/models/memory/cln28hpm/rf2_32x128_wm1/rf2_32x128_wm1.v +++ b/models/memory/cln28hpm/rf2_32x128_wm1/rf2_32x128_wm1.v @@ -47,19 +47,30 @@ // // Known Work Arounds: N/A // -`define ARM_UD_MODEL `timescale 1 ns/1 ps -`define ARM_MEM_PROP 1.000 -`define ARM_MEM_RETAIN 1.000 -`define ARM_MEM_PERIOD 3.000 -`define ARM_MEM_WIDTH 1.000 -`define ARM_MEM_SETUP 1.000 -`define ARM_MEM_HOLD 0.500 -`define ARM_MEM_COLLISION 3.000 +// `define ARM_MEM_PROP 1.000 +// `define ARM_MEM_RETAIN 1.000 +// `define ARM_MEM_PERIOD 3.000 +// `define ARM_MEM_WIDTH 1.000 +// `define ARM_MEM_SETUP 1.000 +// `define ARM_MEM_HOLD 0.500 +// `define ARM_MEM_COLLISION 3.000 +`define ARM_MEM_PROP 0 +`define ARM_MEM_RETAIN 0 +`define ARM_MEM_PERIOD 0 +`define ARM_MEM_WIDTH 0 +`define ARM_MEM_SETUP 0 +`define ARM_MEM_HOLD 0 +`define ARM_MEM_COLLISION 0 + // If ARM_HVM_MODEL is defined at Simulator Command Line, it Selects the Hierarchical Verilog Model `ifdef ARM_HVM_MODEL +`undef ARM_MESSAGES +`define ARM_UD_MODEL + +// ARM_MEM_SETUP, `ARM_MEM_HOLD, module datapath_latch_rf2_32x128_wm1 (CLK,Q_update,SE,SI,D,DFTRAMBYP,mem_path,XQ,Q); input CLK,Q_update,SE,SI,D,DFTRAMBYP,mem_path,XQ; diff --git a/rtl/VX_back_end.v b/rtl/VX_back_end.v index d5e349d6..64151420 100644 --- a/rtl/VX_back_end.v +++ b/rtl/VX_back_end.v @@ -1,3 +1,5 @@ +`include "VX_define.v" + module VX_back_end ( input wire clk, input wire reset, @@ -8,7 +10,6 @@ module VX_back_end ( VX_jal_response_inter VX_jal_rsp, VX_branch_response_inter VX_branch_rsp, - VX_frE_to_bckE_req_inter VX_bckE_req, VX_wb_inter VX_writeback_inter, diff --git a/rtl/VX_define.v b/rtl/VX_define.v index 76b28823..81750b5f 100644 --- a/rtl/VX_define.v +++ b/rtl/VX_define.v @@ -10,6 +10,7 @@ // `define ONLY // `define SYN 1 +`define ASIC 1 `define CACHE_NUM_BANKS 8 diff --git a/rtl/VX_gpr.v b/rtl/VX_gpr.v index a94ceae9..65969492 100644 --- a/rtl/VX_gpr.v +++ b/rtl/VX_gpr.v @@ -16,43 +16,31 @@ module VX_gpr ( wire write_enable; - assign write_enable = valid_write_request && ((VX_writeback_inter.wb != 0)); + `ifndef ASIC + assign write_enable = valid_write_request && ((VX_writeback_inter.wb != 0)) && (VX_writeback_inter.rd != 0); - // `ifndef SYN + byte_enabled_simple_dual_port_ram first_ram( + .we (write_enable), + .clk (clk), + .reset (reset), + .waddr (VX_writeback_inter.rd), + .raddr1(VX_gpr_read.rs1), + .raddr2(VX_gpr_read.rs2), + .be (VX_writeback_inter.wb_valid), + .wdata (VX_writeback_inter.write_data), + .q1 (out_a_reg_data), + .q2 (out_b_reg_data) + ); - // byte_enabled_simple_dual_port_ram first_ram( - // .we (write_enable), - // .clk (clk), - // .reset (reset), - // .waddr (VX_writeback_inter.rd), - // .raddr1(VX_gpr_read.rs1), - // .raddr2(VX_gpr_read.rs2), - // .be (VX_writeback_inter.wb_valid), - // .wdata (VX_writeback_inter.write_data), - // .q1 (out_a_reg_data), - // .q2 (out_b_reg_data) - // ); + `else - // `else + assign write_enable = valid_write_request && ((VX_writeback_inter.wb != 0)); - wire writing_to_zero = (VX_writeback_inter.rd == 5'h0); - - - reg[31:0] use_before; wire going_to_write = write_enable & (|VX_writeback_inter.wb_valid); - integer i; - always @(posedge clk) begin - if (reset) begin - use_before = 0; - end else if (going_to_write) begin - use_before[VX_writeback_inter.rd] = 1; - end - end - wire[`NT_M1:0][31:0] write_bit_mask; genvar curr_t; @@ -65,15 +53,15 @@ module VX_gpr ( wire cenb = !going_to_write; - wire cena_1 = (VX_gpr_read.rs1 == 0); - wire cena_2 = (VX_gpr_read.rs2 == 0); - + // wire cena_1 = (VX_gpr_read.rs1 == 0); + // wire cena_2 = (VX_gpr_read.rs2 == 0); + wire cena_1 = 0; + wire cena_2 = 0; wire[`NT_M1:0][31:0] temp_a; wire[`NT_M1:0][31:0] temp_b; - `ifndef SYN genvar thread; genvar curr_bit; for (thread = 0; thread < `NT; thread = thread + 1) @@ -84,18 +72,10 @@ module VX_gpr ( assign out_b_reg_data[thread][curr_bit] = (temp_b[thread][curr_bit] === 1'dx) ? 1'b0 : temp_b[thread][curr_bit]; end end - `else - assign out_a_reg_data = (cena_1 | !use_before[VX_gpr_read.rs1]) ? 0 : temp_a; - assign out_b_reg_data = (cena_2 | !use_before[VX_gpr_read.rs2]) ? 0 : temp_b; - `endif - wire[`NT_M1:0][31:0] to_write = writing_to_zero ? 0 : VX_writeback_inter.write_data; + wire[`NT_M1:0][31:0] to_write = (VX_writeback_inter.rd != 0) ? VX_writeback_inter.write_data : 0; - // wire cena_1 = 0; - // wire cena_2 = 0; - - // wire[127:0] write_bit_mask = {{32{~(VX_writeback_inter.wb_valid[3])}}, {32{~(VX_writeback_inter.wb_valid[2])}}, {32{~(VX_writeback_inter.wb_valid[1])}}, {32{~(VX_writeback_inter.wb_valid[0])}}}; /* verilator lint_off PINCONNECTEMPTY */ rf2_32x128_wm1 first_ram ( .CENYA(), @@ -173,6 +153,7 @@ module VX_gpr ( .COLLDISN(1'b1) ); /* verilator lint_on PINCONNECTEMPTY */ - // `endif + + `endif endmodule diff --git a/rtl/VX_gpr_stage.v b/rtl/VX_gpr_stage.v index 6276437a..f582bd01 100644 --- a/rtl/VX_gpr_stage.v +++ b/rtl/VX_gpr_stage.v @@ -12,6 +12,7 @@ module VX_gpr_stage ( // inputs // Instruction Information VX_frE_to_bckE_req_inter VX_bckE_req, + // WriteBack inputs VX_wb_inter VX_writeback_inter, @@ -40,9 +41,15 @@ module VX_gpr_stage ( assign VX_gpr_read.rs2 = VX_bckE_req.rs2; assign VX_gpr_read.warp_num = VX_bckE_req.warp_num; - VX_gpr_jal_inter VX_gpr_jal(); - assign VX_gpr_jal.is_jal = VX_bckE_req.jalQual; - assign VX_gpr_jal.curr_PC = VX_bckE_req.curr_PC; + `ifndef ASIC + VX_gpr_jal_inter VX_gpr_jal(); + assign VX_gpr_jal.is_jal = VX_bckE_req.jalQual; + assign VX_gpr_jal.curr_PC = VX_bckE_req.curr_PC; + `else + VX_gpr_jal_inter VX_gpr_jal(); + assign VX_gpr_jal.is_jal = VX_exec_unit_req.jalQual; + assign VX_gpr_jal.curr_PC = VX_exec_unit_req.curr_PC; + `endif VX_gpr_data_inter VX_gpr_datf(); @@ -86,9 +93,92 @@ module VX_gpr_stage ( wire stall_lsu = memory_delay; wire flush_lsu = schedule_delay && !stall_lsu; - assign gpr_stage_delay = stall_lsu; + `ifdef ASIC + wire delayed_lsu_last_cycle; + + VX_generic_register #(.N(1)) delayed_reg ( + .clk (clk), + .reset(reset), + .stall(stall_rest), + .flush(stall_rest), + .in (stall_lsu), + .out (delayed_lsu_last_cycle) + ); + + + wire[`NT_M1:0][31:0] temp_store_data; + wire[`NT_M1:0][31:0] temp_base_address; // A reg data + + wire[`NT_M1:0][31:0] real_store_data; + wire[`NT_M1:0][31:0] real_base_address; // A reg data + + wire store_curr_real = !delayed_lsu_last_cycle && stall_lsu; + + VX_generic_register #(.N(256)) lsu_data( + .clk (clk), + .reset(reset), + .stall(!store_curr_real), + .flush(stall_rest), + .in ({real_store_data, real_base_address}), + .out ({temp_store_data, temp_base_address}) + ); + + assign real_store_data = VX_lsu_req_temp.store_data; + assign real_base_address = VX_lsu_req_temp.base_address; + + + assign VX_lsu_req.store_data = (delayed_lsu_last_cycle) ? temp_store_data : real_store_data; + assign VX_lsu_req.base_address = (delayed_lsu_last_cycle) ? temp_base_address : real_base_address; + + + VX_generic_register #(.N(52)) lsu_reg( + .clk (clk), + .reset(reset), + .stall(stall_lsu), + .flush(flush_lsu), + .in ({VX_lsu_req_temp.valid, VX_lsu_req_temp.warp_num, VX_lsu_req_temp.offset, VX_lsu_req_temp.mem_read, VX_lsu_req_temp.mem_write, VX_lsu_req_temp.rd, VX_lsu_req_temp.wb}), + .out ({VX_lsu_req.valid , VX_lsu_req.warp_num , VX_lsu_req.offset , VX_lsu_req.mem_read , VX_lsu_req.mem_write , VX_lsu_req.rd , VX_lsu_req.wb }) + ); + + VX_generic_register #(.N(231)) exec_unit_reg( + .clk (clk), + .reset(reset), + .stall(stall_rest), + .flush(flush_rest), + .in ({VX_exec_unit_req_temp.valid, VX_exec_unit_req_temp.warp_num, VX_exec_unit_req_temp.curr_PC, VX_exec_unit_req_temp.PC_next, VX_exec_unit_req_temp.rd, VX_exec_unit_req_temp.wb, VX_exec_unit_req_temp.alu_op, VX_exec_unit_req_temp.rs1, VX_exec_unit_req_temp.rs2, VX_exec_unit_req_temp.rs2_src, VX_exec_unit_req_temp.itype_immed, VX_exec_unit_req_temp.upper_immed, VX_exec_unit_req_temp.branch_type, VX_exec_unit_req_temp.jalQual, VX_exec_unit_req_temp.jal, VX_exec_unit_req_temp.jal_offset, VX_exec_unit_req_temp.ebreak, VX_exec_unit_req_temp.wspawn, VX_exec_unit_req_temp.is_csr, VX_exec_unit_req_temp.csr_address, VX_exec_unit_req_temp.csr_immed, VX_exec_unit_req_temp.csr_mask}), + .out ({VX_exec_unit_req.valid , VX_exec_unit_req.warp_num , VX_exec_unit_req.curr_PC , VX_exec_unit_req.PC_next , VX_exec_unit_req.rd , VX_exec_unit_req.wb , VX_exec_unit_req.alu_op , VX_exec_unit_req.rs1 , VX_exec_unit_req.rs2 , VX_exec_unit_req.rs2_src , VX_exec_unit_req.itype_immed , VX_exec_unit_req.upper_immed , VX_exec_unit_req.branch_type , VX_exec_unit_req.jalQual , VX_exec_unit_req.jal , VX_exec_unit_req.jal_offset , VX_exec_unit_req.ebreak , VX_exec_unit_req.wspawn , VX_exec_unit_req.is_csr , VX_exec_unit_req.csr_address , VX_exec_unit_req.csr_immed , VX_exec_unit_req.csr_mask }) + ); + + assign VX_exec_unit_req.a_reg_data = real_base_address; + assign VX_exec_unit_req.b_reg_data = real_store_data; + + VX_generic_register #(.N(43)) gpu_inst_reg( + .clk (clk), + .reset(reset), + .stall(stall_rest), + .flush(flush_rest), + .in ({VX_gpu_inst_req_temp.valid, VX_gpu_inst_req_temp.warp_num, VX_gpu_inst_req_temp.is_wspawn, VX_gpu_inst_req_temp.is_tmc, VX_gpu_inst_req_temp.is_split, VX_gpu_inst_req_temp.is_barrier, VX_gpu_inst_req_temp.pc_next}), + .out ({VX_gpu_inst_req.valid , VX_gpu_inst_req.warp_num , VX_gpu_inst_req.is_wspawn , VX_gpu_inst_req.is_tmc , VX_gpu_inst_req.is_split , VX_gpu_inst_req.is_barrier , VX_gpu_inst_req.pc_next }) + ); + + assign VX_gpu_inst_req.a_reg_data = real_base_address; + assign VX_gpu_inst_req.rd2 = real_store_data; + + VX_generic_register #(.N(60)) csr_reg( + .clk (clk), + .reset(reset), + .stall(stall_rest), + .flush(flush_rest), + .in ({VX_csr_req_temp.valid, VX_csr_req_temp.warp_num, VX_csr_req_temp.rd, VX_csr_req_temp.wb, VX_csr_req_temp.is_csr, VX_csr_req_temp.csr_address, VX_csr_req_temp.csr_immed, VX_csr_req_temp.csr_mask}), + .out ({VX_csr_req.valid , VX_csr_req.warp_num , VX_csr_req.rd , VX_csr_req.wb , VX_csr_req.is_csr , VX_csr_req.csr_address , VX_csr_req.csr_immed , VX_csr_req.csr_mask }) + ); + + + // assign + + `else VX_generic_register #(.N(308)) lsu_reg( .clk (clk), @@ -126,29 +216,6 @@ module VX_gpr_stage ( .out ({VX_csr_req.valid , VX_csr_req.warp_num , VX_csr_req.rd , VX_csr_req.wb , VX_csr_req.is_csr , VX_csr_req.csr_address , VX_csr_req.csr_immed , VX_csr_req.csr_mask }) ); - - // wire zero_temp = 0; - - // VX_generic_register #(.N(256)) reg_data - // ( - // .clk (clk), - // .reset(reset), - // .stall(zero_temp), - // .flush(zero_temp), - // .in ({VX_gpr_datf.a_reg_data, VX_gpr_datf.b_reg_data}), - // .out ({VX_gpr_data.a_reg_data, VX_gpr_data.b_reg_data}) - // ); - - // wire stall = schedule_delay; - - - // VX_d_e_reg gpr_stage_reg( - // .clk (clk), - // .reset (reset), - // .in_branch_stall (stall), - // .in_freeze (zero_temp), - // .VX_frE_to_bckE_req(VX_bckE_req), - // .VX_bckE_req (VX_bckE_req_out) - // ); + `endif endmodule \ No newline at end of file diff --git a/rtl/Vortex.v b/rtl/Vortex.v index d476e7c4..d5b7b8ab 100644 --- a/rtl/Vortex.v +++ b/rtl/Vortex.v @@ -83,7 +83,6 @@ assign icache_request_pc_address = icache_request_fe.pc_address; // Front-end to Back-end VX_frE_to_bckE_req_inter VX_bckE_req(); // New instruction request to EXE/MEM - // Back-end to Front-end VX_wb_inter VX_writeback_inter(); // Writeback to GPRs VX_branch_response_inter VX_branch_rsp(); // Branch Resolution to Fetch diff --git a/rtl/modelsim/Makefile b/rtl/modelsim/Makefile index 9b628607..b6eb9776 100644 --- a/rtl/modelsim/Makefile +++ b/rtl/modelsim/Makefile @@ -84,7 +84,8 @@ SRC = \ CMD= \ --do "vcd file vortex.vcd; \ +-do "VoptFlow = 0; \ + vcd file vortex.vcd; \ vcd add -r /vortex_tb/*; \ vcd add -r /vortex/*; \ run -all; \ @@ -102,12 +103,13 @@ LOG= # vlib comp: - vlog $(OPT) -work $(LIB) $(SRC) + vlog -O0 $(OPT) -work $(LIB) $(SRC) # vlog -O0 -dpiheader vortex_dpi.h $(OPT) -work $(LIB) $(SRC) sim: comp - vsim vortex_tb $(LOG) -c -lib $(LIB) $(CMD) + vsim vortex_tb $(LOG) -c -lib $(LIB) $(CMD) > vortex_sim.log + # vsim -novopt vortex_tb $(LOG) -c -lib $(LIB) $(CMD) > vortex_sim.log diff --git a/rtl/modelsim/vortex_tb.v b/rtl/modelsim/vortex_tb.v index 24df426f..fc594daa 100644 --- a/rtl/modelsim/vortex_tb.v +++ b/rtl/modelsim/vortex_tb.v @@ -4,6 +4,8 @@ `define NUMBER_BANKS 8 `define NUM_WORDS_PER_BLOCK 4 +`define ARM_UD_MODEL + `timescale 1ns/1ps import "DPI-C" load_file = function void load_file(input string filename);