diff --git a/driver/opae/scope.cpp b/driver/opae/scope.cpp index 78d01303..34874437 100644 --- a/driver/opae/scope.cpp +++ b/driver/opae/scope.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include "scope.h" #include "vortex_afu.h" @@ -32,6 +33,20 @@ constexpr int ilog2(int n) { static constexpr int NW_BITS = ilog2(NUM_WARPS); static const scope_signal_t scope_signals[] = { + + { 32, "dram_req_addr" }, + { 1, "dram_req_rw" }, + { 16, "dram_req_byteen" }, + { 32, "dram_req_data" }, + { 29, "dram_req_tag" }, + { 32, "dram_rsp_data" }, + { 29, "dram_rsp_tag" }, + + { 32, "snp_req_addr" }, + { 1, "snp_req_invalidate" }, + { 16, "snp_req_tag" }, + { 16, "snp_rsp_tag" }, + { NW_BITS, "icache_req_warp_num" }, { 32, "icache_req_addr" }, { NW_BITS, "icache_req_tag" }, @@ -47,19 +62,6 @@ static const scope_signal_t scope_signals[] = { { NW_BITS, "dcache_req_tag" }, { 32, "dcache_rsp_data" }, { NW_BITS, "dcache_rsp_tag" }, - - { 32, "dram_req_addr" }, - { 1, "dram_req_rw" }, - { 16, "dram_req_byteen" }, - { 32, "dram_req_data" }, - { 29, "dram_req_tag" }, - { 32, "dram_rsp_data" }, - { 29, "dram_rsp_tag" }, - - { 32, "snp_req_addr" }, - { 1, "snp_req_invalidate" }, - { 16, "snp_req_tag" }, - { 16, "snp_rsp_tag" }, { NW_BITS, "decode_warp_num" }, { 32, "decode_curr_PC" }, @@ -78,6 +80,16 @@ static const scope_signal_t scope_signals[] = { { 32, "writeback_data" }, /////////////////////////////////////////////////////////////////////////// + + { 1, "dram_req_valid" }, + { 1, "dram_req_ready" }, + { 1, "dram_rsp_valid" }, + { 1, "dram_rsp_ready" }, + + { 1, "snp_req_valid" }, + { 1, "snp_req_ready" }, + { 1, "snp_rsp_valid" }, + { 1, "snp_rsp_ready" }, { 1, "icache_req_valid" }, { 1, "icache_req_ready" }, @@ -88,16 +100,6 @@ static const scope_signal_t scope_signals[] = { { 1, "dcache_req_ready" }, { NUM_THREADS, "dcache_rsp_valid" }, { 1, "dcache_rsp_ready" }, - - { 1, "dram_req_valid" }, - { 1, "dram_req_ready" }, - { 1, "dram_rsp_valid" }, - { 1, "dram_rsp_ready" }, - - { 1, "snp_req_valid" }, - { 1, "snp_req_ready" }, - { 1, "snp_rsp_valid" }, - { 1, "snp_rsp_ready" }, { NUM_THREADS, "decode_valid" }, { NUM_THREADS, "execute_valid" }, @@ -107,22 +109,20 @@ static const scope_signal_t scope_signals[] = { { 1, "exec_delay" }, { 1, "gpr_stage_delay" }, { 1, "busy" }, - - { 1, "idram_req_valid" }, - { 1, "idram_req_ready" }, - { 1, "idram_rsp_valid" }, - { 1, "idram_rsp_ready" }, }; static const int num_signals = sizeof(scope_signals) / sizeof(scope_signal_t); int vx_scope_start(fpga_handle hfpga, uint64_t delay) { if (nullptr == hfpga) - return -1; - - // set start delay - uint64_t cmd_delay = ((delay << 3) | 4); - CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_CSR_SCOPE_CMD, cmd_delay)); + return -1; + + if (delay != uint64_t(-1)) { + // set start delay + uint64_t cmd_delay = ((delay << 3) | 4); + CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_CSR_SCOPE_CMD, cmd_delay)); + std::cout << "scope start delay: " << delay << std::endl; + } return 0; } @@ -130,10 +130,13 @@ int vx_scope_start(fpga_handle hfpga, uint64_t delay) { int vx_scope_stop(fpga_handle hfpga, uint64_t delay) { if (nullptr == hfpga) return -1; - - // stop recording - uint64_t cmd_stop = ((delay << 3) | 5); - CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_CSR_SCOPE_CMD, cmd_stop)); + + if (delay != uint64_t(-1)) { + // stop recording + uint64_t cmd_stop = ((delay << 3) | 5); + CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_CSR_SCOPE_CMD, cmd_stop)); + std::cout << "scope stop delay: " << delay << std::endl; + } std::ofstream ofs("vx_scope.vcd"); @@ -243,10 +246,9 @@ int vx_scope_stop(fpga_handle hfpga, uint64_t delay) { } } } while ((frame_offset % 64) != 0); - } while (frame_no != max_frames); - std::cout << "scope trace dump done!" << std::endl; + std::cout << "scope trace dump done! - " << (timestamp/2) << " cycles" << std::endl; // verify data not valid CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_CSR_SCOPE_CMD, 0)); diff --git a/driver/opae/scope.h b/driver/opae/scope.h index 1836be84..0d9dd2d2 100644 --- a/driver/opae/scope.h +++ b/driver/opae/scope.h @@ -2,6 +2,6 @@ #include -int vx_scope_start(fpga_handle hfpga, uint64_t delay); +int vx_scope_start(fpga_handle hfpga, uint64_t delay = -1); -int vx_scope_stop(fpga_handle hfpga, uint64_t delay); \ No newline at end of file +int vx_scope_stop(fpga_handle hfpga, uint64_t delay = -1); \ No newline at end of file diff --git a/driver/tests/basic/basic.cpp b/driver/tests/basic/basic.cpp index 55386122..7301e12d 100755 --- a/driver/tests/basic/basic.cpp +++ b/driver/tests/basic/basic.cpp @@ -112,14 +112,25 @@ int run_kernel_test(const kernel_arg_t& kernel_arg, int errors = 0; // update source buffer - for (uint32_t i = 0; i < num_points; ++i) { - ((int32_t*)vx_host_ptr(buffer))[i] = i; + { + auto buf_ptr = (int32_t*)vx_host_ptr(buffer); + for (uint32_t i = 0; i < num_points; ++i) { + buf_ptr[i] = i; + } } - - // write buffer to local memory - std::cout << "write buffer to local memory" << std::endl; + std::cout << "upload source buffer" << std::endl; RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.src_ptr, buf_size, 0)); + // clear destination buffer + { + auto buf_ptr = (int32_t*)vx_host_ptr(buffer); + for (uint32_t i = 0; i < num_points; ++i) { + buf_ptr[i] = 0xffffffff; + } + } + std::cout << "clear destination buffer" << std::endl; + RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.dst_ptr, buf_size, 0)); + // start device std::cout << "start device" << std::endl; RT_CHECK(vx_start(device)); @@ -132,11 +143,6 @@ int run_kernel_test(const kernel_arg_t& kernel_arg, std::cout << "flush the caches" << std::endl; RT_CHECK(vx_flush_caches(device, kernel_arg.dst_ptr, buf_size)); - // clear destination buffer - for (uint32_t i = 0; i < num_points; ++i) { - ((int32_t*)vx_host_ptr(buffer))[i] = 0; - } - // read buffer from local memory std::cout << "read buffer from local memory" << std::endl; RT_CHECK(vx_copy_from_dev(buffer, kernel_arg.dst_ptr, buf_size, 0)); diff --git a/driver/tests/demo/demo.cpp b/driver/tests/demo/demo.cpp index 234fbd5a..71f42e0b 100644 --- a/driver/tests/demo/demo.cpp +++ b/driver/tests/demo/demo.cpp @@ -181,13 +181,11 @@ int main(int argc, char *argv[]) { { auto buf_ptr = (int32_t*)vx_host_ptr(buffer); for (uint32_t i = 0; i < num_points; ++i) { - buf_ptr[i] = 0; + buf_ptr[i] = 0xffffffff; } } std::cout << "clear destination buffer" << std::endl; - RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.dst_ptr, buf_size, 0)); - - + RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.dst_ptr, buf_size, 0)); // run tests std::cout << "run tests" << std::endl; diff --git a/hw/opae/README b/hw/opae/README index 6eb9f76c..853e01f7 100644 --- a/hw/opae/README +++ b/hw/opae/README @@ -70,8 +70,6 @@ run -all # compress FPGA output files tar -zcvf output_files_1c.tar.gz `find ./build_fpga_1c -type f \( -iname \*.rpt -o -iname \*.txt -o -iname \*summary -o -iname \*.log \)` -tar -zcvf output_files_1c_rel.tar.gz `find ./build_fpga_1c_rel -type f \( -iname \*.rpt -o -iname \*.txt -o -iname \*summary -o -iname \*.log \)` -tar -zcvf output_files_2c_rel.tar.gz `find ./build_fpga_2c_rel -type f \( -iname \*.rpt -o -iname \*.txt -o -iname \*summary -o -iname \*.log \)` # compress VCD trace tar -zcvf vortex.vcd.tar.gz ./build_ase_1c/work/vortex.vcd @@ -87,4 +85,7 @@ ps -u tinebp kill -9 # fixing device resource busy issue when deleting /build_ase_1c/ -lsof +D build_ase_1c \ No newline at end of file +lsof +D build_ase_1c + +# quick off cache synthesis +make -C cache > cache/build.log 2>&1 & \ No newline at end of file diff --git a/hw/opae/vortex_afu.sv b/hw/opae/vortex_afu.sv index 7b43a733..09805c37 100644 --- a/hw/opae/vortex_afu.sv +++ b/hw/opae/vortex_afu.sv @@ -803,6 +803,8 @@ end `ifdef SCOPE `SCOPE_SIGNALS_DECL +localparam SCOPE_DATAW = $bits({`SCOPE_SIGNALS_DATA_LIST `SCOPE_SIGNALS_UPD_LIST}); +localparam SCOPE_SR_DEPTH = 2; `SCOPE_ASSIGN(scope_dram_req_valid, vx_dram_req_valid); `SCOPE_ASSIGN(scope_dram_req_addr, {vx_dram_req_addr, 4'b0}); @@ -827,8 +829,6 @@ end `SCOPE_ASSIGN(scope_snp_rsp_tag, vx_snp_rsp_tag); `SCOPE_ASSIGN(scope_snp_rsp_ready, vx_snp_rsp_ready); -`STATIC_ASSERT($bits({`SCOPE_SIGNALS_DATA_LIST `SCOPE_SIGNALS_UPD_LIST}) == 626, "oops!") - wire scope_changed = (scope_icache_req_valid && scope_icache_req_ready) || (scope_icache_rsp_valid && scope_icache_rsp_ready) || ((| scope_dcache_req_valid) && scope_dcache_req_ready) @@ -839,20 +839,38 @@ wire scope_changed = (scope_icache_req_valid && scope_icache_req_ready) || (scope_snp_rsp_valid && scope_snp_rsp_ready); wire scope_start = vx_reset; -wire scope_stop = 0; + +wire [SCOPE_DATAW+1:0] scope_data_in_st[SCOPE_SR_DEPTH-1:0]; +wire [SCOPE_DATAW+1:0] scope_data_in; +assign scope_data_in_st[0] = {`SCOPE_SIGNALS_DATA_LIST `SCOPE_SIGNALS_UPD_LIST, scope_changed, scope_start}; +assign scope_data_in = scope_data_in_st[SCOPE_SR_DEPTH-1]; + +genvar i; +for (i = 1; i < SCOPE_SR_DEPTH; i++) begin + VX_generic_register #( + .N (SCOPE_DATAW+2) + ) scope_sr ( + .clk (clk), + .reset (SoftReset), + .stall (0), + .flush (0), + .in (scope_data_in_st[i-1]), + .out (scope_data_in_st[i]) + ); +end VX_scope #( - .DATAW ($bits({`SCOPE_SIGNALS_DATA_LIST `SCOPE_SIGNALS_UPD_LIST})), + .DATAW (SCOPE_DATAW), .BUSW (64), .SIZE (4096), .UPDW ($bits({`SCOPE_SIGNALS_UPD_LIST})) ) scope ( .clk (clk), .reset (SoftReset), - .start (scope_start), - .stop (scope_stop), - .changed (scope_changed), - .data_in ({`SCOPE_SIGNALS_DATA_LIST `SCOPE_SIGNALS_UPD_LIST}), + .start (scope_data_in[0]), + .stop (0), + .changed (scope_data_in[1]), + .data_in (scope_data_in[SCOPE_DATAW+1:2]), .bus_in (csr_scope_cmd), .bus_out (csr_scope_data), .bus_read (csr_scope_read), diff --git a/hw/rtl/VX_alu_unit.v b/hw/rtl/VX_alu_unit.v index bc02a785..51e1bb65 100644 --- a/hw/rtl/VX_alu_unit.v +++ b/hw/rtl/VX_alu_unit.v @@ -13,8 +13,8 @@ module VX_alu_unit ( output reg [31:0] alu_result, output reg alu_stall ); - localparam div_pipeline_len = 20; - localparam mul_pipeline_len = 8; + localparam DIV_PIPELINE_LEN = 20; + localparam MUL_PIPELINE_LEN = 8; wire[31:0] unsigned_div_result; wire[31:0] unsigned_rem_result; @@ -31,7 +31,7 @@ module VX_alu_unit ( .WIDTHN(32), .WIDTHD(32), .SPEED("HIGHEST"), - .PIPELINE(div_pipeline_len) + .PIPELINE(DIV_PIPELINE_LEN) ) unsigned_div ( .clock(clk), .aclr(1'b0), @@ -48,7 +48,7 @@ module VX_alu_unit ( .NREP("SIGNED"), .DREP("SIGNED"), .SPEED("HIGHEST"), - .PIPELINE(div_pipeline_len) + .PIPELINE(DIV_PIPELINE_LEN) ) signed_div ( .clock(clk), .aclr(1'b0), @@ -65,7 +65,7 @@ module VX_alu_unit ( .WIDTHP(64), .SPEED("HIGHEST"), .FORCE_LE("YES"), - .PIPELINE(mul_pipeline_len) + .PIPELINE(MUL_PIPELINE_LEN) ) multiplier ( .clock(clk), .aclr(1'b0), @@ -93,11 +93,11 @@ module VX_alu_unit ( `ALU_DIV, `ALU_DIVU, `ALU_REM, - `ALU_REMU: curr_inst_delay = div_pipeline_len; + `ALU_REMU: curr_inst_delay = DIV_PIPELINE_LEN; `ALU_MUL, `ALU_MULH, `ALU_MULHSU, - `ALU_MULHU: curr_inst_delay = mul_pipeline_len; + `ALU_MULHU: curr_inst_delay = MUL_PIPELINE_LEN; default: curr_inst_delay = 0; endcase // alu_op end diff --git a/hw/rtl/VX_gpr.v b/hw/rtl/VX_gpr.v index 9456e056..6953bde5 100644 --- a/hw/rtl/VX_gpr.v +++ b/hw/rtl/VX_gpr.v @@ -10,27 +10,26 @@ module VX_gpr ( output wire [`NUM_THREADS-1:0][`NUM_GPRS-1:0] a_reg_data, output wire [`NUM_THREADS-1:0][`NUM_GPRS-1:0] b_reg_data ); - wire [`NUM_THREADS-1:0][`NUM_GPRS-1:0] a_reg_data_uqual; - wire [`NUM_THREADS-1:0][`NUM_GPRS-1:0] b_reg_data_uqual; + wire [`NUM_THREADS-1:0][`NUM_GPRS-1:0] a_reg_data_unqual; + wire [`NUM_THREADS-1:0][`NUM_GPRS-1:0] b_reg_data_unqual; - assign a_reg_data = (gpr_read_if.rs1 != 0) ? a_reg_data_uqual : 0; - assign b_reg_data = (gpr_read_if.rs2 != 0) ? b_reg_data_uqual : 0; + assign a_reg_data = (gpr_read_if.rs1 != 0) ? a_reg_data_unqual : 0; + assign b_reg_data = (gpr_read_if.rs2 != 0) ? b_reg_data_unqual : 0; - wire write_enable = write_ce && ((writeback_if.wb != 0)); + wire [`NUM_THREADS-1:0] write_enable = writeback_if.valid & {`NUM_THREADS{write_ce && (writeback_if.wb != 0)}}; `ifndef ASIC VX_gpr_ram gpr_ram ( - .we (write_enable), .clk (clk), .reset (reset), .waddr (writeback_if.rd), .raddr1 (gpr_read_if.rs1), .raddr2 (gpr_read_if.rs2), - .be (writeback_if.valid), + .we (write_enable), .wdata (writeback_if.data), - .q1 (a_reg_data_uqual), - .q2 (b_reg_data_uqual) + .q1 (a_reg_data_unqual), + .q2 (b_reg_data_unqual) ); `else @@ -55,13 +54,13 @@ module VX_gpr ( genvar j; for (i = 0; i < `NUM_THREADS; i++) begin for (j = 0; j < `NUM_GPRS; j++) begin - assign a_reg_data_uqual[i][j] = ((tmp_a[i][j] === 1'dx) || cena_1) ? 1'b0 : tmp_a[i][j]; - assign b_reg_data_uqual[i][j] = ((tmp_b[i][j] === 1'dx) || cena_2) ? 1'b0 : tmp_b[i][j]; + assign a_reg_data_unqual[i][j] = ((tmp_a[i][j] === 1'dx) || cena_1) ? 1'b0 : tmp_a[i][j]; + assign b_reg_data_unqual[i][j] = ((tmp_b[i][j] === 1'dx) || cena_2) ? 1'b0 : tmp_b[i][j]; end end `else - assign a_reg_data_uqual = tmp_a; - assign b_reg_data_uqual = tmp_b; + assign a_reg_data_unqual = tmp_a; + assign b_reg_data_unqual = tmp_b; `endif wire [`NUM_THREADS-1:0][`NUM_GPRS-1:0] to_write = writeback_if.write_data; diff --git a/hw/rtl/VX_gpr_ram.v b/hw/rtl/VX_gpr_ram.v index 7977f839..4b2e5d01 100644 --- a/hw/rtl/VX_gpr_ram.v +++ b/hw/rtl/VX_gpr_ram.v @@ -3,27 +3,24 @@ module VX_gpr_ram ( input wire clk, input wire reset, - input wire we, input wire [4:0] waddr, input wire [4:0] raddr1, input wire [4:0] raddr2, - input wire [`NUM_THREADS-1:0] be, + input wire [`NUM_THREADS-1:0] we, input wire [`NUM_THREADS-1:0][31:0] wdata, output reg [`NUM_THREADS-1:0][31:0] q1, output reg [`NUM_THREADS-1:0][31:0] q2 ); reg [`NUM_THREADS-1:0][31:0] ram[31:0]; - integer i; - `UNUSED_VAR(reset) - always @(posedge clk) begin - if (we) begin - for (i = 0; i < `NUM_THREADS; i++) begin - if (be[i]) begin - ram[waddr][i] <= wdata[i]; - end + genvar i; + + for (i = 0; i < `NUM_THREADS; i++) begin + always @(posedge clk) begin + if (we[i]) begin + ram[waddr][i] <= wdata[i]; end end end diff --git a/hw/rtl/VX_icache_stage.v b/hw/rtl/VX_icache_stage.v index 81e7ef1b..91f04ca3 100644 --- a/hw/rtl/VX_icache_stage.v +++ b/hw/rtl/VX_icache_stage.v @@ -42,19 +42,16 @@ module VX_icache_stage #( .full (mrq_full), .pop (mrq_pop), .read_addr (mrq_read_addr), - .read_data ({dbg_mrq_write_addr, fe_inst_meta_id.inst_pc, fe_inst_meta_id.warp_num}) + .read_data ({dbg_mrq_write_addr, fe_inst_meta_id.inst_pc, fe_inst_meta_id.warp_num}), + `UNUSED_PIN (empty) ); always @(posedge clk) begin - if (reset) begin - //-- - end else begin - if (mrq_push) begin - valid_threads[fe_inst_meta_fi.warp_num] <= fe_inst_meta_fi.valid; - end - if (mrq_pop) begin - assert(mrq_read_addr == dbg_mrq_write_addr); - end + if (mrq_push) begin + valid_threads[fe_inst_meta_fi.warp_num] <= fe_inst_meta_fi.valid; + end + if (mrq_pop) begin + assert(mrq_read_addr == dbg_mrq_write_addr); end end diff --git a/hw/rtl/VX_lsu_unit.v b/hw/rtl/VX_lsu_unit.v index 0d0f5e16..4dcd15df 100644 --- a/hw/rtl/VX_lsu_unit.v +++ b/hw/rtl/VX_lsu_unit.v @@ -105,20 +105,17 @@ module VX_lsu_unit #( .full (mrq_full), .pop (mrq_pop), .read_addr (mrq_read_addr), - .read_data ({dbg_mrq_write_addr, mem_wb_if.curr_PC, mem_wb_if.wb, mem_rsp_offset, core_rsp_mem_read, mem_wb_if.rd, mem_wb_if.warp_num}) + .read_data ({dbg_mrq_write_addr, mem_wb_if.curr_PC, mem_wb_if.wb, mem_rsp_offset, core_rsp_mem_read, mem_wb_if.rd, mem_wb_if.warp_num}), + `UNUSED_PIN (empty) ); always @(posedge clk) begin - if (reset) begin - //-- - end else begin - if (mrq_push) begin - mem_rsp_mask[mrq_write_addr] <= use_valid; - end - if (mrq_pop_part) begin - mem_rsp_mask[mrq_read_addr] <= mem_rsp_mask_upd; - assert(mrq_read_addr == dbg_mrq_write_addr); - end + if (mrq_push) begin + mem_rsp_mask[mrq_write_addr] <= use_valid; + end + if (mrq_pop_part) begin + mem_rsp_mask[mrq_read_addr] <= mem_rsp_mask_upd; + assert(mrq_read_addr == dbg_mrq_write_addr); end end diff --git a/hw/rtl/VX_scheduler.v b/hw/rtl/VX_scheduler.v index 9744533e..4dcc45ca 100644 --- a/hw/rtl/VX_scheduler.v +++ b/hw/rtl/VX_scheduler.v @@ -57,7 +57,7 @@ module VX_scheduler ( reg [CTVW-1:0] count_valid_next = (acquire_rd && ~(release_rd && (0 == valid_wb_new_mask))) ? (count_valid + 1) : (~acquire_rd && (release_rd && (0 == valid_wb_new_mask))) ? (count_valid - 1) : - count_valid; + count_valid; always @(posedge clk) begin if (reset) begin diff --git a/hw/rtl/VX_scope.vh b/hw/rtl/VX_scope.vh index b2bcf632..d2c9dcd0 100644 --- a/hw/rtl/VX_scope.vh +++ b/hw/rtl/VX_scope.vh @@ -4,6 +4,17 @@ `ifdef SCOPE `define SCOPE_SIGNALS_DATA_LIST \ + scope_dram_req_addr, \ + scope_dram_req_rw, \ + scope_dram_req_byteen, \ + scope_dram_req_data, \ + scope_dram_req_tag, \ + scope_dram_rsp_data, \ + scope_dram_rsp_tag, \ + scope_snp_req_addr, \ + scope_snp_req_invalidate, \ + scope_snp_req_tag, \ + scope_snp_rsp_tag, \ scope_icache_req_warp_num, \ scope_icache_req_addr, \ scope_icache_req_tag, \ @@ -18,17 +29,6 @@ scope_dcache_req_tag, \ scope_dcache_rsp_data, \ scope_dcache_rsp_tag, \ - scope_dram_req_addr, \ - scope_dram_req_rw, \ - scope_dram_req_byteen, \ - scope_dram_req_data, \ - scope_dram_req_tag, \ - scope_dram_rsp_data, \ - scope_dram_rsp_tag, \ - scope_snp_req_addr, \ - scope_snp_req_invalidate, \ - scope_snp_req_tag, \ - scope_snp_rsp_tag, \ scope_decode_warp_num, \ scope_decode_curr_PC, \ scope_decode_is_jal, \ @@ -45,14 +45,6 @@ `define SCOPE_SIGNALS_UPD_LIST \ - scope_icache_req_valid, \ - scope_icache_req_ready, \ - scope_icache_rsp_valid, \ - scope_icache_rsp_ready, \ - scope_dcache_req_valid, \ - scope_dcache_req_ready, \ - scope_dcache_rsp_valid, \ - scope_dcache_rsp_ready, \ scope_dram_req_valid, \ scope_dram_req_ready, \ scope_dram_rsp_valid, \ @@ -61,6 +53,14 @@ scope_snp_req_ready, \ scope_snp_rsp_valid, \ scope_snp_rsp_ready, \ + scope_icache_req_valid, \ + scope_icache_req_ready, \ + scope_icache_rsp_valid, \ + scope_icache_rsp_ready, \ + scope_dcache_req_valid, \ + scope_dcache_req_ready, \ + scope_dcache_rsp_valid, \ + scope_dcache_rsp_ready, \ scope_decode_valid, \ scope_execute_valid, \ scope_writeback_valid, \ @@ -68,13 +68,27 @@ scope_memory_delay, \ scope_exec_delay, \ scope_gpr_stage_delay, \ - scope_busy, \ - scope_idram_req_valid, \ - scope_idram_req_ready, \ - scope_idram_rsp_valid, \ - scope_idram_rsp_ready + scope_busy `define SCOPE_SIGNALS_DECL \ + wire scope_dram_req_valid; \ + wire [31:0] scope_dram_req_addr; \ + wire scope_dram_req_rw; \ + wire [15:0] scope_dram_req_byteen; \ + wire [31:0] scope_dram_req_data; \ + wire [`VX_DRAM_TAG_WIDTH-1:0] scope_dram_req_tag; \ + wire scope_dram_req_ready; \ + wire scope_dram_rsp_valid; \ + wire [31:0] scope_dram_rsp_data; \ + wire [`VX_DRAM_TAG_WIDTH-1:0] scope_dram_rsp_tag; \ + wire scope_dram_rsp_ready; \ + wire scope_snp_req_valid; \ + wire [31:0] scope_snp_req_addr; \ + wire scope_snp_req_invalidate; \ + wire [`VX_SNP_TAG_WIDTH-1:0] scope_snp_req_tag; \ + wire scope_snp_req_ready; \ + wire scope_snp_rsp_valid; \ + wire [`VX_SNP_TAG_WIDTH-1:0] scope_snp_rsp_tag; \ wire scope_icache_req_valid; \ wire [`NW_BITS-1:0] scope_icache_req_warp_num; \ wire [31:0] scope_icache_req_addr; \ @@ -97,24 +111,6 @@ wire [31:0] scope_dcache_rsp_data; \ wire [`DCORE_TAG_WIDTH-1:0] scope_dcache_rsp_tag; \ wire scope_dcache_rsp_ready; \ - wire scope_dram_req_valid; \ - wire [31:0] scope_dram_req_addr; \ - wire scope_dram_req_rw; \ - wire [15:0] scope_dram_req_byteen; \ - wire [31:0] scope_dram_req_data; \ - wire [`VX_DRAM_TAG_WIDTH-1:0] scope_dram_req_tag; \ - wire scope_dram_req_ready; \ - wire scope_dram_rsp_valid; \ - wire [31:0] scope_dram_rsp_data; \ - wire [`VX_DRAM_TAG_WIDTH-1:0] scope_dram_rsp_tag; \ - wire scope_dram_rsp_ready; \ - wire scope_snp_req_valid; \ - wire [31:0] scope_snp_req_addr; \ - wire scope_snp_req_invalidate; \ - wire [`VX_SNP_TAG_WIDTH-1:0] scope_snp_req_tag; \ - wire scope_snp_req_ready; \ - wire scope_snp_rsp_valid; \ - wire [`VX_SNP_TAG_WIDTH-1:0] scope_snp_rsp_tag; \ wire scope_busy; \ wire scope_snp_rsp_ready; \ wire scope_schedule_delay; \ @@ -136,11 +132,7 @@ wire [`NW_BITS-1:0] scope_writeback_warp_num; \ wire [1:0] scope_writeback_wb; \ wire [4:0] scope_writeback_rd; \ - wire [31:0] scope_writeback_data; \ - wire scope_idram_req_valid; \ - wire scope_idram_req_ready; \ - wire scope_idram_rsp_valid; \ - wire scope_idram_rsp_ready; + wire [31:0] scope_writeback_data; `define SCOPE_SIGNALS_ISTAGE_IO \ output wire scope_icache_req_valid, \ @@ -171,10 +163,6 @@ `define SCOPE_SIGNALS_CORE_IO \ `define SCOPE_SIGNALS_ICACHE_IO \ - output wire scope_idram_req_valid, \ - output wire scope_idram_req_ready, \ - output wire scope_idram_rsp_valid, \ - output wire scope_idram_rsp_ready, `define SCOPE_SIGNALS_PIPELINE_IO \ output wire scope_busy, \ @@ -230,10 +218,6 @@ `define SCOPE_SIGNALS_CORE_BIND \ `define SCOPE_SIGNALS_ICACHE_BIND \ - .scope_idram_req_valid (scope_idram_req_valid), \ - .scope_idram_req_ready (scope_idram_req_ready), \ - .scope_idram_rsp_valid (scope_idram_rsp_valid), \ - .scope_idram_rsp_ready (scope_idram_rsp_ready), `define SCOPE_SIGNALS_PIPELINE_BIND \ .scope_busy (scope_busy), \ diff --git a/hw/rtl/cache/VX_cache.v b/hw/rtl/cache/VX_cache.v index 3c439ffb..b3b56741 100644 --- a/hw/rtl/cache/VX_cache.v +++ b/hw/rtl/cache/VX_cache.v @@ -484,11 +484,6 @@ module VX_cache #( .snp_rsp_valid (snp_rsp_valid), .snp_rsp_tag (snp_rsp_tag), .snp_rsp_ready (snp_rsp_ready) - ); - - `SCOPE_ASSIGN(scope_idram_req_valid, per_bank_dram_fill_req_valid[0]); - `SCOPE_ASSIGN(scope_idram_req_ready, dram_fill_req_ready); - `SCOPE_ASSIGN(scope_idram_rsp_valid, per_bank_core_rsp_valid[0]); - `SCOPE_ASSIGN(scope_idram_rsp_ready, per_bank_core_rsp_ready[0]); + ); endmodule \ No newline at end of file diff --git a/hw/rtl/cache/VX_cache_miss_resrv.v b/hw/rtl/cache/VX_cache_miss_resrv.v index 2c8e5883..25fc234b 100644 --- a/hw/rtl/cache/VX_cache_miss_resrv.v +++ b/hw/rtl/cache/VX_cache_miss_resrv.v @@ -93,7 +93,14 @@ module VX_cache_miss_resrv #( assign miss_resrv_valid_st0 = dequeue_possible; assign miss_resrv_addr_st0 = addr_table[dequeue_index]; - assign {miss_resrv_data_st0, miss_resrv_tid_st0, miss_resrv_tag_st0, miss_resrv_rw_st0, miss_resrv_byteen_st0, miss_resrv_wsel_st0, miss_resrv_is_snp_st0, miss_resrv_snp_invalidate_st0} = metadata_table[dequeue_index]; + assign {miss_resrv_data_st0, + miss_resrv_tid_st0, + miss_resrv_tag_st0, + miss_resrv_rw_st0, + miss_resrv_byteen_st0, + miss_resrv_wsel_st0, + miss_resrv_is_snp_st0, + miss_resrv_snp_invalidate_st0} = metadata_table[dequeue_index]; wire mrvq_push = miss_add && enqueue_possible && !from_mrvq; wire mrvq_pop = miss_resrv_pop && dequeue_possible; diff --git a/hw/rtl/cache/VX_snp_forwarder.v b/hw/rtl/cache/VX_snp_forwarder.v index 1552929f..7af7f80c 100644 --- a/hw/rtl/cache/VX_snp_forwarder.v +++ b/hw/rtl/cache/VX_snp_forwarder.v @@ -67,24 +67,21 @@ module VX_snp_forwarder #( .reset (reset), .write_data ({sfq_write_addr, snp_req_addr, snp_req_invalidate, snp_req_tag}), .write_addr (sfq_write_addr), - .push (sfq_push), - .full (sfq_full), - .pop (sfq_pop), + .push (sfq_push), + .pop (sfq_pop), + .full (sfq_full), .read_addr (sfq_read_addr), - .read_data ({dbg_sfq_write_addr, snp_rsp_addr, snp_rsp_invalidate, snp_rsp_tag}) + .read_data ({dbg_sfq_write_addr, snp_rsp_addr, snp_rsp_invalidate, snp_rsp_tag}), + `UNUSED_PIN (empty) ); always @(posedge clk) begin - if (reset) begin - //-- - end else begin - if (sfq_push) begin - pending_cntrs[sfq_write_addr] <= NUM_REQUESTS; - end - if (fwdin_fire) begin - pending_cntrs[sfq_read_addr] <= pending_cntrs[sfq_read_addr] - 1; - assert(sfq_read_addr == dbg_sfq_write_addr); - end + if (sfq_push) begin + pending_cntrs[sfq_write_addr] <= NUM_REQUESTS; + end + if (fwdin_fire) begin + pending_cntrs[sfq_read_addr] <= pending_cntrs[sfq_read_addr] - 1; + assert(sfq_read_addr == dbg_sfq_write_addr); end end diff --git a/hw/rtl/cache/VX_tag_data_access.v b/hw/rtl/cache/VX_tag_data_access.v index bbb791ac..388e3cf6 100644 --- a/hw/rtl/cache/VX_tag_data_access.v +++ b/hw/rtl/cache/VX_tag_data_access.v @@ -111,7 +111,7 @@ module VX_tag_data_access #( VX_generic_register #( .N(1 + 1 + BANK_LINE_SIZE + `TAG_SELECT_BITS + `BANK_LINE_WIDTH), - .PassThru(1) + .PASSTHRU(1) ) s0_1_c0 ( .clk (clk), .reset (reset), diff --git a/hw/rtl/libs/VX_divide.v b/hw/rtl/libs/VX_divide.v index 948c8d80..4862a542 100644 --- a/hw/rtl/libs/VX_divide.v +++ b/hw/rtl/libs/VX_divide.v @@ -104,8 +104,8 @@ module VX_divide #( remainder = 0; end else begin - quotient = $signed($signed(numer_pipe_end) / $signed(denom_pipe_end)); - remainder = $signed($signed(numer_pipe_end) % $signed(denom_pipe_end)); + quotient = $signed(numer_pipe_end) / $signed(denom_pipe_end); + remainder = $signed(numer_pipe_end) % $signed(denom_pipe_end); end end end diff --git a/hw/rtl/libs/VX_generic_queue.v b/hw/rtl/libs/VX_generic_queue.v index 702daf55..0c1fb742 100644 --- a/hw/rtl/libs/VX_generic_queue.v +++ b/hw/rtl/libs/VX_generic_queue.v @@ -60,12 +60,12 @@ module VX_generic_queue #( if (0 == BUFFERED_OUTPUT) begin - reg [`LOG2UP(SIZE):0] wr_ptr_r; reg [`LOG2UP(SIZE):0] rd_ptr_r; - - wire [`LOG2UP(SIZE)-1:0] wr_ptr_a = wr_ptr_r[`LOG2UP(SIZE)-1:0]; + reg [`LOG2UP(SIZE):0] wr_ptr_r; + wire [`LOG2UP(SIZE)-1:0] rd_ptr_a = rd_ptr_r[`LOG2UP(SIZE)-1:0]; - + wire [`LOG2UP(SIZE)-1:0] wr_ptr_a = wr_ptr_r[`LOG2UP(SIZE)-1:0]; + always @(posedge clk) begin if (reset) begin rd_ptr_r <= 0; @@ -108,12 +108,14 @@ module VX_generic_queue #( always @(posedge clk) begin if (reset) begin + size_r <= 0; + head_r <= 0; + curr_r <= 0; wr_ptr_r <= 0; rd_ptr_r <= 0; rd_ptr_next_r <= 1; empty_r <= 1; - full_r <= 0; - size_r <= 0; + full_r <= 0; end else begin if (writing) begin data[wr_ptr_r] <= data_in; diff --git a/hw/rtl/libs/VX_generic_register.v b/hw/rtl/libs/VX_generic_register.v index 8f1036de..869efdd4 100644 --- a/hw/rtl/libs/VX_generic_register.v +++ b/hw/rtl/libs/VX_generic_register.v @@ -2,7 +2,7 @@ module VX_generic_register #( parameter N, - parameter PassThru = 0 + parameter PASSTHRU = 0 ) ( input wire clk, input wire reset, @@ -23,6 +23,6 @@ module VX_generic_register #( end end - assign out = PassThru ? in : value; + assign out = PASSTHRU ? in : value; endmodule \ No newline at end of file diff --git a/hw/rtl/libs/VX_indexable_queue.v b/hw/rtl/libs/VX_indexable_queue.v index c655331f..f18e067e 100644 --- a/hw/rtl/libs/VX_indexable_queue.v +++ b/hw/rtl/libs/VX_indexable_queue.v @@ -9,18 +9,18 @@ module VX_indexable_queue #( input wire [DATAW-1:0] write_data, output wire [`LOG2UP(SIZE)-1:0] write_addr, input wire push, - output wire full, - input wire pop, + output wire full, + output wire empty, input wire [`LOG2UP(SIZE)-1:0] read_addr, output wire [DATAW-1:0] read_data ); reg [DATAW-1:0] data [SIZE-1:0]; - reg valid [SIZE-1:0]; + reg [SIZE-1:0] valid; reg [`LOG2UP(SIZE):0] rd_ptr, wr_ptr; wire [`LOG2UP(SIZE)-1:0] rd_a, wr_a; - wire enqueue, dequeue, empty; + wire enqueue, dequeue; assign rd_a = rd_ptr[`LOG2UP(SIZE)-1:0]; assign wr_a = wr_ptr[`LOG2UP(SIZE)-1:0]; @@ -31,10 +31,13 @@ module VX_indexable_queue #( assign enqueue = push && ~full; assign dequeue = ~empty && ~valid[rd_a]; // auto-remove when head is invalid + integer i; + always @(posedge clk) begin if (reset) begin rd_ptr <= 0; wr_ptr <= 0; + valid <= 0; end else begin if (enqueue) begin data[wr_a] <= write_data; diff --git a/hw/rtl/libs/VX_mult.v b/hw/rtl/libs/VX_mult.v index 3b7aaf3b..03187b8a 100644 --- a/hw/rtl/libs/VX_mult.v +++ b/hw/rtl/libs/VX_mult.v @@ -107,7 +107,7 @@ module VX_mult #( /* * * * * * * * * * * * * * * * * * * * * * */ if (REP == "SIGNED") begin - assign result = $signed($signed(dataa_pipe_end)*$signed(datab_pipe_end)); + assign result = $signed(dataa_pipe_end) * $signed(datab_pipe_end); end else begin assign result = dataa_pipe_end * datab_pipe_end; diff --git a/hw/rtl/libs/VX_scope.v b/hw/rtl/libs/VX_scope.v index a9bfbbcb..7ec7b5f3 100644 --- a/hw/rtl/libs/VX_scope.v +++ b/hw/rtl/libs/VX_scope.v @@ -58,21 +58,21 @@ module VX_scope #( always @(posedge clk) begin if (reset) begin + out_cmd <= $bits(out_cmd)'(CMD_GET_VALID); raddr <= 0; waddr <= 0; + waddr_end <= $bits(waddr)'(SIZE-1); + started <= 0; start_wait <= 0; recording <= 0; - delay_cntr <= 0; - read_offset <= 0; - data_valid <= 0; - out_cmd <= $bits(out_cmd)'(CMD_GET_VALID); delay_val <= 0; - waddr_end <= $bits(waddr)'(SIZE-1); + delay_cntr <= 0; delta <= 0; - prev_trigger_id <= 0; - read_delta <= 0; - started <= 0; delta_flush <= 0; + prev_trigger_id <= 0; + read_offset <= 0; + read_delta <= 0; + data_valid <= 0; end else begin if (bus_write) begin @@ -88,12 +88,12 @@ module VX_scope #( end if (start && !started) begin - started <= 1; + started <= 1; + delta_flush <= 1; if (0 == delay_val) begin start_wait <= 0; recording <= 1; - delay_cntr <= 0; - delta_flush <= 1; + delay_cntr <= 0; end else begin start_wait <= 1; recording <= 0; @@ -104,9 +104,8 @@ module VX_scope #( if (start_wait) begin delay_cntr <= delay_cntr - 1; if (1 == delay_cntr) begin - start_wait <= 0; - recording <= 1; - delta_flush <= 1; + start_wait <= 0; + recording <= 1; end end @@ -181,7 +180,7 @@ module VX_scope #( `ifdef DBG_PRINT_SCOPE always @(posedge clk) begin if (bus_read) begin - $display("%t: scope-read: cmd=%0d, out=0x%0h, addr=%0d", $time, out_cmd, bus_out, raddr); + $display("%t: scope-read: cmd=%0d, out=%0h, addr=%0d", $time, out_cmd, bus_out, raddr); end if (bus_write) begin $display("%t: scope-write: cmd=%0d, value=%0d", $time, cmd_type, cmd_data); diff --git a/simX/cache_simX.v b/simX/cache_simX.v index a00613e3..34174be3 100644 --- a/simX/cache_simX.v +++ b/simX/cache_simX.v @@ -75,7 +75,7 @@ module cache_simX ( .VX_icache_rsp (VX_icache_rsp), .VX_dcache_req (VX_dcache_req), .VX_dcache_rsp (VX_dcache_rsp) - ); + ); always @(posedge clk, posedge reset) begin if (reset)