diff --git a/driver/opae/scope.cpp b/driver/opae/scope.cpp index 71b848a6..068155cc 100644 --- a/driver/opae/scope.cpp +++ b/driver/opae/scope.cpp @@ -80,6 +80,7 @@ static const scope_signal_t scope_signals[] = { { 1, "memory_delay" }, { 1, "exec_delay" }, { 1, "gpr_stage_delay" }, + { 1, "busy" }, }; static const int num_signals = sizeof(scope_signals) / sizeof(scope_signal_t); @@ -131,13 +132,13 @@ int vx_scope_stop(fpga_handle hfpga, uint64_t delay) { CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_CSR_SCOPE_CMD, 2)); CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_CSR_SCOPE_DATA, &frame_width)); - std::cout << "scope::frame_width=" << frame_width << std::endl; + std::cout << "scope::frame_width=" << std::dec << frame_width << std::endl; assert(fwidth == (int)frame_width); CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_CSR_SCOPE_CMD, 3)); CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_CSR_SCOPE_DATA, &max_frames)); - std::cout << "scope::max_frames=" << max_frames << std::endl; + std::cout << "scope::max_frames=" << std::dec << max_frames << std::endl; CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_CSR_SCOPE_CMD, 1)); diff --git a/driver/opae/vortex.cpp b/driver/opae/vortex.cpp index 2f7282fe..6e51a568 100755 --- a/driver/opae/vortex.cpp +++ b/driver/opae/vortex.cpp @@ -120,6 +120,7 @@ extern int vx_dev_open(vx_device_h* hdevice) { #ifdef SCOPE { + int ret = vx_scope_start(device->fpga, 0); if (ret != 0) return ret; diff --git a/driver/rtlsim/Makefile b/driver/rtlsim/Makefile index 146880f5..b7548092 100644 --- a/driver/rtlsim/Makefile +++ b/driver/rtlsim/Makefile @@ -20,7 +20,7 @@ DBG_PRINT_FLAGS = -DDBG_PRINT_CORE_ICACHE \ #MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=2 #DEBUG=1 -AFU=1 +#AFU=1 CFLAGS += -fPIC diff --git a/driver/tests/basic/kernel.bin b/driver/tests/basic/kernel.bin index cdd3dcc0..38de9c81 100644 Binary files a/driver/tests/basic/kernel.bin and b/driver/tests/basic/kernel.bin differ diff --git a/hw/opae/sources.txt b/hw/opae/sources.txt index 2e1182ea..08f76375 100644 --- a/hw/opae/sources.txt +++ b/hw/opae/sources.txt @@ -1,7 +1,7 @@ vortex_afu.json +define+NDEBUG -+define+SCOPE +#+define+SCOPE #+define+DBG_PRINT_CORE_ICACHE #+define+DBG_PRINT_CORE_DCACHE diff --git a/hw/opae/vortex_afu.sv b/hw/opae/vortex_afu.sv index f4b8a716..21cf923c 100644 --- a/hw/opae/vortex_afu.sv +++ b/hw/opae/vortex_afu.sv @@ -204,7 +204,7 @@ begin end MMIO_CSR_SCOPE_CMD: begin `ifdef DBG_PRINT_OPAE - $display("%t: CSR_SCOPE_CMD: %0d", $time, 64'(cp2af_sRxPort.c0.data)); + $display("%t: CSR_SCOPE_CMD: %0h", $time, 64'(cp2af_sRxPort.c0.data)); `endif end default: begin @@ -246,7 +246,7 @@ begin MMIO_CSR_SCOPE_DATA: begin mmio_tx.data <= csr_scope_data; `ifdef DBG_PRINT_OPAE - $display("%t: SCOPE: data=%0d", $time, csr_scope_data); + $display("%t: SCOPE: data=%0h", $time, csr_scope_data); `endif end default: mmio_tx.data <= 64'h0; @@ -815,9 +815,9 @@ end `SCOPE_ASSIGN(scope_snp_rsp_tag, vx_snp_rsp_tag); `SCOPE_ASSIGN(scope_snp_rsp_ready, vx_snp_rsp_ready); -`STATIC_ASSERT($bits({`SCOPE_SIGNALS_DATA_LIST `SCOPE_SIGNALS_UPD_LIST}) == 490, "oops!") +`STATIC_ASSERT($bits({`SCOPE_SIGNALS_DATA_LIST `SCOPE_SIGNALS_UPD_LIST}) == 491, "oops!") -wire force_changed = (scope_icache_req_valid && scope_icache_req_ready) +wire scope_changed = (scope_icache_req_valid && scope_icache_req_ready) || (scope_icache_rsp_valid && scope_icache_rsp_ready) || ((| scope_dcache_req_valid) && scope_dcache_req_ready) || ((| scope_dcache_rsp_valid) && scope_dcache_rsp_ready) @@ -826,6 +826,9 @@ wire force_changed = (scope_icache_req_valid && scope_icache_req_ready) || (scope_snp_req_valid && scope_snp_req_ready) || (scope_snp_rsp_valid && scope_snp_rsp_ready); +wire scope_start = vx_reset; +wire scope_stop = 0; + VX_scope #( .DATAW ($bits({`SCOPE_SIGNALS_DATA_LIST `SCOPE_SIGNALS_UPD_LIST})), .BUSW (64), @@ -834,9 +837,9 @@ VX_scope #( ) scope ( .clk (clk), .reset (SoftReset), - .start (vx_reset), - .stop (0), - .changed (force_changed), + .start (scope_start), + .stop (scope_stop), + .changed (scope_changed), .data_in ({`SCOPE_SIGNALS_DATA_LIST `SCOPE_SIGNALS_UPD_LIST}), .bus_in (csr_scope_cmd), .bus_out (csr_scope_data), diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 526b6307..f4185a46 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -345,7 +345,8 @@ scope_schedule_delay, \ scope_memory_delay, \ scope_exec_delay, \ - scope_gpr_stage_delay + scope_gpr_stage_delay, \ + scope_busy `define SCOPE_SIGNALS_DECL \ wire scope_icache_req_valid; \ @@ -380,6 +381,7 @@ wire scope_snp_req_ready; \ wire scope_snp_rsp_valid; \ wire [`VX_SNP_TAG_WIDTH-1:0] scope_snp_rsp_tag; \ + wire scope_busy; \ wire scope_snp_rsp_ready; \ wire scope_schedule_delay; \ wire scope_memory_delay; \ @@ -453,6 +455,7 @@ `define SCOPE_SIGNALS_CORE_IO \ /* verilator lint_off UNDRIVEN */ \ + output wire scope_busy, \ output wire scope_schedule_delay, \ output wire scope_memory_delay, \ output wire scope_exec_delay, \ @@ -521,28 +524,29 @@ .scope_snp_rsp_ready (scope_snp_rsp_ready), `define SCOPE_SIGNALS_CORE_ATTACH \ + .scope_busy (scope_busy), \ .scope_schedule_delay (scope_schedule_delay), \ .scope_memory_delay (scope_memory_delay), \ .scope_exec_delay (scope_exec_delay), \ .scope_gpr_stage_delay (scope_gpr_stage_delay), `define SCOPE_SIGNALS_BE_ATTACH \ - .scope_decode_valid (scope_decode_valid), \ - .scope_decode_warp_num (scope_decode_warp_num), \ - .scope_decode_curr_PC (scope_decode_curr_PC), \ - .scope_decode_is_jal (scope_decode_is_jal), \ - .scope_decode_rs1 (scope_decode_rs1), \ - .scope_decode_rs2 (scope_decode_rs2), \ - .scope_execute_valid (scope_execute_valid), \ - .scope_execute_warp_num (scope_execute_warp_num), \ - .scope_execute_rd (scope_execute_rd), \ - .scope_execute_a (scope_execute_a), \ - .scope_execute_b (scope_execute_b), \ - .scope_writeback_valid (scope_writeback_valid), \ - .scope_writeback_warp_num (scope_writeback_warp_num), \ - .scope_writeback_wb (scope_writeback_wb), \ - .scope_writeback_rd (scope_writeback_rd), \ - .scope_writeback_data (scope_writeback_data), + .scope_decode_valid (scope_decode_valid), \ + .scope_decode_warp_num (scope_decode_warp_num), \ + .scope_decode_curr_PC (scope_decode_curr_PC), \ + .scope_decode_is_jal (scope_decode_is_jal), \ + .scope_decode_rs1 (scope_decode_rs1), \ + .scope_decode_rs2 (scope_decode_rs2), \ + .scope_execute_valid (scope_execute_valid), \ + .scope_execute_warp_num (scope_execute_warp_num), \ + .scope_execute_rd (scope_execute_rd), \ + .scope_execute_a (scope_execute_a), \ + .scope_execute_b (scope_execute_b), \ + .scope_writeback_valid (scope_writeback_valid), \ + .scope_writeback_warp_num (scope_writeback_warp_num), \ + .scope_writeback_wb (scope_writeback_wb), \ + .scope_writeback_rd (scope_writeback_rd), \ + .scope_writeback_data (scope_writeback_data), `define SCOPE_ASSIGN(d,s) assign d = s `else diff --git a/hw/rtl/VX_mem_arb.v b/hw/rtl/VX_mem_arb.v index ab63ceb5..980b3b77 100644 --- a/hw/rtl/VX_mem_arb.v +++ b/hw/rtl/VX_mem_arb.v @@ -93,7 +93,7 @@ module VX_mem_arb #( assign in_mem_rsp_data[i] = out_mem_rsp_data; assign in_mem_rsp_tag[i] = out_mem_rsp_tag[REQS_BITS +: TAG_IN_WIDTH]; end - assign out_mem_rsp_ready = in_mem_rsp_ready[bus_rsp_sel]; + assign out_mem_rsp_ready = out_mem_rsp_valid ? in_mem_rsp_ready[bus_rsp_sel] : 0; end diff --git a/hw/rtl/VX_pipeline.v b/hw/rtl/VX_pipeline.v index e8052cff..f5c937eb 100644 --- a/hw/rtl/VX_pipeline.v +++ b/hw/rtl/VX_pipeline.v @@ -174,6 +174,7 @@ module VX_pipeline #( assign core_icache_rsp_if.core_rsp_tag = icache_rsp_tag; assign icache_rsp_ready = core_icache_rsp_if.core_rsp_ready; + `SCOPE_ASSIGN(scope_busy, busy); `SCOPE_ASSIGN(scope_schedule_delay, schedule_delay); `SCOPE_ASSIGN(scope_memory_delay, memory_delay); `SCOPE_ASSIGN(scope_exec_delay, exec_delay); diff --git a/hw/rtl/libs/VX_scope.v b/hw/rtl/libs/VX_scope.v index 55d015f8..2020ea87 100644 --- a/hw/rtl/libs/VX_scope.v +++ b/hw/rtl/libs/VX_scope.v @@ -19,7 +19,7 @@ module VX_scope #( input wire bus_read ); localparam DELTA_ENABLE = (UPDW != 0); - localparam MAX_DELTA = (1**DELTAW)-1; + localparam MAX_DELTA = (2 ** DELTAW) - 1; typedef enum logic[2:0] { CMD_GET_VALID, @@ -41,14 +41,14 @@ module VX_scope #( reg [DATAW-1:0] data_store [SIZE-1:0]; reg [DELTAW-1:0] delta_store [SIZE-1:0]; - reg [UPDW-1:0] prev_id; + reg [UPDW-1:0] prev_trigger_id; reg [DELTAW-1:0] delta; reg [`CLOG2(SIZE)-1:0] raddr, waddr, waddr_end; reg [`LOG2UP(DATAW)-1:0] read_offset; - reg start_wait, recording, data_valid, read_delta; + reg start_wait, recording, data_valid, read_delta, started, delta_flush; reg [BUSW-3:0] delay_val, delay_cntr; @@ -62,18 +62,21 @@ module VX_scope #( always @(posedge clk) begin if (reset) begin - raddr <= 0; - waddr <= 0; - start_wait <= 0; - recording <= 0; - delay_cntr <= 0; - read_offset <= 0; - data_valid <= 0; - out_cmd <= $bits(out_cmd)'(CMD_GET_VALID); - delay_val <= 0; - waddr_end <= $bits(waddr)'(SIZE-1); - delta <= 0; - read_delta <= 0; + raddr <= 0; + waddr <= 0; + start_wait <= 0; + recording <= 0; + delay_cntr <= 0; + read_offset <= 0; + data_valid <= 0; + out_cmd <= $bits(out_cmd)'(CMD_GET_VALID); + delay_val <= 0; + waddr_end <= $bits(waddr)'(SIZE-1); + delta <= 0; + prev_trigger_id <= 0; + read_delta <= 0; + started <= 0; + delta_flush <= 0; end else begin if (bus_write) begin @@ -88,13 +91,13 @@ module VX_scope #( endcase end - if (start) begin - waddr <= 0; + if (start && !started) begin + started <= 1; if (0 == delay_val) begin - start_wait <= 0; - recording <= 1; - delay_cntr <= 0; - delta <= MAX_DELTA; + start_wait <= 0; + recording <= 1; + delay_cntr <= 0; + delta_flush <= 1; end else begin start_wait <= 1; recording <= 0; @@ -105,25 +108,27 @@ module VX_scope #( if (start_wait) begin delay_cntr <= delay_cntr - 1; if (1 == delay_cntr) begin - start_wait <= 0; - recording <= 1; - delta <= MAX_DELTA; + start_wait <= 0; + recording <= 1; + delta_flush <= 1; end end if (recording) begin if (DELTA_ENABLE) begin - if (changed - || (delta == MAX_DELTA) - || (trigger_id != prev_id)) begin + if (delta_flush + || changed + || (trigger_id != prev_trigger_id)) begin data_store[waddr] <= data_in; delta_store[waddr] <= delta; - waddr <= waddr + 1; - delta <= 0; + waddr <= waddr + 1; + delta <= 0; + delta_flush <= 0; end else begin - delta <= delta + 1; + delta <= delta + 1; + delta_flush <= (delta == (MAX_DELTA-1)); end - prev_id <= trigger_id; + prev_trigger_id <= trigger_id; end else begin data_store[waddr] <= data_in; waddr <= waddr + 1; @@ -131,7 +136,7 @@ module VX_scope #( if (stop || (waddr >= waddr_end)) begin - waddr <= waddr; // keep last written address + waddr <= waddr; // keep last address recording <= 0; data_valid <= 1; read_delta <= DELTA_ENABLE; @@ -172,14 +177,15 @@ module VX_scope #( GET_VALID : bus_out = BUSW'(data_valid); GET_WIDTH : bus_out = BUSW'(DATAW); GET_COUNT : bus_out = BUSW'(waddr) + BUSW'(1); - default : bus_out = read_delta ? BUSW'(delta_store[raddr]) : BUSW'(data_store[raddr] >> read_offset); + GET_DATA : bus_out = read_delta ? BUSW'(delta_store[raddr]) : BUSW'(data_store[raddr] >> read_offset); + default : bus_out = 0; endcase end `ifdef DBG_PRINT_SCOPE always_ff @(posedge clk) begin if (bus_read) begin - $display("%t: scope-read: cmd=%0d, out=0x%0h, addr=%0d, off=%0d", $time, out_cmd, bus_out, raddr, read_offset); + $display("%t: scope-read: cmd=%0d, out=0x%0h, addr=%0d", $time, out_cmd, bus_out, raddr); end if (bus_write) begin $display("%t: scope-write: cmd=%0d, value=%0d", $time, cmd_type, cmd_data);