diff --git a/hw/opae/Makefile b/hw/opae/Makefile index 7ac422aa..a4a7ec6b 100644 --- a/hw/opae/Makefile +++ b/hw/opae/Makefile @@ -15,7 +15,7 @@ setup-ase: $(ASE_BUILD_DIR)/Makefile setup-fpga: $(FPGA_BUILD_DIR)/build/dcp.qpf $(ASE_BUILD_DIR)/Makefile: - afu_sim_setup --s sources.txt $(ASE_BUILD_DIR) + afu_sim_setup -s sources.txt $(ASE_BUILD_DIR) $(FPGA_BUILD_DIR)/build/dcp.qpf: afu_synth_setup -s sources.txt $(FPGA_BUILD_DIR) diff --git a/hw/opae/README b/hw/opae/README index e0ed668d..ad4d25f3 100644 --- a/hw/opae/README +++ b/hw/opae/README @@ -7,7 +7,7 @@ source /export/fpga/bin/setup-fpga-env fpga-pac-a10 ## Vortex Run commands ## ######################### ## Synthesis -cd /driver/hw/ +cd /driver/hw/opae # Configure a Quartus build area afu_synth_setup -s sources.txt build_fpga cd build_fpga @@ -47,6 +47,9 @@ source /export/fpga/bin/setup-fpga-env fpga-pac-a10 # Acquire a sever node for running ASE simulations qsub-sim +# test +./run_ase.sh ../../driver/tests/basic/basic + # modify "vsim_run.tcl" to dump VCD trace vcd file vortex.vcd vcd add -r /*/Vortex/hw/rtl/* @@ -61,9 +64,6 @@ tar -zxvf /mnt/c/Users/Blaise/Downloads/vortex.vcd.tar.gz # launch Gtkwave gtkwave ./build_ase/work/vortex.vcd & -# test -./run_ase.sh ../../driver/tests/basic/basic - # kill process by Users ps -u tinebp kill -9 \ No newline at end of file diff --git a/hw/opae/run_ase.sh b/hw/opae/run_ase.sh index 147f1147..cb5b5fe0 100755 --- a/hw/opae/run_ase.sh +++ b/hw/opae/run_ase.sh @@ -10,12 +10,14 @@ export ASE_WORKDIR=$SCRIPT_DIR/build_ase/work shift 1 # cleanup incomplete runs -rm -rf $ASE_WORKDIR/.app_lock.pid $ASE_WORKDIR/.ase_ready.pid +rm -f $ASE_WORKDIR/.app_lock.pid +rm -f $ASE_WORKDIR/.ase_ready.pid +rm -f $SCRIPT_DIR/build_ase/nohup.out # Start Simulator in background pushd $SCRIPT_DIR/build_ase -echo " [DBG] starting ASE simnulator" -nohup make sim & +echo " [DBG] starting ASE simnulator (stdout saved to '$SCRIPT_DIR/build_ase/nohup.out')" +nohup make sim & popd # Wait for simulator readiness diff --git a/hw/opae/sources.txt b/hw/opae/sources.txt index 73f6f739..b67ac373 100644 --- a/hw/opae/sources.txt +++ b/hw/opae/sources.txt @@ -2,6 +2,9 @@ vortex_afu.json +define+GLOBAL_BLOCK_SIZE=64 +#+define+NUM_CORES=2 +#+define+DL2_ENABLE=0 + +incdir+. +incdir+../rtl +incdir+../rtl/interfaces @@ -23,9 +26,10 @@ vortex_afu.json ../rtl/cache/VX_cache_req_queue.v ../rtl/cache/VX_cache_miss_resrv.v ../rtl/cache/VX_fill_invalidator.v -../rtl/cache/VX_snp_fwd_arb.v +../rtl/cache/VX_snp_rsp_arb.v ../rtl/cache/VX_tag_data_access.v ../rtl/cache/VX_tag_data_structure.v +../rtl/cache/VX_snp_forwarder.v ../rtl/cache/VX_prefetcher.v ../rtl/interfaces/VX_branch_rsp_if.v @@ -34,6 +38,7 @@ vortex_afu.json ../rtl/interfaces/VX_cache_dram_req_if.v ../rtl/interfaces/VX_cache_dram_rsp_if.v ../rtl/interfaces/VX_cache_snp_req_if.v +../rtl/interfaces/VX_cache_snp_rsp_if.v ../rtl/interfaces/VX_csr_req_if.v ../rtl/interfaces/VX_exec_unit_req_if.v ../rtl/interfaces/VX_frE_to_bckE_req_if.v diff --git a/hw/opae/vortex_afu.sv b/hw/opae/vortex_afu.sv index 0c4df51b..eee5a40e 100644 --- a/hw/opae/vortex_afu.sv +++ b/hw/opae/vortex_afu.sv @@ -40,9 +40,6 @@ localparam AVS_RD_QUEUE_SIZE = 16; localparam CCI_RD_WINDOW_SIZE = 8; localparam CCI_RD_QUEUE_SIZE = 2 * CCI_RD_WINDOW_SIZE; -localparam VX_SNOOP_DELAY = 1000; -localparam VX_SNOOP_LEVELS = 2; - localparam AFU_ID_L = 16'h0002; // AFU ID Lower localparam AFU_ID_H = 16'h0004; // AFU ID Higher @@ -89,8 +86,13 @@ logic vx_dram_rsp_ready; logic vx_snp_req_valid; logic [DRAM_ADDR_WIDTH-1:0] vx_snp_req_addr; +logic [0:0] vx_snp_req_tag; logic vx_snp_req_ready; +logic vx_snp_rsp_valid; +logic [0:0] vx_snp_rsp_addr; +logic vx_snp_rsp_ready; + logic vx_busy; // AVS Queues ///////////////////////////////////////////////////////////////// @@ -207,9 +209,8 @@ end logic [DRAM_ADDR_WIDTH-1:0] cci_write_ctr; logic [DRAM_ADDR_WIDTH-1:0] avs_read_ctr; logic [DRAM_ADDR_WIDTH-1:0] avs_write_ctr; -logic [DRAM_ADDR_WIDTH-1:0] snp_req_ctr; -logic [9:0] snp_req_delay; -logic vx_reset; +logic vx_reset; +logic snp_rsp_done; always_ff @(posedge clk) begin @@ -267,7 +268,7 @@ begin end STATE_CLFLUSH: begin - if (snp_req_delay >= VX_SNOOP_DELAY) begin + if (snp_rsp_done) begin state <= STATE_IDLE; end end @@ -572,33 +573,48 @@ end // Vortex cache snooping ////////////////////////////////////////////////////// +logic [DRAM_ADDR_WIDTH-1:0] snp_req_ctr; +logic [DRAM_ADDR_WIDTH-1:0] snp_rsp_ctr; + +always_comb +begin + snp_rsp_done = (snp_rsp_ctr >= csr_data_size); +end + always_ff @(posedge clk) begin if (SoftReset) begin vx_snp_req_valid <= 0; + vx_snp_req_tag <= 0; + vx_snp_rsp_ready <= 0; snp_req_ctr <= 0; - snp_req_delay <= 0; + snp_rsp_ctr <= 0; end else begin if (STATE_IDLE == state) begin - snp_req_ctr <= 0; - snp_req_delay <= 0; + snp_req_ctr <= 0; + snp_rsp_ctr <= 0; + vx_snp_rsp_ready <= 0; end vx_snp_req_valid <= 0; + vx_snp_rsp_ready <= 0; if ((STATE_CLFLUSH == state) && (snp_req_ctr < csr_data_size) && vx_snp_req_ready) begin vx_snp_req_addr <= csr_mem_addr + snp_req_ctr; - vx_snp_req_valid <= 1; snp_req_ctr <= snp_req_ctr + 1; + vx_snp_req_valid <= 1; + vx_snp_rsp_ready <= 1; end - if (snp_req_ctr == csr_data_size) begin - snp_req_delay <= snp_req_delay + 1; - end + if ((STATE_CLFLUSH == state) + && (snp_rsp_ctr < csr_data_size) + && vx_snp_rsp_valid) begin + snp_rsp_ctr <= snp_rsp_ctr + 1; + end end end @@ -622,11 +638,17 @@ Vortex_Socket #() vx_socket ( .dram_rsp_tag (vx_dram_rsp_tag), .dram_rsp_ready (vx_dram_rsp_ready), - // Cache snooping + // Snoop request .snp_req_valid (vx_snp_req_valid), .snp_req_addr (vx_snp_req_addr), + .snp_req_tag (vx_snp_req_tag), .snp_req_ready (vx_snp_req_ready), + // Snoop response + .snp_rsp_valid (vx_snp_rsp_valid), + .snp_rsp_tag (vx_snp_rsp_tag), + .snp_rsp_ready (vx_snp_rsp_ready), + // I/O request .io_req_read (), .io_req_write (), diff --git a/hw/rtl/VX_dram_arb.v b/hw/rtl/VX_dram_arb.v index b8f1262a..537a7a1f 100644 --- a/hw/rtl/VX_dram_arb.v +++ b/hw/rtl/VX_dram_arb.v @@ -53,13 +53,14 @@ module VX_dram_arb #( assign dram_req_data = core_req_data [bus_req_sel]; assign dram_req_tag = {core_req_tag [bus_req_sel], (`REQS_BITS)'(bus_req_sel)}; + genvar i; + for (i = 0; i < NUM_REQUESTS; i++) begin assign core_req_ready[i] = dram_req_ready && (bus_req_sel == `REQS_BITS'(i)); end wire [`REQS_BITS-1:0] bus_rsp_sel = dram_rsp_tag[`REQS_BITS-1:0]; - genvar i; for (i = 0; i < NUM_REQUESTS; i++) begin assign core_rsp_valid[i] = dram_rsp_valid && (bus_rsp_sel == `REQS_BITS'(i)); assign core_rsp_data[i] = dram_rsp_data; diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index 9325c4d8..d6214216 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -244,9 +244,6 @@ module VX_bank #( wire st2_pending_hazard_st1e; wire force_request_miss_st1e; - wire[`LINE_ADDR_WIDTH-1:0] miss_add_addr; - wire[`BASE_ADDR_BITS-1:0] miss_add_wsel; - wire[`WORD_WIDTH-1:0] miss_add_data; wire[`REQS_BITS-1:0] miss_add_tid; wire[`REQ_TAG_WIDTH-1:0] miss_add_tag; wire[`BYTE_EN_BITS-1:0] miss_add_mem_read; @@ -386,6 +383,7 @@ module VX_bank #( wire is_snp_st1e; wire snp_to_mrvq_st1e; wire mrvq_init_ready_state_st1e; + wire miss_add_because_miss; assign is_snp_st1e = is_snp_st1[STAGE_1_CYCLES-1]; @@ -482,7 +480,7 @@ module VX_bank #( `DEBUG_END // Enqueue to miss reserv if it's a valid miss - wire miss_add_because_miss = valid_st2 && !is_snp_st2 && miss_st2; + assign miss_add_because_miss = valid_st2 && !is_snp_st2 && miss_st2; wire miss_add_because_pending = snp_to_mrvq_st2; wire miss_add_unqual = (miss_add_because_miss || miss_add_because_pending); @@ -494,9 +492,9 @@ module VX_bank #( || dwbq_push_stall || dram_fill_req_stall); - wire miss_add_addr = addr_st2; - wire miss_add_wsel = wsel_st2; - wire miss_add_data = writeword_st2; + wire [`LINE_ADDR_WIDTH-1:0] miss_add_addr = addr_st2; + wire [`BASE_ADDR_BITS-1:0] miss_add_wsel = wsel_st2; + wire [`WORD_WIDTH-1:0] miss_add_data = writeword_st2; assign {miss_add_tag, miss_add_mem_read, miss_add_mem_write, miss_add_tid} = inst_meta_st2; wire miss_add_is_snp = is_snp_st2; diff --git a/hw/rtl/cache/VX_cache_miss_resrv.v b/hw/rtl/cache/VX_cache_miss_resrv.v index 288be4fc..390a85ec 100644 --- a/hw/rtl/cache/VX_cache_miss_resrv.v +++ b/hw/rtl/cache/VX_cache_miss_resrv.v @@ -66,11 +66,9 @@ module VX_cache_miss_resrv #( wire enqueue_possible = !miss_resrv_full; wire [`LOG2UP(MRVQ_SIZE)-1:0] enqueue_index = tail_ptr; - wire qual_mrvq_init = mrvq_push && mrvq_init_ready_state; - - `IGNORE_WARNINGS_BEGIN +`IGNORE_WARNINGS_BEGIN wire [31:0] make_ready_push_full; - `IGNORE_WARNINGS_END +`IGNORE_WARNINGS_END reg [MRVQ_SIZE-1:0] make_ready; reg [MRVQ_SIZE-1:0] make_ready_push; @@ -79,7 +77,7 @@ module VX_cache_miss_resrv #( genvar i; generate for (i = 0; i < MRVQ_SIZE; i++) begin - assign valid_address_match[i] = valid_table[i] && (addr_table[i] == fill_addr_st1); + assign valid_address_match[i] = valid_table[i] && (addr_table[i] === fill_addr_st1); assign make_ready[i] = is_fill_st1 && valid_address_match[i]; end endgenerate @@ -98,6 +96,8 @@ module VX_cache_miss_resrv #( wire update_ready = (|make_ready); + wire qual_mrvq_init = mrvq_push && mrvq_init_ready_state; + assign make_ready_push_full = ({31'b0, qual_mrvq_init} << enqueue_index); assign make_ready_push = make_ready_push_full[MRVQ_SIZE-1:0]; diff --git a/hw/rtl/cache/VX_snp_forwarder.v b/hw/rtl/cache/VX_snp_forwarder.v index 9d200114..627b1fcc 100644 --- a/hw/rtl/cache/VX_snp_forwarder.v +++ b/hw/rtl/cache/VX_snp_forwarder.v @@ -1,4 +1,4 @@ -`include "VX_define.vh" +`include "VX_cache_config.vh" module VX_snp_forwarder #( parameter BANK_LINE_SIZE = 0,