diff --git a/driver/opae/Makefile b/driver/opae/Makefile index 66517af2..09ab5d79 100644 --- a/driver/opae/Makefile +++ b/driver/opae/Makefile @@ -24,13 +24,6 @@ CXXFLAGS += -fPIC # Dump perf stats CXXFLAGS += -DDUMP_PERF_STATS -# Enable scope analyzer -# Enable scope analyzer -ifdef SCOPE - CXXFLAGS += -DSCOPE - SET_SCOPE = SCOPE=1 -endif - LDFLAGS += -shared FPGA_LIBS += -luuid -lopae-c @@ -53,7 +46,14 @@ PROJECT_VLSIM = $(VLSIM_DIR)/libvortex.so AFU_JSON_INFO = vortex_afu.h -SRCS = vortex.cpp vx_scope.cpp ../common/vx_utils.cpp +SRCS = vortex.cpp ../common/vx_utils.cpp + +# Enable scope analyzer +ifdef SCOPE + CXXFLAGS += -DSCOPE + SRCS += vx_scope.cpp + SET_SCOPE = SCOPE=1 +endif all: vlsim @@ -64,7 +64,7 @@ json: ../../hw/opae/vortex_afu.json fpga: $(SRCS) $(CXX) $(CXXFLAGS) $^ $(LDFLAGS) $(FPGA_LIBS) -o $(PROJECT) -ase: $(SRCS) $(ASE_DIR) +asesim: $(SRCS) $(ASE_DIR) $(CXX) $(CXXFLAGS) -DUSE_ASE $(SRCS) $(LDFLAGS) $(ASE_LIBS) -o $(PROJECT_ASE) vlsim: $(SRCS) opae-vlsim diff --git a/driver/opae/vlsim/Makefile b/driver/opae/vlsim/Makefile index 947e5698..f6d26d1c 100644 --- a/driver/opae/vlsim/Makefile +++ b/driver/opae/vlsim/Makefile @@ -20,10 +20,10 @@ DBG_FLAGS += -DDBG_CORE_REQ_INFO #CONFIGS += -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1 #CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1 -#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0 -CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=1 +CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0 +#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=1 -DEBUG=1 +#DEBUG=1 SCOPE=1 CFLAGS += -fPIC diff --git a/driver/opae/vlsim/vortex_afu_shim.sv b/driver/opae/vlsim/vortex_afu_shim.sv index 4977979a..cf5735e4 100644 --- a/driver/opae/vlsim/vortex_afu_shim.sv +++ b/driver/opae/vlsim/vortex_afu_shim.sv @@ -87,7 +87,7 @@ t_if_ccip_Tx af2cp_sTxPort; vortex_afu #( .NUM_LOCAL_MEM_BANKS(NUM_LOCAL_MEM_BANKS) -) vortex_afu ( +) afu ( .clk(clk), .reset(reset), .cp2af_sRxPort(cp2af_sRxPort), diff --git a/driver/opae/vortex.cpp b/driver/opae/vortex.cpp index 6ae578e6..703aca7c 100755 --- a/driver/opae/vortex.cpp +++ b/driver/opae/vortex.cpp @@ -509,12 +509,6 @@ extern int vx_start(vx_device_h hdevice) { // start execution CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_RUN)); -/*#ifdef SCOPE - sleep(15); - vx_scope_stop(device->fpga, 0); - exit(0); -#endif*/ - return 0; } @@ -547,7 +541,7 @@ extern int vx_csr_get(vx_device_h hdevice, int core_id, int addr, unsigned* valu // Ensure ready for new command if (vx_ready_wait(hdevice, -1) != 0) - return -1; + return -1; // write CSR value CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CORE, core_id)); diff --git a/driver/opae/vx_scope.cpp b/driver/opae/vx_scope.cpp index dca8311f..536fe25f 100644 --- a/driver/opae/vx_scope.cpp +++ b/driver/opae/vx_scope.cpp @@ -4,6 +4,9 @@ #include #include #include +#include +#include +#include #ifdef USE_VLSIM #include "vlsim/fpga.h" @@ -39,14 +42,30 @@ #define CMD_SET_STOP 5 #define CMD_GET_OFFSET 6 -static constexpr int num_signals = sizeof(scope_signals) / sizeof(scope_signal_t); +static constexpr int num_modules = sizeof(scope_modules) / sizeof(scope_module_t); + +static constexpr int num_signals = sizeof(scope_taps) / sizeof(scope_tap_t); constexpr int calcFrameWidth(int index = 0) { - return (index < num_signals) ? (scope_signals[index].width + calcFrameWidth(index + 1)) : 0; + return (index < num_signals) ? (scope_taps[index].width + calcFrameWidth(index + 1)) : 0; } static constexpr int fwidth = calcFrameWidth(); +#ifdef HANG_TIMEOUT +static std::thread g_timeout_thread; +static std::mutex g_timeout_mutex; + +static void timeout_callback(fpga_handle fpga) { + std::this_thread::sleep_for(std::chrono::seconds{60}); + if (!g_timeout_mutex.try_lock()) + return; + vx_scope_stop(fpga, HANG_TIMEOUT); + fpgaClose(fpga); + exit(0); +} +#endif + uint64_t print_clock(std::ofstream& ofs, uint64_t delta, uint64_t timestamp) { while (delta != 0) { ofs << '#' << timestamp++ << std::endl; @@ -58,6 +77,27 @@ uint64_t print_clock(std::ofstream& ofs, uint64_t delta, uint64_t timestamp) { return timestamp; } +void dump_taps(std::ofstream& ofs, int module) { + int i = 1; + for (auto& tap : scope_taps) { + if (tap.module != module) + continue; + ofs << "$var reg " << tap.width << " " << i << " " << tap.name << " $end" << std::endl; + i += 1; + } +} + +void dump_module(std::ofstream& ofs, int parent) { + for (auto& module : scope_modules) { + if (module.parent != parent) + continue; + ofs << "$scope module " << module.name << " $end" << std::endl; + dump_module(ofs, module.index); + dump_taps(ofs, module.index); + ofs << "$upscope $end" << std::endl; + } +} + int vx_scope_start(fpga_handle hfpga, uint64_t delay) { if (nullptr == hfpga) return -1; @@ -69,10 +109,20 @@ int vx_scope_start(fpga_handle hfpga, uint64_t delay) { std::cout << "scope start delay: " << delay << std::endl; } +#ifdef HANG_TIMEOUT + g_timeout_thread = std::thread(timeout_callback, hfpga); + g_timeout_thread.detach(); +#endif + return 0; } int vx_scope_stop(fpga_handle hfpga, uint64_t delay) { +#ifdef HANG_TIMEOUT + if (!g_timeout_mutex.try_lock()) + return 0; +#endif + if (nullptr == hfpga) return -1; @@ -89,11 +139,8 @@ int vx_scope_stop(fpga_handle hfpga, uint64_t delay) { ofs << "$timescale 1 ns $end" << std::endl; ofs << "$scope module TOP $end" << std::endl; ofs << "$var reg 1 0 clk $end" << std::endl; - - for (int i = 0; i < num_signals; ++i) { - ofs << "$var reg " << scope_signals[i].width << " " << (i+1) << " " << scope_signals[i].name << " $end" << std::endl; - } - + dump_module(ofs, -1); + dump_taps(ofs, -1); ofs << "$upscope $end" << std::endl; ofs << "enddefinitions $end" << std::endl; @@ -158,7 +205,7 @@ int vx_scope_stop(fpga_handle hfpga, uint64_t delay) { CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &word)); do { - int signal_width = scope_signals[signal_id-1].width; + int signal_width = scope_taps[signal_id-1].width; int word_offset = frame_offset % 64; signal_data[signal_width - signal_offset - 1] = ((word >> word_offset) & 0x1) ? '1' : '0'; @@ -183,7 +230,9 @@ int vx_scope_stop(fpga_handle hfpga, uint64_t delay) { CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &delta)); timestamp = print_clock(ofs, delta + 1, timestamp); signal_id = num_signals; - //std::cout << "*** " << frame_no << " frames, timestamp=" << timestamp << std::endl; + if (0 == (frame_no % 100)) { + std::cout << "*** " << frame_no << " frames, timestamp=" << timestamp << std::endl; + } } } diff --git a/driver/opae/vx_scope.h b/driver/opae/vx_scope.h index f2d5518e..2bb09c4a 100644 --- a/driver/opae/vx_scope.h +++ b/driver/opae/vx_scope.h @@ -1,5 +1,7 @@ #pragma once +#define HANG_TIMEOUT 60 + int vx_scope_start(fpga_handle hfpga, uint64_t delay = -1); int vx_scope_stop(fpga_handle hfpga, uint64_t delay = -1); \ No newline at end of file diff --git a/hw/opae/README b/hw/opae/README index 303f48df..5765123b 100644 --- a/hw/opae/README +++ b/hw/opae/README @@ -60,7 +60,7 @@ qsub-sim make ase # tests -./run_ase.sh build_ase_1c ../../driver/tests/basic/basic -n 256 +./run_ase.sh build_ase_1c ../../driver/tests/basic/basic -t1 -n1 ./run_ase.sh build_ase_1c ../../driver/tests/demo/demo -n 16 ./run_ase.sh build_ase_1c ../../driver/tests/dogfood/dogfood -n1 -s4 -e4 ./run_ase.sh build_ase_1c ../../benchmarks/opencl/vecadd/vecadd @@ -81,6 +81,7 @@ tar -zcvf run.log.tar.gz run.log tar -cvjf vortex.vcd.tar.bz2 build_ase_1c/work/vortex.vcd tar -zcvf vortex.vcd.tar.gz build_ase_1c/work/vortex.vcd tar -zcvf run.log.tar.gz build_ase_1c/work/run.log +tar -zcvf vx_scope.vcd.tar.gz vx_scope.vcd # decompress VCD trace tar -zxvf /mnt/c/Users/Blaise/Downloads/vortex.vcd.tar.gz diff --git a/hw/opae/ccip_std_afu.sv b/hw/opae/ccip_std_afu.sv index f56bb80f..1590e82f 100644 --- a/hw/opae/ccip_std_afu.sv +++ b/hw/opae/ccip_std_afu.sv @@ -104,7 +104,7 @@ module ccip_std_afu #( vortex_afu #( .NUM_LOCAL_MEM_BANKS(NUM_LOCAL_MEM_BANKS) - ) vortex_afu_inst ( + ) afu ( .clk (clk), .reset (reset_T1), diff --git a/hw/opae/vortex_afu.qsf b/hw/opae/vortex_afu.qsf index 96b1c98b..1356ecb4 100644 --- a/hw/opae/vortex_afu.qsf +++ b/hw/opae/vortex_afu.qsf @@ -1,7 +1,7 @@ # Analysis & Synthesis Assignments set_global_assignment -name VERILOG_INPUT_VERSION SYSTEMVERILOG_2009 -# set_global_assignment -name ADD_PASS_THROUGH_LOGIC_TO_INFERRED_RAMS ON +set_global_assignment -name ADD_PASS_THROUGH_LOGIC_TO_INFERRED_RAMS OFF set_global_assignment -name VERILOG_MACRO QUARTUS set_global_assignment -name VERILOG_MACRO SYNTHESIS set_global_assignment -name VERILOG_MACRO NDEBUG diff --git a/hw/opae/vortex_afu.sv b/hw/opae/vortex_afu.sv index ff122e99..3faca262 100644 --- a/hw/opae/vortex_afu.sv +++ b/hw/opae/vortex_afu.sv @@ -93,7 +93,7 @@ typedef logic [$bits(t_ccip_clData) + $bits(t_cci_rdq_tag)-1:0] t_cci_rdq_data; state_t state; `ifdef SCOPE -`SCOPE_SIGNALS_DECL +`SCOPE_DECL_SIGNALS `endif // Vortex ports /////////////////////////////////////////////////////////////// @@ -511,8 +511,8 @@ assign vx_dram_wr_req_fire = vx_dram_wr_req_enable && !avs_waitrequest; assign vx_dram_rd_rsp_fire = vx_dram_rsp_valid && vx_dram_rsp_ready; assign avs_pending_reads_next = avs_pending_reads - + (((cci_dram_rd_req_fire || vx_dram_rd_req_fire) && !avs_rdq_pop) ? 1 : - (~(cci_dram_rd_req_fire || vx_dram_rd_req_fire) && avs_rdq_pop) ? -1 : 0); + + $bits(avs_pending_reads)'(((cci_dram_rd_req_fire || vx_dram_rd_req_fire) && !avs_rdq_pop) ? 1 : + (~(cci_dram_rd_req_fire || vx_dram_rd_req_fire) && avs_rdq_pop) ? -1 : 0); if (`VX_DRAM_LINE_WIDTH != DRAM_LINE_WIDTH) begin assign vx_dram_req_offset = ((DRAM_LINE_LW)'(vx_dram_req_addr[(DRAM_LINE_LW-VX_DRAM_LINE_LW)-1:0])) << VX_DRAM_LINE_LW; @@ -573,8 +573,8 @@ begin end if (cci_dram_rd_req_fire) begin - cci_dram_rd_req_addr <= cci_dram_rd_req_addr + 1; - cci_dram_rd_req_ctr <= cci_dram_rd_req_ctr - 1; + cci_dram_rd_req_addr <= cci_dram_rd_req_addr + DRAM_ADDR_WIDTH'(1); + cci_dram_rd_req_ctr <= cci_dram_rd_req_ctr - DRAM_ADDR_WIDTH'(1); `ifdef DBG_PRINT_OPAE $display("%t: AVS Rd Req: addr=%0h, rem=%0d, pending=%0d", $time, `DRAM_TO_BYTE_ADDR(avs_address), (cci_dram_rd_req_ctr - 1), avs_pending_reads_next); `endif @@ -582,7 +582,7 @@ begin if (cci_dram_wr_req_fire) begin cci_dram_wr_req_addr <= cci_dram_wr_req_addr + ((t_cci_rdq_tag'(cci_dram_wr_req_ctr) == $bits(t_cci_rdq_tag)'(CCI_RD_WINDOW_SIZE-1)) ? DRAM_ADDR_WIDTH'(CCI_RD_WINDOW_SIZE) : DRAM_ADDR_WIDTH'(0)); - cci_dram_wr_req_ctr <= cci_dram_wr_req_ctr + 1; + cci_dram_wr_req_ctr <= cci_dram_wr_req_ctr + DRAM_ADDR_WIDTH'(1); `ifdef DBG_PRINT_OPAE $display("%t: AVS Wr Req: addr=%0h, data=%0h, rem=%0d", $time, `DRAM_TO_BYTE_ADDR(avs_address), avs_writedata, (cci_dram_wr_req_ctr + 1)); `endif @@ -683,15 +683,15 @@ end assign cci_rd_req_fire = af2cp_sTxPort.c0.valid && !cp2af_sRxPort.c0TxAlmFull; assign cci_rd_rsp_fire = (STATE_WRITE == state) && cp2af_sRxPort.c0.rspValid; -assign cci_rd_req_ctr_next = cci_rd_req_ctr + (cci_rd_req_fire ? 1 : 0); +assign cci_rd_req_ctr_next = cci_rd_req_ctr + DRAM_ADDR_WIDTH'(cci_rd_req_fire ? 1 : 0); assign cci_rdq_pop = cci_dram_wr_req_fire; assign cci_rdq_push = cci_rd_rsp_fire; assign cci_rdq_din = {cp2af_sRxPort.c0.data, t_cci_rdq_tag'(cp2af_sRxPort.c0.hdr.mdata)}; assign cci_pending_reads_next = cci_pending_reads - + ((cci_rd_req_fire && !cci_rdq_pop) ? 1 : - (!cci_rd_req_fire && cci_rdq_pop) ? -1 : 0); + + $bits(cci_pending_reads)'((cci_rd_req_fire && !cci_rdq_pop) ? 1 : + (!cci_rd_req_fire && cci_rdq_pop) ? -1 : 0); assign af2cp_sTxPort.c0.valid = cci_rd_req_enable && !cci_rd_req_wait; @@ -734,7 +734,7 @@ begin end if (cci_rd_rsp_fire) begin - cci_rd_rsp_ctr <= cci_rd_rsp_ctr + 1; + cci_rd_rsp_ctr <= cci_rd_rsp_ctr + t_cci_rdq_tag'(1); if (cci_rd_rsp_ctr == $bits(t_cci_rdq_tag)'(CCI_RD_WINDOW_SIZE-1)) begin cci_rd_req_wait <= 0; // restart new request batch end @@ -787,8 +787,8 @@ assign cci_wr_req_fire = af2cp_sTxPort.c1.valid && !cp2af_sRxPort.c1TxAlmFull; assign cci_wr_rsp_fire = (STATE_READ == state) && cp2af_sRxPort.c1.rspValid; assign cci_pending_writes_next = cci_pending_writes - + ((cci_wr_req_fire && !cci_wr_rsp_fire) ? 1 : - (!cci_wr_req_fire && cci_wr_rsp_fire) ? -1 : 0); + + $bits(cci_pending_writes)'((cci_wr_req_fire && !cci_wr_rsp_fire) ? 1 : + (!cci_wr_req_fire && cci_wr_rsp_fire) ? -1 : 0); assign cmd_read_done = (0 == cci_wr_req_ctr) && (0 == cci_pending_writes); @@ -817,8 +817,8 @@ begin if (cci_wr_req_fire) begin assert(cci_wr_req_ctr != 0); - cci_wr_req_addr <= cci_wr_req_addr + 1; - cci_wr_req_ctr <= cci_wr_req_ctr - 1; + cci_wr_req_addr <= cci_wr_req_addr + t_ccip_clAddr'(1); + cci_wr_req_ctr <= cci_wr_req_ctr - DRAM_ADDR_WIDTH'(1); `ifdef DBG_PRINT_OPAE $display("%t: CCI Wr Req: addr=%0h, rem=%0d, pending=%0d", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes_next); `endif @@ -854,8 +854,8 @@ end assign vx_snp_req_fire = vx_snp_req_valid && vx_snp_req_ready; assign vx_snp_rsp_fire = vx_snp_rsp_valid && vx_snp_rsp_ready; -assign snp_req_ctr_next = vx_snp_req_fire ? (snp_req_ctr + 1) : snp_req_ctr; -assign snp_rsp_ctr_next = vx_snp_rsp_fire ? (snp_rsp_ctr - 1) : snp_rsp_ctr; +assign snp_req_ctr_next = vx_snp_req_fire ? (snp_req_ctr + `VX_DRAM_ADDR_WIDTH'(1)) : snp_req_ctr; +assign snp_rsp_ctr_next = vx_snp_rsp_fire ? (snp_rsp_ctr - `VX_DRAM_ADDR_WIDTH'(1)) : snp_rsp_ctr; assign cmd_clflush_done = (0 == snp_rsp_ctr); @@ -894,7 +894,7 @@ begin if (vx_snp_req_fire) begin assert(snp_req_ctr < snp_req_size); - vx_snp_req_addr <= vx_snp_req_addr + 1; + vx_snp_req_addr <= vx_snp_req_addr + `VX_DRAM_ADDR_WIDTH'(1); vx_snp_req_tag <= (`VX_SNP_TAG_WIDTH)'(snp_req_ctr_next); snp_req_ctr <= snp_req_ctr_next; `ifdef DBG_PRINT_OPAE @@ -954,15 +954,7 @@ end assign cmd_run_done = !vx_busy; Vortex #() vortex ( - `SCOPE_SIGNALS_ISTAGE_TOP_BIND - `SCOPE_SIGNALS_LSU_TOP_BIND - `SCOPE_SIGNALS_BANK_L3_TOP_BIND - `SCOPE_SIGNALS_BANK_L2_TOP_BIND - `SCOPE_SIGNALS_BANK_L1D_TOP_BIND - `SCOPE_SIGNALS_BANK_L1I_TOP_BIND - `SCOPE_SIGNALS_BANK_L1S_TOP_BIND - `SCOPE_SIGNALS_ISSUE_TOP_BIND - `SCOPE_SIGNALS_EXECUTE_TOP_BIND + `SCOPE_BIND_vortex_afu_vortex() .clk (clk), .reset (reset | vx_reset), @@ -1001,10 +993,10 @@ Vortex #() vortex ( `UNUSED_PIN (io_req_addr), `UNUSED_PIN (io_req_data), `UNUSED_PIN (io_req_tag), - .io_req_ready (1), + .io_req_ready (1'b1), // I/O response - .io_rsp_valid (0), + .io_rsp_valid (1'b0), .io_rsp_data (0), .io_rsp_tag (0), `UNUSED_PIN (io_rsp_ready), @@ -1069,20 +1061,20 @@ end `SCOPE_ASSIGN (scope_busy, vx_busy); -wire scope_changed = `SCOPE_TRIGGERS; +wire scope_changed = `SCOPE_TRIGGER; VX_scope #( - .DATAW ($bits({`SCOPE_SIGNALS_DATA_LIST,`SCOPE_SIGNALS_UPD_LIST})), + .DATAW ($bits({`SCOPE_DATA_LIST,`SCOPE_UPDATE_LIST})), .BUSW (64), .SIZE (4096), - .UPDW ($bits({`SCOPE_SIGNALS_UPD_LIST})) + .UPDW ($bits({`SCOPE_UPDATE_LIST})) ) scope ( .clk (clk), .reset (reset), .start (scope_start), - .stop (0), + .stop (1'b0), .changed (scope_changed), - .data_in ({`SCOPE_SIGNALS_DATA_LIST,`SCOPE_SIGNALS_UPD_LIST}), + .data_in ({`SCOPE_DATA_LIST,`SCOPE_UPDATE_LIST}), .bus_in (cmd_scope_wdata), .bus_out (cmd_scope_rdata), .bus_read (cmd_scope_read), diff --git a/hw/rtl/VX_cluster.v b/hw/rtl/VX_cluster.v index d3b75b68..5f69a8e2 100644 --- a/hw/rtl/VX_cluster.v +++ b/hw/rtl/VX_cluster.v @@ -3,14 +3,7 @@ module VX_cluster #( parameter CLUSTER_ID = 0 ) ( - `SCOPE_SIGNALS_ISTAGE_CLUSTER_IO - `SCOPE_SIGNALS_LSU_CLUSTER_IO - `SCOPE_SIGNALS_BANK_L2_CLUSTER_IO - `SCOPE_SIGNALS_BANK_L1D_CLUSTER_IO - `SCOPE_SIGNALS_BANK_L1I_CLUSTER_IO - `SCOPE_SIGNALS_BANK_L1S_CLUSTER_IO - `SCOPE_SIGNALS_ISSUE_CLUSTER_IO - `SCOPE_SIGNALS_EXECUTE_CLUSTER_IO + `SCOPE_IO_VX_cluster // Clock input wire clk, @@ -141,13 +134,7 @@ module VX_cluster #( VX_core #( .CORE_ID(i + (CLUSTER_ID * `NUM_CORES)) ) core ( - `SCOPE_SIGNALS_ISTAGE_SELECT(i) - `SCOPE_SIGNALS_LSU_SELECT(i) - `SCOPE_SIGNALS_BANK_L1D_CORE_SELECT(i) - `SCOPE_SIGNALS_BANK_L1I_CORE_SELECT(i) - `SCOPE_SIGNALS_BANK_L1S_CORE_SELECT(i) - `SCOPE_SIGNALS_ISSUE_SELECT(i) - `SCOPE_SIGNALS_EXECUTE_SELECT(i) + `SCOPE_BIND_VX_cluster_core(i) .clk (clk), .reset (reset), @@ -385,7 +372,7 @@ module VX_cluster #( .SNP_REQ_TAG_WIDTH (`L2SNP_TAG_WIDTH), .SNP_FWD_TAG_WIDTH (`DSNP_TAG_WIDTH) ) l2cache ( - `SCOPE_SIGNALS_BANK_L2_CACHE_BIND + `SCOPE_BIND_VX_cluster_l2cache() .clk (clk), .reset (reset), diff --git a/hw/rtl/VX_core.v b/hw/rtl/VX_core.v index 91e0a6ac..f47eabd8 100644 --- a/hw/rtl/VX_core.v +++ b/hw/rtl/VX_core.v @@ -3,13 +3,7 @@ module VX_core #( parameter CORE_ID = 0 ) ( - `SCOPE_SIGNALS_ISTAGE_IO - `SCOPE_SIGNALS_LSU_IO - `SCOPE_SIGNALS_BANK_L1D_CORE_IO - `SCOPE_SIGNALS_BANK_L1I_CORE_IO - `SCOPE_SIGNALS_BANK_L1S_CORE_IO - `SCOPE_SIGNALS_ISSUE_IO - `SCOPE_SIGNALS_EXECUTE_IO + `SCOPE_IO_VX_core // Clock input wire clk, @@ -181,10 +175,7 @@ module VX_core #( VX_pipeline #( .CORE_ID(CORE_ID) ) pipeline ( - `SCOPE_SIGNALS_ISTAGE_BIND - `SCOPE_SIGNALS_LSU_BIND - `SCOPE_SIGNALS_ISSUE_BIND - `SCOPE_SIGNALS_EXECUTE_BIND + `SCOPE_BIND_VX_core_pipeline() .clk(clk), .reset(reset), @@ -260,9 +251,7 @@ module VX_core #( VX_mem_unit #( .CORE_ID(CORE_ID) ) mem_unit ( - `SCOPE_SIGNALS_BANK_L1D_CORE_BIND - `SCOPE_SIGNALS_BANK_L1I_CORE_BIND - `SCOPE_SIGNALS_BANK_L1S_CORE_BIND + `SCOPE_BIND_VX_core_mem_unit() .clk (clk), .reset (reset), diff --git a/hw/rtl/VX_csr_unit.v b/hw/rtl/VX_csr_unit.v index 36ba7baf..40b76d83 100644 --- a/hw/rtl/VX_csr_unit.v +++ b/hw/rtl/VX_csr_unit.v @@ -7,7 +7,7 @@ module VX_csr_unit #( input wire reset, VX_cmt_to_csr_if cmt_to_csr_if, - VX_csr_to_issue_if csr_to_issue_if, + VX_csr_to_issue_if csr_to_issue_if, VX_csr_io_req_if csr_io_req_if, VX_csr_io_rsp_if csr_io_rsp_if, @@ -15,8 +15,8 @@ module VX_csr_unit #( VX_csr_req_if csr_req_if, VX_exu_to_cmt_if csr_commit_if ); - VX_csr_req_if csr_pipe_req_if(); - VX_exu_to_cmt_if csr_pipe_rsp_if(); + VX_csr_req_if csr_pipe_req_if(); + VX_exu_to_cmt_if csr_pipe_rsp_if(); wire select_io_req = csr_io_req_if.valid; wire select_io_rsp; diff --git a/hw/rtl/VX_execute.v b/hw/rtl/VX_execute.v index f542d3ce..e56e583c 100644 --- a/hw/rtl/VX_execute.v +++ b/hw/rtl/VX_execute.v @@ -3,8 +3,7 @@ module VX_execute #( parameter CORE_ID = 0 ) ( - `SCOPE_SIGNALS_LSU_IO - `SCOPE_SIGNALS_EXECUTE_IO + `SCOPE_IO_VX_execute input wire clk, input wire reset, @@ -55,7 +54,7 @@ module VX_execute #( VX_lsu_unit #( .CORE_ID(CORE_ID) ) lsu_unit ( - `SCOPE_SIGNALS_LSU_BIND + `SCOPE_BIND_VX_execute_lsu_unit() .clk (clk), .reset (reset), .dcache_req_if (dcache_req_if), @@ -122,6 +121,7 @@ module VX_execute #( VX_gpu_unit #( .CORE_ID(CORE_ID) ) gpu_unit ( + `SCOPE_BIND_VX_execute_gpu_unit() .clk (clk), .reset (reset), .gpu_req_if (gpu_req_if), diff --git a/hw/rtl/VX_fetch.v b/hw/rtl/VX_fetch.v index cf0c2e45..1d304ffc 100644 --- a/hw/rtl/VX_fetch.v +++ b/hw/rtl/VX_fetch.v @@ -3,7 +3,7 @@ module VX_fetch #( parameter CORE_ID = 0 ) ( - `SCOPE_SIGNALS_ISTAGE_IO + `SCOPE_IO_VX_fetch input wire clk, input wire reset, @@ -29,6 +29,8 @@ module VX_fetch #( VX_warp_sched #( .CORE_ID(CORE_ID) ) warp_sched ( + `SCOPE_BIND_VX_fetch_warp_sched() + .clk (clk), .reset (reset), .warp_ctl_if (warp_ctl_if), @@ -43,7 +45,7 @@ module VX_fetch #( VX_icache_stage #( .CORE_ID(CORE_ID) ) icache_stage ( - `SCOPE_SIGNALS_ISTAGE_BIND + `SCOPE_BIND_VX_fetch_icache_stage() .clk (clk), .reset (reset), diff --git a/hw/rtl/VX_gpr_fp_ctrl.v b/hw/rtl/VX_gpr_fp_ctrl.v index 1d7224ab..c40df875 100644 --- a/hw/rtl/VX_gpr_fp_ctrl.v +++ b/hw/rtl/VX_gpr_fp_ctrl.v @@ -41,24 +41,20 @@ module VX_gpr_fp_ctrl ( read_rs1 <= 1; end - rsp_valid <= gpr_req_if.valid; - rsp_wid <= gpr_req_if.wid; - rsp_pc <= gpr_req_if.PC; + rsp_valid <= gpr_req_if.valid; + rsp_wid <= gpr_req_if.wid; + rsp_pc <= gpr_req_if.PC; if (read_rs1) begin - rsp_rs1_data <= rs1_data; + rsp_rs1_data <= (gpr_req_if.rs1 == 0) ? (`NUM_THREADS*32)'(0) : rs1_data; end - rsp_rs2_data <= rs2_data; - rsp_rs3_data <= rs1_data; + rsp_rs2_data <= (gpr_req_if.rs2 == 0) ? (`NUM_THREADS*32)'(0) : rs2_data; + rsp_rs3_data <= (gpr_req_if.rs1 == 0) ? (`NUM_THREADS*32)'(0) : rs1_data; assert(read_rs1 || rsp_wid == gpr_req_if.wid); end end - always @(posedge clk) begin - - end - // outputs wire [`NR_BITS-1:0] rs1 = read_rs1 ? gpr_req_if.rs1 : gpr_req_if.rs3; assign raddr1 = {gpr_req_if.wid, rs1}; diff --git a/hw/rtl/VX_gpr_ram.v b/hw/rtl/VX_gpr_ram.v index 8f1b4483..352a17e0 100644 --- a/hw/rtl/VX_gpr_ram.v +++ b/hw/rtl/VX_gpr_ram.v @@ -12,15 +12,7 @@ module VX_gpr_ram ( ); `ifndef ASIC - reg [`NUM_THREADS-1:0][3:0][7:0] ram [(`NUM_WARPS * `NUM_REGS)-1:0]; - - initial begin // initialize ram: set r0 = 0 - for (integer j = 0; j < `NUM_WARPS; j++) begin - for (integer i = 0; i < `NUM_REGS; i++) begin - ram[j * `NUM_REGS + i] = (i == 0) ? {`NUM_THREADS{32'h0}} : {`NUM_THREADS{32'hx}}; - end - end - end + reg [`NUM_THREADS-1:0][3:0][7:0] ram [(`NUM_WARPS * `NUM_REGS)-1:0]; always @(posedge clk) begin for (integer i = 0; i < `NUM_THREADS; i++) begin diff --git a/hw/rtl/VX_gpr_stage.v b/hw/rtl/VX_gpr_stage.v index 129da4c0..23d9db16 100644 --- a/hw/rtl/VX_gpr_stage.v +++ b/hw/rtl/VX_gpr_stage.v @@ -15,9 +15,8 @@ module VX_gpr_stage #( ); `UNUSED_VAR (reset) - wire [`NUM_THREADS-1:0][31:0] rs1_data; - wire [`NUM_THREADS-1:0][31:0] rs2_data; - wire [`NW_BITS+`NR_BITS-1:0] raddr1; + wire [`NUM_THREADS-1:0][31:0] rs1_data, rs2_data; + wire [`NW_BITS+`NR_BITS-1:0] raddr1; VX_gpr_ram gpr_ram ( .clk (clk), @@ -57,8 +56,8 @@ module VX_gpr_stage #( rsp_valid <= gpr_req_if.valid; rsp_wid <= gpr_req_if.wid; rsp_pc <= gpr_req_if.PC; - rsp_rs1_data <= rs1_data; - rsp_rs2_data <= rs2_data; + rsp_rs1_data <= (gpr_req_if.rs1 == 0) ? (`NUM_THREADS*32)'(0) : rs1_data; + rsp_rs2_data <= (gpr_req_if.rs2 == 0) ? (`NUM_THREADS*32)'(0) : rs2_data; end end diff --git a/hw/rtl/VX_gpu_unit.v b/hw/rtl/VX_gpu_unit.v index ac6550a3..ffad1717 100644 --- a/hw/rtl/VX_gpu_unit.v +++ b/hw/rtl/VX_gpu_unit.v @@ -3,6 +3,8 @@ module VX_gpu_unit #( parameter CORE_ID = 0 ) ( + `SCOPE_IO_VX_gpu_unit + input wire clk, input wire reset, @@ -88,4 +90,18 @@ module VX_gpu_unit #( // can accept new request? assign gpu_req_if.ready = gpu_commit_if.ready; + `SCOPE_ASSIGN (scope_gpu_req_valid, gpu_req_if.valid); + `SCOPE_ASSIGN (scope_gpu_req_wid, gpu_req_if.wid); + `SCOPE_ASSIGN (scope_gpu_req_tmask, gpu_req_if.tmask); + `SCOPE_ASSIGN (scope_gpu_req_op_type, gpu_req_if.op_type); + `SCOPE_ASSIGN (scope_gpu_req_rs1, gpu_req_if.rs1_data[0]); + `SCOPE_ASSIGN (scope_gpu_req_rs2, gpu_req_if.rs2_data); + `SCOPE_ASSIGN (scope_gpu_req_ready, gpu_req_if.ready); + `SCOPE_ASSIGN (scope_gpu_rsp_valid, warp_ctl_if.valid); + `SCOPE_ASSIGN (scope_gpu_rsp_wid, warp_ctl_if.wid); + `SCOPE_ASSIGN (scope_gpu_rsp_tmc, warp_ctl_if.tmc); + `SCOPE_ASSIGN (scope_gpu_rsp_wspawn, warp_ctl_if.wspawn); + `SCOPE_ASSIGN (scope_gpu_rsp_split, warp_ctl_if.split); + `SCOPE_ASSIGN (scope_gpu_rsp_barrier, warp_ctl_if.barrier); + endmodule \ No newline at end of file diff --git a/hw/rtl/VX_ibuffer.v b/hw/rtl/VX_ibuffer.v index 3f891a45..8c712eff 100644 --- a/hw/rtl/VX_ibuffer.v +++ b/hw/rtl/VX_ibuffer.v @@ -20,16 +20,13 @@ module VX_ibuffer #( localparam ADDRW = $clog2(SIZE); localparam NWARPSW = $clog2(`NUM_WARPS+1); - `USE_FAST_BRAM reg [DATAW-1:0] entries [`NUM_WARPS-1:0][SIZE-1:0]; - reg [SIZEW-1:0] size_r [`NUM_WARPS-1:0]; - reg [ADDRW:0] rd_ptr_r [`NUM_WARPS-1:0]; - reg [ADDRW:0] wr_ptr_r [`NUM_WARPS-1:0]; - wire [`NUM_WARPS-1:0] q_full; wire [`NUM_WARPS-1:0][SIZEW-1:0] q_size; wire [DATAW-1:0] q_data_in; wire [`NUM_WARPS-1:0][DATAW-1:0] q_data_prev; + reg [`NUM_WARPS-1:0][DATAW-1:0] q_data_out; + reg [SIZEW-1:0] size_r [`NUM_WARPS-1:0]; wire enq_fire = ibuf_enq_if.valid && ibuf_enq_if.ready; wire deq_fire = ibuf_deq_if.valid && ibuf_deq_if.ready; @@ -39,41 +36,50 @@ module VX_ibuffer #( wire writing = enq_fire && (i == ibuf_enq_if.wid); wire reading = deq_fire && (i == ibuf_deq_if.wid); - wire [ADDRW-1:0] rd_ptr_a = rd_ptr_r[i][ADDRW-1:0]; - wire [ADDRW-1:0] wr_ptr_a = wr_ptr_r[i][ADDRW-1:0]; - + wire is_slot0 = ((0 == size_r[i]) || ((1 == size_r[i]) && reading)); + + wire push = writing && !is_slot0; + wire pop = reading && (size_r[i] != 1); + + VX_generic_queue #( + .DATAW(DATAW), + .SIZE(SIZE) + ) queue ( + .clk (clk), + .reset (reset), + .push (push), + .data_in (q_data_in), + .pop (pop), + .data_out (q_data_prev[i]), + `UNUSED_PIN (empty), + `UNUSED_PIN (full), + `UNUSED_PIN (size) + ); + + always @(posedge clk) begin + if (writing && is_slot0) begin + q_data_out[i] <= q_data_in; + end + if (pop) begin + q_data_out[i] <= q_data_prev[i]; + end + end + always @(posedge clk) begin if (reset) begin - rd_ptr_r[i] <= 0; - wr_ptr_r[i] <= 0; - size_r[i] <= 0; + size_r[i] <= 0; end else begin - if (writing) begin - if ((0 == size_r[i]) || ((1 == size_r[i]) && reading)) begin - q_data_out[i] <= q_data_in; - end else begin - entries[i][wr_ptr_a] <= q_data_in; - wr_ptr_r[i] <= wr_ptr_r[i] + ADDRW'(1); - end - if (!reading) begin - size_r[i] <= size_r[i] + SIZEW'(1); - end + if (writing && !reading) begin + size_r[i] <= size_r[i] + SIZEW'(1); end - if (reading) begin - if (size_r[i] != 1) begin - q_data_out[i] <= q_data_prev[i]; - rd_ptr_r[i] <= rd_ptr_r[i] + ADDRW'(1); - end - if (!writing) begin - size_r[i] <= size_r[i] - SIZEW'(1); - end + if (reading && !writing) begin + size_r[i] <= size_r[i] - SIZEW'(1); end end - end + end - assign q_data_prev[i] = entries[i][rd_ptr_a]; - assign q_full[i] = (size_r[i] == SIZE); - assign q_size[i] = size_r[i]; + assign q_full[i] = (size_r[i] == SIZE); + assign q_size[i] = size_r[i]; end /////////////////////////////////////////////////////////////////////////// @@ -144,9 +150,9 @@ module VX_ibuffer #( schedule_table[deq_wid_n] <= 0; end - deq_valid <= deq_valid_n; - deq_wid <= deq_wid_n; - deq_instr <= deq_instr_n; + deq_valid <= deq_valid_n; + deq_wid <= deq_wid_n; + deq_instr <= deq_instr_n; if (warp_added && !warp_removed) begin num_warps <= num_warps + NWARPSW'(1); diff --git a/hw/rtl/VX_icache_stage.v b/hw/rtl/VX_icache_stage.v index 1bb61f92..e0bf94af 100644 --- a/hw/rtl/VX_icache_stage.v +++ b/hw/rtl/VX_icache_stage.v @@ -3,7 +3,7 @@ module VX_icache_stage #( parameter CORE_ID = 0 ) ( - `SCOPE_SIGNALS_ISTAGE_IO + `SCOPE_IO_VX_icache_stage input wire clk, input wire reset, @@ -30,7 +30,7 @@ module VX_icache_stage #( always @(posedge clk) begin if (icache_req_fire) begin - rsp_PC_buf[req_tag] <= ifetch_req_if.PC; + rsp_PC_buf[req_tag] <= ifetch_req_if.PC; rsp_tmask_buf[req_tag] <= ifetch_req_if.tmask; end end diff --git a/hw/rtl/VX_ipdom_stack.v b/hw/rtl/VX_ipdom_stack.v index f388d3d0..e00097ae 100644 --- a/hw/rtl/VX_ipdom_stack.v +++ b/hw/rtl/VX_ipdom_stack.v @@ -1,4 +1,3 @@ - `include "VX_platform.vh" module VX_ipdom_stack #( @@ -17,33 +16,55 @@ module VX_ipdom_stack #( ); localparam STACK_SIZE = 2 ** DEPTH; - `USE_FAST_BRAM reg [WIDTH-1:0] stack_1 [0:STACK_SIZE-1]; - `USE_FAST_BRAM reg [WIDTH-1:0] stack_2 [0:STACK_SIZE-1]; - `USE_FAST_BRAM reg is_part [0:STACK_SIZE-1]; + reg [WIDTH-1:0] stack_1 [0:STACK_SIZE-1]; + reg [WIDTH-1:0] stack_2 [0:STACK_SIZE-1]; + reg is_part [0:STACK_SIZE-1]; reg [DEPTH-1:0] rd_ptr, wr_ptr; + reg [WIDTH - 1:0] d1, d2; + reg p; + always @(posedge clk) begin if (reset) begin + rd_ptr <= 0; wr_ptr <= 0; end else begin if (push) begin - stack_1[wr_ptr] <= q1; - stack_2[wr_ptr] <= q2; - is_part[wr_ptr] <= 0; rd_ptr <= wr_ptr; wr_ptr <= wr_ptr + DEPTH'(1); end else if (pop) begin wr_ptr <= wr_ptr - DEPTH'(is_part[rd_ptr]); rd_ptr <= rd_ptr - DEPTH'(is_part[rd_ptr]); - is_part[rd_ptr] <= 1; end end end - assign d = is_part[rd_ptr] ? stack_1[rd_ptr] : stack_2[rd_ptr]; + always @(posedge clk) begin + if (push) begin + stack_1[wr_ptr] <= q1; + end + end + assign d1 = stack_1[rd_ptr]; - assign empty = (0 == wr_ptr); + always @(posedge clk) begin + if (push) begin + stack_2[wr_ptr] <= q2; + end + end + assign d2 = stack_2[rd_ptr]; + + always @(posedge clk) begin + if (push) begin + is_part[wr_ptr] <= 0; + end else if (pop) begin + is_part[rd_ptr] <= 1; + end + end + assign p = is_part[rd_ptr]; + + assign d = p ? d1 : d2; + assign empty = ~(| wr_ptr); assign full = ((STACK_SIZE-1) == wr_ptr); endmodule \ No newline at end of file diff --git a/hw/rtl/VX_issue.v b/hw/rtl/VX_issue.v index 1c1e4f8a..46e0388f 100644 --- a/hw/rtl/VX_issue.v +++ b/hw/rtl/VX_issue.v @@ -3,7 +3,7 @@ module VX_issue #( parameter CORE_ID = 0 ) ( - `SCOPE_SIGNALS_ISSUE_IO + `SCOPE_IO_VX_issue input wire clk, input wire reset, diff --git a/hw/rtl/VX_lsu_unit.v b/hw/rtl/VX_lsu_unit.v index 797b30cd..52646138 100644 --- a/hw/rtl/VX_lsu_unit.v +++ b/hw/rtl/VX_lsu_unit.v @@ -3,7 +3,7 @@ module VX_lsu_unit #( parameter CORE_ID = 0 ) ( - `SCOPE_SIGNALS_LSU_IO + `SCOPE_IO_VX_lsu_unit input wire clk, input wire reset, diff --git a/hw/rtl/VX_mem_unit.v b/hw/rtl/VX_mem_unit.v index 9cd29a1a..ae8fddde 100644 --- a/hw/rtl/VX_mem_unit.v +++ b/hw/rtl/VX_mem_unit.v @@ -3,9 +3,7 @@ module VX_mem_unit # ( parameter CORE_ID = 0 ) ( - `SCOPE_SIGNALS_BANK_L1D_CORE_IO - `SCOPE_SIGNALS_BANK_L1I_CORE_IO - `SCOPE_SIGNALS_BANK_L1S_CORE_IO + `SCOPE_IO_VX_mem_unit input wire clk, input wire reset, @@ -79,7 +77,7 @@ module VX_mem_unit # ( .CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS), .DRAM_TAG_WIDTH (`SDRAM_TAG_WIDTH) ) smem ( - `SCOPE_SIGNALS_BANK_L1S_CACHE_BIND + `SCOPE_BIND_VX_mem_unit_smem() .clk (clk), .reset (reset), @@ -106,7 +104,7 @@ module VX_mem_unit # ( `UNUSED_PIN (dram_req_addr), `UNUSED_PIN (dram_req_data), `UNUSED_PIN (dram_req_tag), - .dram_req_ready (0), + .dram_req_ready (1'b0), // DRAM response .dram_rsp_valid (0), @@ -115,7 +113,7 @@ module VX_mem_unit # ( `UNUSED_PIN (dram_rsp_ready), // Snoop request - .snp_req_valid (0), + .snp_req_valid (1'b0), .snp_req_addr (0), .snp_req_invalidate (0), .snp_req_tag (0), @@ -124,17 +122,17 @@ module VX_mem_unit # ( // Snoop response `UNUSED_PIN (snp_rsp_valid), `UNUSED_PIN (snp_rsp_tag), - .snp_rsp_ready (0), + .snp_rsp_ready (1'b0), // Snoop forward out `UNUSED_PIN (snp_fwdout_valid), `UNUSED_PIN (snp_fwdout_addr), `UNUSED_PIN (snp_fwdout_invalidate), `UNUSED_PIN (snp_fwdout_tag), - .snp_fwdout_ready (0), + .snp_fwdout_ready (1'b0), // Snoop forward in - .snp_fwdin_valid (0), + .snp_fwdin_valid (1'b0), .snp_fwdin_tag (0), `UNUSED_PIN (snp_fwdin_ready) ); @@ -161,7 +159,7 @@ module VX_mem_unit # ( .DRAM_TAG_WIDTH (`DDRAM_TAG_WIDTH), .SNP_REQ_TAG_WIDTH (`DSNP_TAG_WIDTH) ) dcache ( - `SCOPE_SIGNALS_BANK_L1D_CACHE_BIND + `SCOPE_BIND_VX_mem_unit_dcache() .clk (clk), .reset (reset), @@ -213,10 +211,10 @@ module VX_mem_unit # ( `UNUSED_PIN (snp_fwdout_addr), `UNUSED_PIN (snp_fwdout_invalidate), `UNUSED_PIN (snp_fwdout_tag), - .snp_fwdout_ready (0), + .snp_fwdout_ready (1'b0), // Snoop forward in - .snp_fwdin_valid (0), + .snp_fwdin_valid (1'b0), .snp_fwdin_tag (0), `UNUSED_PIN (snp_fwdin_ready) ); @@ -242,7 +240,7 @@ module VX_mem_unit # ( .CORE_TAG_ID_BITS (`ICORE_TAG_ID_BITS), .DRAM_TAG_WIDTH (`IDRAM_TAG_WIDTH) ) icache ( - `SCOPE_SIGNALS_BANK_L1I_CACHE_BIND + `SCOPE_BIND_VX_mem_unit_icache() .clk (clk), .reset (reset), @@ -278,26 +276,26 @@ module VX_mem_unit # ( .dram_rsp_ready (icache_dram_rsp_if.ready), // Snoop request - .snp_req_valid (0), + .snp_req_valid (1'b0), .snp_req_addr (0), - .snp_req_invalidate (0), + .snp_req_invalidate (1'b0), .snp_req_tag (0), `UNUSED_PIN (snp_req_ready), // Snoop response `UNUSED_PIN (snp_rsp_valid), `UNUSED_PIN (snp_rsp_tag), - .snp_rsp_ready (0), + .snp_rsp_ready (1'b0), // Snoop forward out `UNUSED_PIN (snp_fwdout_valid), `UNUSED_PIN (snp_fwdout_addr), `UNUSED_PIN (snp_fwdout_invalidate), `UNUSED_PIN (snp_fwdout_tag), - .snp_fwdout_ready (0), + .snp_fwdout_ready (1'b0), // Snoop forward in - .snp_fwdin_valid (0), + .snp_fwdin_valid (1'b0), .snp_fwdin_tag (0), `UNUSED_PIN (snp_fwdin_ready) ); diff --git a/hw/rtl/VX_pipeline.v b/hw/rtl/VX_pipeline.v index c2629eec..86cd7003 100644 --- a/hw/rtl/VX_pipeline.v +++ b/hw/rtl/VX_pipeline.v @@ -3,10 +3,7 @@ module VX_pipeline #( parameter CORE_ID = 0 ) ( - `SCOPE_SIGNALS_ISTAGE_IO - `SCOPE_SIGNALS_LSU_IO - `SCOPE_SIGNALS_ISSUE_IO - `SCOPE_SIGNALS_EXECUTE_IO + `SCOPE_IO_VX_pipeline // Clock input wire clk, @@ -126,7 +123,7 @@ module VX_pipeline #( VX_fetch #( .CORE_ID(CORE_ID) ) fetch ( - `SCOPE_SIGNALS_ISTAGE_BIND + `SCOPE_BIND_VX_pipeline_fetch() .clk (clk), .reset (reset), .icache_req_if (core_icache_req_if), @@ -153,7 +150,7 @@ module VX_pipeline #( VX_issue #( .CORE_ID(CORE_ID) ) issue ( - `SCOPE_SIGNALS_ISSUE_BIND + `SCOPE_BIND_VX_pipeline_issue() .clk (clk), .reset (reset), @@ -173,8 +170,8 @@ module VX_pipeline #( VX_execute #( .CORE_ID(CORE_ID) ) execute ( - `SCOPE_SIGNALS_LSU_BIND - `SCOPE_SIGNALS_EXECUTE_BIND + `SCOPE_BIND_VX_pipeline_execute() + .clk (clk), .reset (reset), diff --git a/hw/rtl/VX_platform.vh b/hw/rtl/VX_platform.vh index a1818d62..a377c461 100644 --- a/hw/rtl/VX_platform.vh +++ b/hw/rtl/VX_platform.vh @@ -52,7 +52,7 @@ /////////////////////////////////////////////////////////////////////////////// `define USE_FAST_BRAM (* syn_ramstyle = "mlab" *) -`define RELAX_BRAM_RW (* syn_ramstyle = "no_rw_check" *) +`define RELAXED_RW_BRAM (* syn_ramstyle = "no_rw_check" *) /////////////////////////////////////////////////////////////////////////////// diff --git a/hw/rtl/VX_scope.vh b/hw/rtl/VX_scope.vh index 4292bb8f..2c007e33 100644 --- a/hw/rtl/VX_scope.vh +++ b/hw/rtl/VX_scope.vh @@ -1,4 +1,3 @@ - `ifndef VX_SCOPE `define VX_SCOPE @@ -6,86 +5,76 @@ `include "scope-defs.vh" -`define SCOPE_ASSIGN(d,s) \ - `IGNORE_WARNINGS_BEGIN \ - assign d = s \ - `IGNORE_WARNINGS_END +`define SCOPE_ASSIGN(d,s) assign d = s `else -`define SCOPE_SIGNALS_ISTAGE_TOP_IO -`define SCOPE_SIGNALS_ISTAGE_TOP_BIND -`define SCOPE_SIGNALS_ISTAGE_CLUSTER_IO -`define SCOPE_SIGNALS_ISTAGE_CLUSTER_BIND -`define SCOPE_SIGNALS_ISTAGE_IO -`define SCOPE_SIGNALS_ISTAGE_BIND -`define SCOPE_SIGNALS_ISTAGE_CLUSTER_SELECT(__i__) -`define SCOPE_SIGNALS_ISTAGE_SELECT(__i__) -`define SCOPE_SIGNALS_LSU_TOP_IO -`define SCOPE_SIGNALS_LSU_TOP_BIND -`define SCOPE_SIGNALS_LSU_CLUSTER_IO -`define SCOPE_SIGNALS_LSU_CLUSTER_BIND -`define SCOPE_SIGNALS_LSU_IO -`define SCOPE_SIGNALS_LSU_BIND -`define SCOPE_SIGNALS_LSU_CLUSTER_SELECT(__i__) -`define SCOPE_SIGNALS_LSU_SELECT(__i__) -`define SCOPE_SIGNALS_ISSUE_TOP_IO -`define SCOPE_SIGNALS_ISSUE_TOP_BIND -`define SCOPE_SIGNALS_ISSUE_CLUSTER_IO -`define SCOPE_SIGNALS_ISSUE_CLUSTER_BIND -`define SCOPE_SIGNALS_ISSUE_IO -`define SCOPE_SIGNALS_ISSUE_BIND -`define SCOPE_SIGNALS_ISSUE_CLUSTER_SELECT(__i__) -`define SCOPE_SIGNALS_ISSUE_SELECT(__i__) -`define SCOPE_SIGNALS_EXECUTE_TOP_IO -`define SCOPE_SIGNALS_EXECUTE_TOP_BIND -`define SCOPE_SIGNALS_EXECUTE_CLUSTER_IO -`define SCOPE_SIGNALS_EXECUTE_CLUSTER_BIND -`define SCOPE_SIGNALS_EXECUTE_IO -`define SCOPE_SIGNALS_EXECUTE_BIND -`define SCOPE_SIGNALS_EXECUTE_CLUSTER_SELECT(__i__) -`define SCOPE_SIGNALS_EXECUTE_SELECT(__i__) -`define SCOPE_SIGNALS_BANK_L3_TOP_IO -`define SCOPE_SIGNALS_BANK_L3_TOP_BIND -`define SCOPE_SIGNALS_BANK_L2_TOP_IO -`define SCOPE_SIGNALS_BANK_L2_TOP_BIND -`define SCOPE_SIGNALS_BANK_L1D_TOP_IO -`define SCOPE_SIGNALS_BANK_L1D_TOP_BIND -`define SCOPE_SIGNALS_BANK_L1I_TOP_IO -`define SCOPE_SIGNALS_BANK_L1I_TOP_BIND -`define SCOPE_SIGNALS_BANK_L1S_TOP_IO -`define SCOPE_SIGNALS_BANK_L1S_TOP_BIND -`define SCOPE_SIGNALS_BANK_L2_CLUSTER_IO -`define SCOPE_SIGNALS_BANK_L2_CLUSTER_BIND -`define SCOPE_SIGNALS_BANK_L1D_CLUSTER_IO -`define SCOPE_SIGNALS_BANK_L1D_CLUSTER_BIND -`define SCOPE_SIGNALS_BANK_L1I_CLUSTER_IO -`define SCOPE_SIGNALS_BANK_L1I_CLUSTER_BIND -`define SCOPE_SIGNALS_BANK_L1S_CLUSTER_IO -`define SCOPE_SIGNALS_BANK_L1S_CLUSTER_BIND -`define SCOPE_SIGNALS_BANK_L1D_CORE_IO -`define SCOPE_SIGNALS_BANK_L1D_CORE_BIND -`define SCOPE_SIGNALS_BANK_L1I_CORE_IO -`define SCOPE_SIGNALS_BANK_L1I_CORE_BIND -`define SCOPE_SIGNALS_BANK_L1S_CORE_IO -`define SCOPE_SIGNALS_BANK_L1S_CORE_BIND -`define SCOPE_SIGNALS_BANK_CACHE_IO -`define SCOPE_SIGNALS_BANK_CACHE_BIND -`define SCOPE_SIGNALS_BANK_IO -`define SCOPE_SIGNALS_BANK_BIND -`define SCOPE_SIGNALS_BANK_L2_CLUSTER_SELECT(__i__) -`define SCOPE_SIGNALS_BANK_L1D_CLUSTER_SELECT(__i__) -`define SCOPE_SIGNALS_BANK_L1I_CLUSTER_SELECT(__i__) -`define SCOPE_SIGNALS_BANK_L1S_CLUSTER_SELECT(__i__) -`define SCOPE_SIGNALS_BANK_L1D_CORE_SELECT(__i__) -`define SCOPE_SIGNALS_BANK_L1I_CORE_SELECT(__i__) -`define SCOPE_SIGNALS_BANK_L1S_CORE_SELECT(__i__) -`define SCOPE_SIGNALS_BANK_L3_CACHE_BIND -`define SCOPE_SIGNALS_BANK_L2_CACHE_BIND -`define SCOPE_SIGNALS_BANK_L1D_CACHE_BIND -`define SCOPE_SIGNALS_BANK_L1I_CACHE_BIND -`define SCOPE_SIGNALS_BANK_L1S_CACHE_BIND -`define SCOPE_SIGNALS_BANK_SELECT(__i__) +`define SCOPE_IO_vortex_afu + +`define SCOPE_IO_VX_icache_stage + +`define SCOPE_IO_VX_fetch + +`define SCOPE_BIND_VX_fetch_icache_stage() + +`define SCOPE_IO_VX_pipeline + +`define SCOPE_BIND_VX_pipeline_fetch() + +`define SCOPE_IO_VX_core + +`define SCOPE_BIND_VX_core_pipeline() + +`define SCOPE_IO_VX_cluster + +`define SCOPE_BIND_VX_cluster_core(__i__) + +`define SCOPE_IO_Vortex + +`define SCOPE_BIND_Vortex_cluster(__i__) + +`define SCOPE_BIND_vortex_afu_vortex() + +`define SCOPE_IO_VX_lsu_unit + +`define SCOPE_IO_VX_execute + +`define SCOPE_BIND_VX_execute_lsu_unit() + +`define SCOPE_BIND_VX_pipeline_execute() + +`define SCOPE_IO_VX_issue + +`define SCOPE_BIND_VX_pipeline_issue() + +`define SCOPE_IO_VX_bank + +`define SCOPE_IO_VX_cache + +`define SCOPE_BIND_VX_cache_bank(__i__) + +`define SCOPE_BIND_Vortex_l3cache() + +`define SCOPE_BIND_VX_cluster_l2cache() + +`define SCOPE_IO_VX_mem_unit + +`define SCOPE_BIND_VX_mem_unit_dcache() + +`define SCOPE_BIND_VX_core_mem_unit() + +`define SCOPE_BIND_VX_mem_unit_icache() + +`define SCOPE_BIND_VX_mem_unit_smem() + +`define SCOPE_DECL_SIGNALS + +`define SCOPE_DATA_LIST + +`define SCOPE_UPDATE_LIST + +`define SCOPE_TRIGGER + `define SCOPE_ASSIGN(d,s) `endif diff --git a/hw/rtl/VX_types.vh b/hw/rtl/VX_types.vh index dd7d22b5..26d051ae 100644 --- a/hw/rtl/VX_types.vh +++ b/hw/rtl/VX_types.vh @@ -28,12 +28,16 @@ typedef struct packed { logic [`NUM_THREADS-1:0] tmask; } gpu_tmc_t; +`define GPU_TMC_SIZE (1+`NUM_THREADS) + typedef struct packed { logic valid; logic [`NUM_WARPS-1:0] wmask; logic [31:0] pc; } gpu_wspawn_t; +`define GPU_WSPAWN_SIZE (1+`NUM_WARPS+32) + typedef struct packed { logic valid; logic diverged; @@ -42,10 +46,14 @@ typedef struct packed { logic [31:0] pc; } gpu_split_t; +`define GPU_SPLIT_SIZE (1+1+`NUM_THREADS+`NUM_THREADS+32) + typedef struct packed { logic valid; logic [`NB_BITS-1:0] id; logic [`NW_BITS-1:0] size_m1; } gpu_barrier_t; +`define GPU_BARRIER_SIZE (1+`NB_BITS+`NB_BITS) + `endif \ No newline at end of file diff --git a/hw/rtl/VX_warp_sched.v b/hw/rtl/VX_warp_sched.v index 2f60776c..2938c60b 100644 --- a/hw/rtl/VX_warp_sched.v +++ b/hw/rtl/VX_warp_sched.v @@ -3,6 +3,8 @@ module VX_warp_sched #( parameter CORE_ID = 0 ) ( + `SCOPE_IO_VX_warp_sched + input wire clk, input wire reset, @@ -248,4 +250,11 @@ module VX_warp_sched #( assign busy = (active_warps != 0); + `SCOPE_ASSIGN (scope_wsched_scheduled_warp, scheduled_warp); + `SCOPE_ASSIGN (scope_wsched_active_warps, active_warps); + `SCOPE_ASSIGN (scope_wsched_schedule_table, schedule_table); + `SCOPE_ASSIGN (scope_wsched_schedule_ready, schedule_ready); + `SCOPE_ASSIGN (scope_wsched_warp_to_schedule, warp_to_schedule); + `SCOPE_ASSIGN (scope_wsched_warp_pc, warp_pc); + endmodule \ No newline at end of file diff --git a/hw/rtl/VX_writeback.v b/hw/rtl/VX_writeback.v index 58e01f55..772ac3c0 100644 --- a/hw/rtl/VX_writeback.v +++ b/hw/rtl/VX_writeback.v @@ -25,6 +25,7 @@ module VX_writeback #( wire wb_valid; wire [`NW_BITS-1:0] wb_wid; + wire [31:0] wb_PC; wire [`NUM_THREADS-1:0] wb_tmask; wire [`NR_BITS-1:0] wb_rd; wire [`NUM_THREADS-1:0][31:0] wb_data; @@ -42,6 +43,13 @@ module VX_writeback #( mul_valid ? mul_commit_if.wid : fpu_valid ? fpu_commit_if.wid : 0; + + assign wb_PC = alu_valid ? alu_commit_if.PC : + lsu_valid ? lsu_commit_if.PC : + csr_valid ? csr_commit_if.PC : + mul_valid ? mul_commit_if.PC : + fpu_valid ? fpu_commit_if.PC : + 0; assign wb_tmask = alu_valid ? alu_commit_if.tmask : lsu_valid ? lsu_commit_if.tmask : @@ -68,16 +76,16 @@ module VX_writeback #( wire stall = 0/*~writeback_if.ready && writeback_if.valid*/; VX_generic_register #( - .N(1 + `NW_BITS + `NUM_THREADS + `NR_BITS + (`NUM_THREADS * 32)) + .N(1 + `NW_BITS + 32 + `NUM_THREADS + `NR_BITS + (`NUM_THREADS * 32)) ) wb_reg ( .clk (clk), .reset (reset), .stall (stall), .flush (1'b0), - .in ({wb_valid, wb_wid, wb_tmask, wb_rd, wb_data}), - .out ({writeback_if.valid, writeback_if.wid, writeback_if.tmask, writeback_if.rd, writeback_if.data}) + .in ({wb_valid, wb_wid, wb_PC, wb_tmask, wb_rd, wb_data}), + .out ({writeback_if.valid, writeback_if.wid, writeback_if.PC, writeback_if.tmask, writeback_if.rd, writeback_if.data}) ); - + assign alu_commit_if.ready = !stall; assign lsu_commit_if.ready = !stall && !alu_valid; assign csr_commit_if.ready = !stall && !alu_valid && !lsu_valid; diff --git a/hw/rtl/Vortex.v b/hw/rtl/Vortex.v index 82f36353..5e422ed5 100644 --- a/hw/rtl/Vortex.v +++ b/hw/rtl/Vortex.v @@ -1,15 +1,7 @@ `include "VX_define.vh" module Vortex ( - `SCOPE_SIGNALS_ISTAGE_TOP_IO - `SCOPE_SIGNALS_LSU_TOP_IO - `SCOPE_SIGNALS_BANK_L3_TOP_IO - `SCOPE_SIGNALS_BANK_L2_TOP_IO - `SCOPE_SIGNALS_BANK_L1D_TOP_IO - `SCOPE_SIGNALS_BANK_L1I_TOP_IO - `SCOPE_SIGNALS_BANK_L1S_TOP_IO - `SCOPE_SIGNALS_ISSUE_TOP_IO - `SCOPE_SIGNALS_EXECUTE_TOP_IO + `SCOPE_IO_Vortex // Clock input wire clk, @@ -79,14 +71,7 @@ module Vortex ( VX_cluster #( .CLUSTER_ID(0) ) cluster ( - `SCOPE_SIGNALS_ISTAGE_CLUSTER_SELECT(0) - `SCOPE_SIGNALS_LSU_CLUSTER_SELECT(0) - `SCOPE_SIGNALS_BANK_L2_CLUSTER_SELECT(0) - `SCOPE_SIGNALS_BANK_L1D_CLUSTER_SELECT(0) - `SCOPE_SIGNALS_BANK_L1I_CLUSTER_SELECT(0) - `SCOPE_SIGNALS_BANK_L1S_CLUSTER_SELECT(0) - `SCOPE_SIGNALS_ISSUE_CLUSTER_SELECT(0) - `SCOPE_SIGNALS_EXECUTE_CLUSTER_SELECT(0) + `SCOPE_BIND_Vortex_cluster(0) .clk (clk), .reset (reset), @@ -200,14 +185,7 @@ module Vortex ( VX_cluster #( .CLUSTER_ID(i) ) cluster ( - `SCOPE_SIGNALS_ISTAGE_CLUSTER_SELECT(i) - `SCOPE_SIGNALS_LSU_CLUSTER_SELECT(i) - `SCOPE_SIGNALS_BANK_L2_CLUSTER_SELECT(i) - `SCOPE_SIGNALS_BANK_L1D_CLUSTER_SELECT(i) - `SCOPE_SIGNALS_BANK_L1I_CLUSTER_SELECT(i) - `SCOPE_SIGNALS_BANK_L1S_CLUSTER_SELECT(i) - `SCOPE_SIGNALS_ISSUE_CLUSTER_SELECT(i) - `SCOPE_SIGNALS_EXECUTE_CLUSTER_SELECT(i) + `SCOPE_BIND_Vortex_cluster(i) .clk (clk), .reset (reset), @@ -417,7 +395,7 @@ module Vortex ( .SNP_REQ_TAG_WIDTH (`L3SNP_TAG_WIDTH), .SNP_FWD_TAG_WIDTH (`L2SNP_TAG_WIDTH) ) l3cache ( - `SCOPE_SIGNALS_BANK_L3_CACHE_BIND + `SCOPE_BIND_Vortex_l3cache() .clk (clk), .reset (reset), diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index e953b651..625c0e53 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -50,7 +50,7 @@ module VX_bank #( // Snooping request tag width parameter SNP_REQ_TAG_WIDTH = 0 ) ( - `SCOPE_SIGNALS_BANK_IO + `SCOPE_IO_VX_bank input wire clk, input wire reset, @@ -143,7 +143,7 @@ module VX_bank #( ) snp_req_queue ( .clk (clk), .reset (reset), - .push (snp_req_valid), + .push (snp_req_valid && snp_req_ready), .data_in ({snp_req_addr, snp_req_invalidate, snp_req_tag}), .pop (snrq_pop), .data_out({snrq_addr_st0, snrq_invalidate_st0, snrq_tag_st0}), @@ -166,7 +166,7 @@ module VX_bank #( ) dfp_queue ( .clk (clk), .reset (reset), - .push (dram_fill_rsp_valid), + .push (dram_fill_rsp_valid && dram_fill_rsp_ready), .data_in ({dram_fill_rsp_addr, dram_fill_rsp_data}), .pop (dfpq_pop), .data_out({dfpq_addr_st0, dfpq_filldata_st0}), @@ -353,7 +353,7 @@ module VX_bank #( .clk (clk), .reset (reset), .stall (stall_bank_pipe), - .flush (0), + .flush (1'b0), .in ({qual_is_mrvq_st0, qual_is_snp_st0, qual_snp_invalidate_st0, qual_going_to_write_st0, qual_valid_st0, qual_addr_st0, qual_wsel_st0, qual_writeword_st0, qual_inst_meta_st0, qual_is_fill_st0, qual_writedata_st0}), .out ({is_mrvq_st1 , is_snp_st1, snp_invalidate_st1, going_to_write_st1, valid_st1, addr_st1, wsel_st1, writeword_st1, inst_meta_st1, is_fill_st1, writedata_st1}) ); @@ -480,7 +480,7 @@ module VX_bank #( .clk (clk), .reset (reset), .stall (stall_bank_pipe), - .flush (0), + .flush (1'b0), .in ({mrvq_recover_ready_state_st1, is_mrvq_st1_st2, mrvq_init_ready_state_st1, snp_to_mrvq_st1, is_snp_st1, snp_invalidate_st1, fill_saw_dirty_st1, is_fill_st1, qual_valid_st1_2, addr_st1, wsel_st1, writeword_st1, readword_st1, readdata_st1, readtag_st1, miss_st1, dirty_st1, dirtyb_st1, inst_meta_st1}), .out ({mrvq_recover_ready_state_st2 , is_mrvq_st2 , mrvq_init_ready_state_unqual_st2, snp_to_mrvq_st2 , is_snp_st2 , snp_invalidate_st2, fill_saw_dirty_st2 , is_fill_st2 , valid_st2 , addr_st2, wsel_st2, writeword_st2, readword_st2, readdata_st2, readtag_st2, miss_st2, dirty_st2, dirtyb_st2, inst_meta_st2}) ); @@ -722,18 +722,18 @@ module VX_bank #( end `endif -`SCOPE_ASSIGN (scope_bank_valid_st0, qual_valid_st0); -`SCOPE_ASSIGN (scope_bank_valid_st1, valid_st1); -`SCOPE_ASSIGN (scope_bank_valid_st2, valid_st2); +`SCOPE_ASSIGN (scope_valid_st0, qual_valid_st0); +`SCOPE_ASSIGN (scope_valid_st1, valid_st1); +`SCOPE_ASSIGN (scope_valid_st2, valid_st2); -`SCOPE_ASSIGN (scope_bank_is_mrvq_st1, is_mrvq_st1); -`SCOPE_ASSIGN (scope_bank_miss_st1, miss_st1); -`SCOPE_ASSIGN (scope_bank_dirty_st1, dirty_st1); -`SCOPE_ASSIGN (scope_bank_force_miss_st1, force_request_miss_st1); -`SCOPE_ASSIGN (scope_bank_stall_pipe, stall_bank_pipe); +`SCOPE_ASSIGN (scope_is_mrvq_st1, is_mrvq_st1); +`SCOPE_ASSIGN (scope_miss_st1, miss_st1); +`SCOPE_ASSIGN (scope_dirty_st1, dirty_st1); +`SCOPE_ASSIGN (scope_force_miss_st1, force_request_miss_st1); +`SCOPE_ASSIGN (scope_stall_pipe, stall_bank_pipe); -`SCOPE_ASSIGN (scope_bank_addr_st0, `LINE_TO_BYTE_ADDR(qual_addr_st0, BANK_ID)); -`SCOPE_ASSIGN (scope_bank_addr_st1, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID)); -`SCOPE_ASSIGN (scope_bank_addr_st2, `LINE_TO_BYTE_ADDR(addr_st2, BANK_ID)); +`SCOPE_ASSIGN (scope_addr_st0, `LINE_TO_BYTE_ADDR(qual_addr_st0, BANK_ID)); +`SCOPE_ASSIGN (scope_addr_st1, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID)); +`SCOPE_ASSIGN (scope_addr_st2, `LINE_TO_BYTE_ADDR(addr_st2, BANK_ID)); endmodule diff --git a/hw/rtl/cache/VX_cache.v b/hw/rtl/cache/VX_cache.v index c3189499..869c32bf 100644 --- a/hw/rtl/cache/VX_cache.v +++ b/hw/rtl/cache/VX_cache.v @@ -51,15 +51,15 @@ module VX_cache #( parameter DRAM_TAG_WIDTH = 28, // Number of snoop forwarding requests - parameter NUM_SNP_REQUESTS = 2, + parameter NUM_SNP_REQUESTS = 1, // Snooping request tag width - parameter SNP_REQ_TAG_WIDTH = 28, + parameter SNP_REQ_TAG_WIDTH = 1, // Snooping forward tag width parameter SNP_FWD_TAG_WIDTH = 1 ) ( - `SCOPE_SIGNALS_BANK_CACHE_IO + `SCOPE_IO_VX_cache input wire clk, input wire reset, @@ -365,7 +365,7 @@ module VX_cache #( .CORE_TAG_ID_BITS (CORE_TAG_ID_BITS), .SNP_REQ_TAG_WIDTH (SNP_REQ_TAG_WIDTH) ) bank ( - `SCOPE_SIGNALS_BANK_SELECT(i) + `SCOPE_BIND_VX_cache_bank(i) .clk (clk), .reset (reset), diff --git a/hw/rtl/cache/VX_cache_core_rsp_merge.v b/hw/rtl/cache/VX_cache_core_rsp_merge.v index 7cf8b1c8..7cd20e43 100644 --- a/hw/rtl/cache/VX_cache_core_rsp_merge.v +++ b/hw/rtl/cache/VX_cache_core_rsp_merge.v @@ -91,7 +91,7 @@ module VX_cache_core_rsp_merge #( .clk (clk), .reset (reset), .stall (stall), - .flush (0), + .flush (1'b0), .in ({core_rsp_valid_unqual, core_rsp_data_unqual, core_rsp_tag_unqual}), .out ({core_rsp_valid, core_rsp_data, core_rsp_tag}) ); diff --git a/hw/rtl/cache/VX_cache_miss_resrv.v b/hw/rtl/cache/VX_cache_miss_resrv.v index 6ed5b02c..74745ceb 100644 --- a/hw/rtl/cache/VX_cache_miss_resrv.v +++ b/hw/rtl/cache/VX_cache_miss_resrv.v @@ -125,12 +125,12 @@ module VX_cache_miss_resrv #( ready_table[enqueue_index] <= mrvq_init_ready_state; addr_table[enqueue_index] <= miss_add_addr; metadata_table[enqueue_index] <= {miss_add_data, miss_add_tid, miss_add_tag, miss_add_rw, miss_add_byteen, miss_add_wsel, miss_add_is_snp, miss_add_snp_invalidate}; - tail_ptr <= tail_ptr + 1; + tail_ptr <= tail_ptr + $bits(tail_ptr)'(1); end else if (increment_head) begin valid_table[head_ptr] <= 0; - head_ptr <= head_ptr + 1; + head_ptr <= head_ptr + $bits(head_ptr)'(1); end else if (recover_state) begin - schedule_ptr <= schedule_ptr - 1; + schedule_ptr <= schedule_ptr - $bits(schedule_ptr)'(1); end // update entry as 'ready' during DRAM fill response @@ -140,15 +140,15 @@ module VX_cache_miss_resrv #( if (mrvq_pop) begin ready_table[dequeue_index] <= 0; - schedule_ptr <= schedule_ptr + 1; + schedule_ptr <= schedule_ptr + $bits(schedule_ptr)'(1); end if (!(mrvq_push && increment_head)) begin if (mrvq_push) begin - size <= size + 1; + size <= size + $bits(size)'(1); end if (increment_head) begin - size <= size - 1; + size <= size - $bits(size)'(1); end end end diff --git a/hw/rtl/libs/VX_generic_queue.v b/hw/rtl/libs/VX_generic_queue.v index 6dab29f9..a14f4ec9 100644 --- a/hw/rtl/libs/VX_generic_queue.v +++ b/hw/rtl/libs/VX_generic_queue.v @@ -19,15 +19,14 @@ module VX_generic_queue #( ); `STATIC_ASSERT(`ISPOW2(SIZE), ("must be 0 or power of 2!")) - reg [SIZEW-1:0] size_r; - wire reading; - wire writing; - - assign reading = pop && !empty; - assign writing = push && !full; + always @(*) begin + assert(!pop || !empty); + assert(!push || !full); + end if (SIZE == 1) begin // (SIZE == 1) + reg [SIZEW-1:0] size_r; reg [DATAW-1:0] head_r; always @(posedge clk) begin @@ -35,12 +34,12 @@ module VX_generic_queue #( head_r <= 0; size_r <= 0; end else begin - if (writing && !reading) begin + if (push && !pop) begin size_r <= 1; - end else if (reading && !writing) begin + end else if (pop && !push) begin size_r <= 0; end - if (writing) begin + if (push) begin head_r <= data_in; end end @@ -52,11 +51,59 @@ module VX_generic_queue #( assign size = size_r; end else begin // (SIZE > 1) + + `ifdef QUARTUS + + scfifo scfifo_component ( + .clock (clk), + .data (data_in), + .rdreq (pop), + .wrreq (push), + .empty (empty), + .full (full), + .q (data_out), + .sclr (reset), + .usedw (), + .aclr (), + .almost_empty (), + .almost_full (), + .eccstatus () + ); + + defparam + scfifo_component.lpm_type = "scfifo", + scfifo_component.intended_device_family = "Arria 10", + scfifo_component.lpm_numwords = SIZE, + scfifo_component.lpm_width = DATAW, + scfifo_component.lpm_widthu = $clog2(SIZE), + scfifo_component.lpm_showahead = "ON", + scfifo_component.add_ram_output_register = (BUFFERED ? "ON" : "ON"), + scfifo_component.use_eab = "ON"; + + reg [SIZEW-1:0] size_r; + + always @(posedge clk) begin + if (reset) begin + size_r <= 0; + end else begin + if (push && !pop) begin + size_r <= size_r + SIZEW'(1); + end + if (pop && !push) begin + size_r <= size_r - SIZEW'(1); + end + end + end + + assign size = size_r; + + `else `USE_FAST_BRAM reg [DATAW-1:0] data [SIZE-1:0]; - if (0 == BUFFERED) begin + if (0 == BUFFERED) begin + reg [SIZEW-1:0] size_r; reg [ADDRW:0] rd_ptr_r; reg [ADDRW:0] wr_ptr_r; @@ -69,30 +116,35 @@ module VX_generic_queue #( wr_ptr_r <= 0; size_r <= 0; end else begin - if (writing) begin - data[wr_ptr_a] <= data_in; - wr_ptr_r <= wr_ptr_r + 1; - if (!reading) begin - size_r <= size_r + 1; + if (push) begin + wr_ptr_r <= wr_ptr_r + (ADDRW+1)'(1); + if (!pop) begin + size_r <= size_r + SIZEW'(1); end end - - if (reading) begin - rd_ptr_r <= rd_ptr_r + 1; - if (!writing) begin - size_r <= size_r - 1; + if (pop) begin + rd_ptr_r <= rd_ptr_r + (ADDRW+1)'(1); + if (!push) begin + size_r <= size_r - SIZEW'(1); end end end + end + + always @(posedge clk) begin + if (push) begin + data[wr_ptr_a] <= data_in; + end end - assign data_out = data[rd_ptr_a]; + assign data_out = data[rd_ptr_a]; assign empty = (wr_ptr_r == rd_ptr_r); assign full = (wr_ptr_a == rd_ptr_a) && (wr_ptr_r[ADDRW] != rd_ptr_r[ADDRW]); - assign size = size_r; + assign size = size_r; end else begin + reg [SIZEW-1:0] size_r; reg [DATAW-1:0] head_r; reg [DATAW-1:0] curr_r; reg [ADDRW-1:0] wr_ptr_r; @@ -105,7 +157,6 @@ module VX_generic_queue #( always @(posedge clk) begin if (reset) begin size_r <= 0; - head_r <= 0; curr_r <= 0; wr_ptr_r <= 0; rd_ptr_r <= 0; @@ -113,43 +164,50 @@ module VX_generic_queue #( empty_r <= 1; full_r <= 0; end else begin - if (writing) begin - data[wr_ptr_r] <= data_in; - wr_ptr_r <= wr_ptr_r + 1; + if (push) begin + wr_ptr_r <= wr_ptr_r + ADDRW'(1); - if (!reading) begin + if (!pop) begin empty_r <= 0; - if (size_r == ($bits(size_r)'(SIZE-1))) begin + if (size_r == SIZEW'(SIZE-1)) begin full_r <= 1; end - size_r <= size_r + 1; + size_r <= size_r + SIZEW'(1); end end - if (reading) begin + if (pop) begin rd_ptr_r <= rd_ptr_next_r; if (SIZE > 2) begin - rd_ptr_next_r <= rd_ptr_r + $bits(rd_ptr_r)'(2); + rd_ptr_next_r <= rd_ptr_r + ADDRW'(2); end else begin // (SIZE == 2); rd_ptr_next_r <= ~rd_ptr_next_r; end - if (!writing) begin - if (size_r == 1) begin + if (!push) begin + if (size_r == SIZEW'(1)) begin assert(rd_ptr_next_r == wr_ptr_r); empty_r <= 1; end; full_r <= 0; - size_r <= size_r - 1; + size_r <= size_r - SIZEW'(1); end end - bypass_r <= writing - && (empty_r || ((1 == size_r) && reading)); // empty or about to go empty - + bypass_r <= push && (empty_r || ((size_r == SIZEW'(1)) && pop)); curr_r <= data_in; - head_r <= data[reading ? rd_ptr_next_r : rd_ptr_r]; + end + end + + always @(posedge clk) begin + if (reset) begin + head_r <= 0; + end else begin + if (push) begin + data[wr_ptr_r] <= data_in; + end + head_r <= data[pop ? rd_ptr_next_r : rd_ptr_r]; end end @@ -158,6 +216,9 @@ module VX_generic_queue #( assign full = full_r; assign size = size_r; end + + `endif + end endmodule diff --git a/hw/rtl/libs/VX_index_queue.v b/hw/rtl/libs/VX_index_queue.v index bee8ccb9..b40aa2a0 100644 --- a/hw/rtl/libs/VX_index_queue.v +++ b/hw/rtl/libs/VX_index_queue.v @@ -28,9 +28,13 @@ module VX_index_queue #( assign empty = (wr_ptr == rd_ptr); assign full = (wr_a == rd_a) && (wr_ptr[`LOG2UP(SIZE)] != rd_ptr[`LOG2UP(SIZE)]); - assign enqueue = push && !full; + assign enqueue = push; assign dequeue = !empty && !valid[rd_a]; // auto-remove when head is invalid + always @(*) begin + assert(!push || !full); + end + always @(posedge clk) begin if (reset) begin rd_ptr <= 0; diff --git a/hw/rtl/libs/VX_scope.v b/hw/rtl/libs/VX_scope.v index 19f385c3..9490d6b3 100644 --- a/hw/rtl/libs/VX_scope.v +++ b/hw/rtl/libs/VX_scope.v @@ -126,11 +126,11 @@ module VX_scope #( || (trigger_id != prev_trigger_id)) begin delta_store[waddr] <= delta; data_store[waddr] <= data_in; - waddr <= waddr + 1; + waddr <= waddr + $bits(waddr)'(1); delta <= 0; delta_flush <= 0; end else begin - delta <= delta + 1; + delta <= delta + DELTAW'(1); delta_flush <= (delta == (MAX_DELTA-1)); end prev_trigger_id <= trigger_id; @@ -159,7 +159,7 @@ module VX_scope #( if (read_offset < $bits(read_offset)'(DATAW-BUSW)) begin read_offset <= read_offset + $bits(read_offset)'(BUSW); end else begin - raddr <= raddr + 1; + raddr <= raddr + $bits(raddr)'(1); read_offset <= 0; read_delta <= 1; if (raddr == waddr) begin diff --git a/hw/scripts/scope.json b/hw/scripts/scope.json index 1001e32c..8b0ae92b 100644 --- a/hw/scripts/scope.json +++ b/hw/scripts/scope.json @@ -6,125 +6,196 @@ "../rtl/VX_define.vh", "../rtl/cache/VX_cache_config.vh" ], - "parameters": { - "L3_ENABLE": "`L3_ENABLE", - "L2_ENABLE": "`L2_ENABLE", - "NUM_CLUSTERS": "`NUM_CLUSTERS", - "NUM_CORES": "`NUM_CORES", - "DNUM_BANKS": "`DNUM_BANKS", - "INUM_BANKS": "`INUM_BANKS", - "SNUM_BANKS": "`SNUM_BANKS", - "L2NUM_BANKS": "`L2NUM_BANKS", - "L3NUM_BANKS": "`L3NUM_BANKS" + "modules": { + "*": { + "enabled": "(`NUM_CLUSTERS > 0)", + "submodules": { + "afu": {"type":"vortex_afu"} + } + }, + "vortex_afu": { + "submodules": { + "vortex": {"type":"Vortex"} + } + }, + "Vortex": { + "submodules": { + "cluster": {"type":"VX_cluster", "count":"`NUM_CLUSTERS"}, + "l3cache": {"type":"VX_cache", "enabled":"`L3_ENABLE", "params":{"NUM_BANKS":"`L3NUM_BANKS"}} + } + }, + "VX_cluster": { + "submodules": { + "core": {"type":"VX_core", "count":"`NUM_CORES"}, + "l2cache": {"type":"VX_cache", "enabled":"`L2_ENABLE", "params":{"NUM_BANKS":"`L2NUM_BANKS"}} + } + }, + "VX_core": { + "submodules": { + "pipeline": {"type":"VX_pipeline", "enabled":false}, + "mem_unit": {"type":"VX_mem_unit", "enabled":true} + } + }, + "VX_pipeline": { + "submodules": { + "fetch": {"type":"VX_fetch", "enabled":true}, + "decode": {"type":"VX_decode", "enabled":true}, + "issue": {"type":"VX_issue", "enabled":true}, + "execute": {"type":"VX_execute", "enabled":true}, + "commit": {"type":"VX_commit", "enabled":true} + } + }, + "VX_fetch": { + "submodules": { + "warp_sched": {"type":"VX_warp_sched"}, + "icache_stage": {"type":"VX_icache_stage"} + } + }, + "VX_warp_sched": {}, + "VX_icache_stage": {}, + "VX_decode": {}, + "VX_issue": {}, + "VX_execute": { + "submodules": { + "lsu_unit": {"type":"VX_lsu_unit"}, + "gpu_unit": {"type":"VX_gpu_unit"} + } + }, + "VX_commit": {}, + "VX_lsu_unit": {}, + "VX_gpu_unit": {}, + "VX_mem_unit": { + "submodules": { + "smem": {"type":"VX_cache", "params":{"NUM_BANKS":"`SNUM_BANKS"}}, + "dcache": {"type":"VX_cache", "params":{"NUM_BANKS":"`DNUM_BANKS"}}, + "icache": {"type":"VX_cache", "params":{"NUM_BANKS":"`INUM_BANKS"}} + } + }, + "VX_cache": { + "submodules": { + "bank": {"type":"VX_bank", "count":"NUM_BANKS"} + } + }, + "VX_bank": {} }, - "taps": { - "top::SCOPE_SIGNALS_AFU": { - "!scope_dram_req_valid": 1, - "scope_dram_req_addr": 32, - "scope_dram_req_rw": 1, - "scope_dram_req_byteen": "`VX_DRAM_BYTEEN_WIDTH", - "scope_dram_req_data": "`VX_DRAM_LINE_WIDTH", - "scope_dram_req_tag": "`VX_DRAM_TAG_WIDTH", - "!scope_dram_req_ready": 1, - "!scope_dram_rsp_valid": 1, - "scope_dram_rsp_data": "`VX_DRAM_LINE_WIDTH", - "scope_dram_rsp_tag": "`VX_DRAM_TAG_WIDTH", - "!scope_dram_rsp_ready": 1, - "!scope_snp_req_valid": 1, - "scope_snp_req_addr": 32, - "scope_snp_req_invalidate": 1, - "scope_snp_req_tag": "`VX_SNP_TAG_WIDTH", - "!scope_snp_req_ready": 1, - "!scope_snp_rsp_valid": 1, - "scope_snp_rsp_tag": "`VX_SNP_TAG_WIDTH", - "!scope_snp_rsp_ready": 1, - "scope_busy": 1 + "taps": { + "afu": { + "!reset": 1, + "?dram_req_valid": 1, + "dram_req_addr": 32, + "dram_req_rw": 1, + "dram_req_byteen":"`VX_DRAM_BYTEEN_WIDTH", + "dram_req_data":"`VX_DRAM_LINE_WIDTH", + "dram_req_tag":"`VX_DRAM_TAG_WIDTH", + "?dram_req_ready": 1, + "?dram_rsp_valid": 1, + "dram_rsp_data":"`VX_DRAM_LINE_WIDTH", + "dram_rsp_tag":"`VX_DRAM_TAG_WIDTH", + "?dram_rsp_ready": 1, + "?snp_req_valid": 1, + "snp_req_addr": 32, + "snp_req_invalidate": 1, + "snp_req_tag":"`VX_SNP_TAG_WIDTH", + "?snp_req_ready": 1, + "?snp_rsp_valid": 1, + "snp_rsp_tag":"`VX_SNP_TAG_WIDTH", + "?snp_rsp_ready": 1, + "busy": 1 }, - "core::SCOPE_SIGNALS_ISTAGE": { - "!scope_icache_req_valid": 1, - "scope_icache_req_wid": "`NW_BITS", - "scope_icache_req_addr": 32, - "scope_icache_req_tag": "`ICORE_TAG_ID_BITS", - "!scope_icache_req_ready": 1, - "!scope_icache_rsp_valid": 1, - "scope_icache_rsp_data": 32, - "scope_icache_rsp_tag": "`ICORE_TAG_ID_BITS", - "!scope_icache_rsp_ready": 1 + "afu/vortex/cluster/core/pipeline/fetch/icache_stage": { + "?icache_req_valid": 1, + "icache_req_wid":"`NW_BITS", + "icache_req_addr": 32, + "icache_req_tag":"`ICORE_TAG_ID_BITS", + "?icache_req_ready": 1, + "?icache_rsp_valid": 1, + "icache_rsp_data": 32, + "icache_rsp_tag":"`ICORE_TAG_ID_BITS", + "?icache_rsp_ready": 1 }, - "core::SCOPE_SIGNALS_LSU": { - "!scope_dcache_req_valid": "`NUM_THREADS", - "scope_dcache_req_wid": "`NW_BITS", - "scope_dcache_req_pc": 32, - "scope_dcache_req_addr": "`NUM_THREADS * 32", - "scope_dcache_req_rw": 1, - "scope_dcache_req_byteen": "`NUM_THREADS * 4", - "scope_dcache_req_data": "`NUM_THREADS * 32", - "scope_dcache_req_tag": "`DCORE_TAG_ID_BITS", - "!scope_dcache_req_ready": 1, - "!scope_dcache_rsp_valid": "`NUM_THREADS", - "scope_dcache_rsp_data": "`NUM_THREADS * 32", - "scope_dcache_rsp_tag": "`DCORE_TAG_ID_BITS", - "!scope_dcache_rsp_ready": 1 + "afu/vortex/cluster/core/pipeline/fetch/warp_sched": { + "?wsched_scheduled_warp": 1, + "wsched_active_warps": "`NUM_WARPS", + "wsched_schedule_table": "`NUM_WARPS", + "wsched_schedule_ready": "`NUM_WARPS", + "wsched_warp_to_schedule": "`NW_BITS", + "wsched_warp_pc": "32" }, - "core::SCOPE_SIGNALS_ISSUE": { - "!scope_issue_valid": 1, - "scope_issue_wid": "`NW_BITS", - "scope_issue_tmask": "`NUM_THREADS", - "scope_issue_pc": 32, - "scope_issue_ex_type": "`EX_BITS", - "scope_issue_op_type": "`OP_BITS", - "scope_issue_op_mod": "`MOD_BITS", - "scope_issue_wb": 1, - "scope_issue_rd": "`NR_BITS", - "scope_issue_rs1": "`NR_BITS", - "scope_issue_rs2": "`NR_BITS", - "scope_issue_rs3": "`NR_BITS", - "scope_issue_imm": 32, - "scope_issue_rs1_is_pc": 1, - "scope_issue_rs2_is_imm": 1, - "!scope_issue_ready": 1, - "scope_gpr_rsp_wid": "`NW_BITS", - "scope_gpr_rsp_pc": 32, - "scope_gpr_rsp_a": "`NUM_THREADS * 32", - "scope_gpr_rsp_b": "`NUM_THREADS * 32", - "scope_gpr_rsp_c": "`NUM_THREADS * 32", - "!scope_gpr_delay": 1, - "!scope_writeback_valid": 1, - "scope_writeback_wid": "`NW_BITS", - "scope_writeback_pc": 32, - "scope_writeback_rd": "`NR_BITS", - "scope_writeback_data": "`NUM_THREADS * 32", - "!scope_scoreboard_delay": 1, - "!scope_execute_delay": 1 - }, - "core::SCOPE_SIGNALS_EXECUTE": {}, - "bank::SCOPE_SIGNALS_BANK": { - "!scope_bank_valid_st0": 1, - "!scope_bank_valid_st1": 1, - "!scope_bank_valid_st2": 1, - "scope_bank_addr_st0": 32, - "scope_bank_addr_st1": 32, - "scope_bank_addr_st2": 32, - "scope_bank_is_mrvq_st1": 1, - "scope_bank_miss_st1": 1, - "scope_bank_dirty_st1": 1, - "!scope_bank_force_miss_st1": 1, - "!scope_bank_stall_pipe": 1 + "afu/vortex/cluster/core/pipeline/execute/gpu_unit": { + "?gpu_req_valid": 1, + "gpu_req_wid": "`NW_BITS", + "gpu_req_tmask": "`NUM_THREADS", + "gpu_req_op_type": "`GPU_BITS", + "gpu_req_rs1": "32", + "gpu_req_rs2": "32", + "?gpu_req_ready": 1, + "?gpu_rsp_valid": 1, + "gpu_rsp_wid": "`NW_BITS", + "gpu_rsp_tmc": "`GPU_TMC_SIZE", + "gpu_rsp_wspawn": "`GPU_WSPAWN_SIZE", + "gpu_rsp_split": "`GPU_SPLIT_SIZE", + "gpu_rsp_barrier": "`GPU_BARRIER_SIZE" + }, + "afu/vortex/cluster/core/pipeline/execute/lsu_unit": { + "?dcache_req_valid":"`NUM_THREADS", + "dcache_req_wid":"`NW_BITS", + "dcache_req_pc": 32, + "dcache_req_addr":"`NUM_THREADS * 32", + "dcache_req_rw": 1, + "dcache_req_byteen":"`NUM_THREADS * 4", + "dcache_req_data": "`NUM_THREADS * 32", + "dcache_req_tag":"`DCORE_TAG_ID_BITS", + "?dcache_req_ready": 1, + "?dcache_rsp_valid":"`NUM_THREADS", + "dcache_rsp_data":"`NUM_THREADS * 32", + "dcache_rsp_tag":"`DCORE_TAG_ID_BITS", + "?dcache_rsp_ready": 1 + }, + "afu/vortex/cluster/core/pipeline/issue": { + "?issue_valid": 1, + "issue_wid":"`NW_BITS", + "issue_tmask":"`NUM_THREADS", + "issue_pc": 32, + "issue_ex_type":"`EX_BITS", + "issue_op_type":"`OP_BITS", + "issue_op_mod":"`MOD_BITS", + "issue_wb": 1, + "issue_rd":"`NR_BITS", + "issue_rs1":"`NR_BITS", + "issue_rs2":"`NR_BITS", + "issue_rs3":"`NR_BITS", + "issue_imm": 32, + "issue_rs1_is_pc": 1, + "issue_rs2_is_imm": 1, + "?issue_ready": 1, + "?gpr_rsp_valid": 1, + "gpr_rsp_wid":"`NW_BITS", + "gpr_rsp_pc": 32, + "gpr_rsp_a":"`NUM_THREADS * 32", + "gpr_rsp_b":"`NUM_THREADS * 32", + "gpr_rsp_c":"`NUM_THREADS * 32", + "!gpr_delay": 1, + "?writeback_valid": 1, + "writeback_wid":"`NW_BITS", + "writeback_pc": 32, + "writeback_rd":"`NR_BITS", + "writeback_data":"`NUM_THREADS * 32", + "!scoreboard_delay": 1, + "!execute_delay": 1 + }, + "afu/vortex/l3cache/bank, afu/vortex/cluster/l2cache/bank, afu/vortex/cluster/core/mem_unit/dcache/bank, afu/vortex/cluster/core/mem_unit/icache/bank, afu/vortex/cluster/core/mem_unit/smem/bank": { + "?valid_st0": 1, + "?valid_st1": 1, + "?valid_st2": 1, + "addr_st0": 32, + "addr_st1": 32, + "addr_st2": 32, + "is_mrvq_st1": 1, + "miss_st1": 1, + "dirty_st1": 1, + "!force_miss_st1": 1, + "!stall_pipe": 1 } - }, - "triggers": [ - ["scope_dram_req_valid", "scope_dram_req_ready"], - ["scope_dram_rsp_valid", "scope_dram_rsp_ready"], - ["scope_snp_req_valid", "scope_snp_req_ready"], - ["scope_snp_rsp_valid", "scope_snp_rsp_ready"], - - ["scope_icache_req_valid_top", "scope_icache_req_ready_top"], - ["scope_icache_rsp_valid_top", "scope_icache_rsp_ready_top"], - - ["scope_dcache_req_valid_top", "scope_dcache_req_ready_top"], - ["scope_dcache_rsp_valid_top", "scope_dcache_rsp_ready_top"], - - ["scope_issue_valid_top", "scope_issue_ready_top"] - ] + } } \ No newline at end of file diff --git a/hw/scripts/scope.py b/hw/scripts/scope.py index c360398c..9bedc02c 100755 --- a/hw/scripts/scope.py +++ b/hw/scripts/scope.py @@ -11,12 +11,89 @@ vl_ifdef_re = re.compile(r"^\s*`(ifdef|ifndef|elsif)\s+(\w+)\s*$") vl_endif_re = re.compile(r"^\s*`(endif|else)\s*$") vl_expand_re = re.compile(r"`([0-9a-zA-Z_]+)") -parameters = [] exclude_files = [] include_dirs = [] macros = [] br_stack = [] +def translate_ternary(text): + + def skip_space(text, i, ln, step): + while (i >= 0) and (i < ln): + c = text[i] + if not c.isspace(): + break + i += step + return i + + def skip_expr(text, i, ln, step): + paren = 0 + checkparen = True + while (i >= 0) and (i < ln): + c = text[i] + if checkparen and (((step < 0) and (c == ')')) or ((step > 0) and (c == '('))): + paren += 1 + elif checkparen and (((step < 0) and (c == '(')) or ((step > 0) and (c == ')'))): + if (0 == paren): + break + paren -= 1 + if (0 == paren): + i = skip_space(text, i + step, ln, step) + checkparen = False + continue + elif (0 == paren) and not (c.isalnum() or (c == '_')): + break + i += step + return (i - step) + + def parse_ternary(text): + ternary = None + ln = len(text) + for i in range(1, ln): + c = text[i] + if not (c == '?'): + continue + # parse condition expression + i0 = skip_space(text, i - 1, ln, -1) + if (i < 0): + raise Exception("invalid condition expression") + i1 = skip_expr(text, i0, ln, -1) + if (i1 > i0): + raise Exception("invalid condition expression") + # parse true expression + i2 = skip_space(text, i + 1, ln, 1) + if (i2 >= ln): + raise Exception("invalid true expression") + i3 = skip_expr(text, i2, ln, 1) + if (i3 < i2): + raise Exception("invalid true expression") + # parse colon + i4 = skip_space(text, i3 + 1, ln, 1) + if (i4 >= ln): + raise Exception("invalid colon") + if not (text[i4] == ':'): + raise Exception("missing colon") + # parse false expression + i5 = skip_space(text, i4 + 1, ln, 1) + if (i5 >= ln): + raise Exception("invalid false expression") + i6 = skip_expr(text, i5, ln, 1) + if (i6 < i5): + raise Exception("invalid false expression") + ternary = (i0, i1, i2, i3, i5, i6) + break + return ternary + + while True: + pos = parse_ternary(text) + if pos is None: + break + # convert to python ternary + newText = text[:pos[1]] + text[pos[2]:pos[3]+1] + " if " + text[pos[1]:pos[0]+1] + " else " + text[pos[4]:pos[5]+1] + text[pos[5]+1:] + text = newText + + return text + def parse_func_args(text): args = [] arg = '' @@ -26,7 +103,6 @@ def parse_func_args(text): paren = 1 for i in range(1, l): c = text[i] - if c == '(': paren += 1 elif c == ')': @@ -36,17 +112,14 @@ def parse_func_args(text): if paren == 0: l = i break - if c == ',' and paren == 1: if arg.strip(): args.append(arg) arg = '' else: arg += c - if paren != 0: raise Exception("missing closing parenthesis: " + text) - if arg.strip(): args.append(arg) @@ -90,9 +163,29 @@ def find_macro(name): return macro return None -def expand_text(text): +def expand_text(text, params): - class DoRepl(object): + def re_pattern_args(args): + p = "(? 0: + p += "|" + p += arg + i += 1 + p += ")(?![0-9a-zA-Z_])" + return p + + class DoReplParam(object): + def __init__(self, params): + self.params = params + self.expanded = False + def __call__(self, match): + name = match.group(1) + self.expanded = True + return self.params[name] + + class DoReplMacro(object): def __init__(self): self.expanded = False self.has_func = False @@ -107,17 +200,6 @@ def expand_text(text): return macro[2] return "`" + name - class DoRepl2(object): - def __init__(self, args, f_args): - map = {} - for i in range(len(args)): - map[args[i]] = f_args[i] - self.map = map - def __call__(self, match): - for key in match.groups(): - return self.map[key] - return group - def repl_func_macro(text): expanded = False match = re.search(vl_expand_re, text) @@ -137,14 +219,11 @@ def expand_text(text): if len(args) != len(f_args[0]): raise Exception("mismatch number of argments for macro '" + name + "': actual=" + len(f_args[0]) + ", expected=" + len(args)) - pattern = "(? 0: - pattern += "|" - pattern += args[i] - pattern += ")(?![0-9a-zA-Z_])" - - dorepl = DoRepl2(args, f_args[0]) + params[args[i]] = f_args[0][i] + dorepl = DoReplParam(params) value = re.sub(pattern, dorepl, value) str_head = text[0:match.start()] @@ -163,10 +242,18 @@ def expand_text(text): raise Exception("Macro recursion!") has_func = False while True: - do_repl = DoRepl() + params_updated = False + if not params is None: + do_repl = DoReplParam(params) + pattern = re_pattern_args(params) + new_text = re.sub(pattern, do_repl, text) + if do_repl.expanded: + text = new_text + params_updated = True + do_repl = DoReplMacro() new_text = re.sub(vl_expand_re, do_repl, text) - has_func = do_repl.has_func - if not do_repl.expanded: + has_func = do_repl.has_func + if not (params_updated or do_repl.expanded): break text = new_text changed = True @@ -291,7 +378,28 @@ def load_config(filename): print("condfig=", config) return config -def gen_vl_header(file, taps, triggers): +def eval_node(text, params): + def clog2(x): + l2 = math.log2(x) + cl = math.ceil(l2) + return int(cl) + + if not type(text) == str: + return text + + expanded = expand_text(text, params) + if expanded: + text = expanded + + try: + __text = text.replace('$clog2', '__clog2') + __text = translate_ternary(__text) + e = eval(__text, {'__clog2': clog2}) + return e + except (NameError, SyntaxError): + return text + +def gen_vl_header(file, modules, taps): header = ''' `ifndef VX_SCOPE_DEFS @@ -299,238 +407,274 @@ def gen_vl_header(file, taps, triggers): ''' footer = '`endif' - def signal_size(size, asize): - str_asize = "" - for s in asize: - if type(s) == int: - str_asize += "[" + str(s-1) + ":0]" - else: - str_asize += "[" + str(s) + "-1:0]" - + def signal_size(size, mn): if type(size) == int: - size1 = (size-1) - if size1 != 0: - return str_asize + "[" + str(size1) + ":0]" + if (size != mn): + return "[" + str(size-1) + ":0]" else: - return str_asize + return "" else: - return str_asize + "[(" + size + ")-1:0]" + return "[" + size + "-1:0]" - def generate_ports(tclass, tap, ports, new_taps): + def create_signal(key, ports): + if not key in ports: + ports[key] = [] + return ports[key] - def emit_io(tap, ports, prefix, asize, return_list, new_taps, is_enabled): - stap = tap + "_IO" - new_taps.append(stap) - print("`define " + stap + " \\", file=f) - if is_enabled: - for key in ports: - size = ports[key] - name = key - is_trigger = False - if name[0] == '!': - name = name[1:] - is_trigger = True - if not return_list is None: - return_list.append((name + prefix, size, asize, is_trigger)) - print("\toutput wire" + signal_size(size, asize) + " " + name + prefix + ", \\", file=f) - print("", file=f) - emit_bind(tap, ports, prefix, prefix, new_taps, is_enabled) - - def emit_bind(tap, ports, from_prefix, to_prefix, new_taps, is_enabled): - stap = tap + "_BIND" - new_taps.append(stap) - print("`define " + stap + " \\", file=f) - for key in ports: - name = key - if name[0] == '!': - name = name[1:] - if is_enabled: - print("\t." + name + to_prefix + " (" + name + from_prefix + "), \\", file=f) - else: - if (from_prefix != to_prefix): - print("\t`UNUSED_PIN (" + name + to_prefix + "), \\", file=f) - print("", file=f) - - def emit_select(tap, ports, from_prefix, to_prefix, new_taps, is_enabled): - stap = tap + "_SELECT(__i__)" - new_taps.append(stap) - print("`define " + stap + " \\", file=f) - if is_enabled: - for key in ports: - name = key - if name[0] == '!': - name = name[1:] - print("\t." + name + to_prefix + " (" + name + from_prefix + "[__i__]), \\", file=f) - print("", file=f) - - def do_top(tap, ports, new_taps): - out_ports = [] - for p in ports: - name = p - is_trigger = False - if name[0] == '!': - name = name[1:] - is_trigger = True - out_ports.append((name, ports[p], [], is_trigger)) - return out_ports - - def do_core(tap, ports, new_taps): - out_ports = [] - nclusters = parameters["NUM_CLUSTERS"] - ncores = parameters["NUM_CORES"] - emit_io(tap + "_TOP", ports, "_top", [nclusters, ncores], out_ports, new_taps, True) - emit_io(tap + "_CLUSTER", ports, "_cluster", [ncores], None, new_taps, True) - emit_io(tap + "", ports, "", [], None, new_taps, True) - emit_select(tap + "_CLUSTER", ports, "_top", "_cluster", new_taps, True) - emit_select(tap + "", ports, "_cluster", "", new_taps, True) - return out_ports - - def do_bank(tap, ports, new_taps): - out_ports = [] - - nclusters = parameters["NUM_CLUSTERS"] - ncores = parameters["NUM_CORES"] - has_l3 = (parameters["L3_ENABLE"] != 0) - has_l2 = (parameters["L2_ENABLE"] != 0) - - emit_io(tap + "_L3_TOP", ports, "_l3_cache", [parameters["L3NUM_BANKS"]], out_ports, new_taps, has_l3) - emit_io(tap + "_L2_TOP", ports, "_l2_top", [nclusters, parameters["L2NUM_BANKS"]], out_ports, new_taps, has_l2) - emit_io(tap + "_L1D_TOP", ports, "_l1d_top", [nclusters, ncores, parameters["DNUM_BANKS"]], out_ports, new_taps, True) - emit_io(tap + "_L1I_TOP", ports, "_l1i_top", [nclusters, ncores, parameters["INUM_BANKS"]], out_ports, new_taps, True) - emit_io(tap + "_L1S_TOP", ports, "_l1s_top", [nclusters, ncores, parameters["SNUM_BANKS"]], out_ports, new_taps, True) - - emit_io(tap + "_L2_CLUSTER", ports, "_l2_cache", [parameters["L2NUM_BANKS"]], None, new_taps, has_l2) - emit_io(tap + "_L1D_CLUSTER", ports, "_l1d_cluster", [ncores, parameters["DNUM_BANKS"]], None, new_taps, True) - emit_io(tap + "_L1I_CLUSTER", ports, "_l1i_cluster", [ncores, parameters["INUM_BANKS"]], None, new_taps, True) - emit_io(tap + "_L1S_CLUSTER", ports, "_l1s_cluster", [ncores, parameters["SNUM_BANKS"]], None, new_taps, True) - - emit_io(tap + "_L1D_CORE", ports, "_l1d_cache", [parameters["DNUM_BANKS"]], None, new_taps, True) - emit_io(tap + "_L1I_CORE", ports, "_l1i_cache", [parameters["INUM_BANKS"]], None, new_taps, True) - emit_io(tap + "_L1S_CORE", ports, "_l1s_cache", [parameters["SNUM_BANKS"]], None, new_taps, True) - - emit_io(tap + "_CACHE", ports, "_cache", ["NUM_BANKS"], None, new_taps, True) - emit_io(tap + "", ports, "", [], None, new_taps, True) - - emit_select(tap + "_L2_CLUSTER", ports, "_l2_top", "_l2_cache", new_taps, has_l2) - emit_select(tap + "_L1D_CLUSTER", ports, "_l1d_top", "_l1d_cluster", new_taps, True) - emit_select(tap + "_L1I_CLUSTER", ports, "_l1i_top", "_l1i_cluster", new_taps, True) - emit_select(tap + "_L1S_CLUSTER", ports, "_l1s_top", "_l1s_cluster", new_taps, True) - - emit_select(tap + "_L1D_CORE", ports, "_l1d_cluster", "_l1d_cache", new_taps, True) - emit_select(tap + "_L1I_CORE", ports, "_l1i_cluster", "_l1i_cache", new_taps, True) - emit_select(tap + "_L1S_CORE", ports, "_l1s_cluster", "_l1s_cache", new_taps, True) - - emit_bind(tap + "_L3_CACHE", ports, "_l3_cache", "_cache", new_taps, has_l3) - emit_bind(tap + "_L2_CACHE", ports, "_l2_cache", "_cache", new_taps, has_l2) - emit_bind(tap + "_L1D_CACHE", ports, "_l1d_cache", "_cache", new_taps, True) - emit_bind(tap + "_L1I_CACHE", ports, "_l1i_cache", "_cache", new_taps, True) - emit_bind(tap + "_L1S_CACHE", ports, "_l1s_cache", "_cache", new_taps, True) - - emit_select(tap + "", ports, "_cache", "", new_taps, True) - - return out_ports - - callbacks = { - "top": do_top, - "core": do_core, - "bank": do_bank - } - - return callbacks[tclass](tap, ports, new_taps) - - def trigger_size(name, ports): - for port in ports: - if port[0] == name: - return (port[1], port[2]) - return None - - def trigger_prefices(asize): - def Q(arr, ss, asize, idx, N): - for i in range(asize[idx]): - tmp = ss + '[' + str(i) + ']' - if (idx + 1) < N: - Q(arr, tmp, asize, idx + 1, N) - else: - arr.append(tmp) - - l = len(asize) - if l == 0: - return [""] - arr = [] - Q(arr, "", asize, 0, l) - return arr + def dic_insert(gdic, ldic, key, value, enabled): + if enabled: + ldic[key] = value + if key in gdic: + return False + if enabled: + gdic[key] = None + return True def trigger_name(name, size): if type(size) == int: - size1 = (size-1) - if size1 != 0: + if size != 1: return "(| " + name + ")" else: return name else: return "(| " + name + ")" - with open(file, 'w') as f: + def trigger_subscripts(asize): + def Q(arr, ss, asize, idx, N): + a = asize[idx] + if (a != 0): + for i in range(a): + tmp = ss + '[' + str(i) + ']' + if (idx + 1) < N: + Q(arr, tmp, asize, idx + 1, N) + else: + arr.append(tmp) + else: + if (idx + 1) < N: + Q(arr, ss, asize, idx + 1, N) + else: + arr.append(ss) + + if asize is None: + return [""] + ln = len(asize) + if (0 == ln): + return [""] + arr = [] + Q(arr, "", asize, 0, ln) + return arr + + + def visit_path(alltaps, ports, path, node, paths, modules, taps): + ntype = node["type"] + + enabled = True + if "enabled" in node: + enabled = eval_node(node["enabled"], None) + + curtaps = {} + + if (len(paths) != 0): + spath = paths.pop(0) + snodes = modules[ntype]["submodules"] + if not spath in snodes: + raise Exception("invalid path: " + spath + " in " + path) + snode = snodes[spath] + + subtaps = visit_path(alltaps, ports, spath, snode, paths, modules, taps) + + scount = 0 + if "count" in snode: + scount = eval_node(snode["count"], None) + + params = None + if "params" in snode: + params = snode["params"] + + new_staps = [] + + nn = "SCOPE_IO_" + ntype + pp = create_signal(nn, ports) + for key in subtaps: + subtap = subtaps[key] + s = subtap[0] + a = subtap[1] + t = subtap[2] + e = subtap[3] + + s = eval_node(s, params) + + e = eval_node(e, params) + if type(e) == str or type(enabled) == str: + me = str(e) + " and " + str(enabled) + else: + me = e and enabled + + aa = [scount] + sa = signal_size(scount, 0) + if a: + for i in a: + x = eval_node(i, params) + aa.append(x) + sa += signal_size(x, 0) + + if dic_insert(alltaps, curtaps, spath + '/' + key, (s, aa, t, me), e): + skey = key.replace('/', '_') + if e: + pp.append("\toutput wire" + sa + signal_size(s, 1) + " scope_" + spath + '_' + skey + ',') + new_staps.append(skey) + + ports[nn] = pp + + if (0 == scount): + nn = "SCOPE_BIND_" + ntype + '_' + spath + "()" + pp = create_signal(nn, ports) + for st in new_staps: + if e: + pp.append("\t.scope_" + st + "(scope_" + spath + '_' + st + "),") + else: + pp.append("\t`UNUSED_PIN (scope_" + st + "),") + ports[nn] = pp + else: + nn = "SCOPE_BIND_" + ntype + '_' + spath + "(__i__)" + pp = create_signal(nn, ports) + for st in new_staps: + if e: + pp.append("\t.scope_" + st + "(scope_" + spath + '_' + st + "[__i__]),") + else: + pp.append("\t`UNUSED_PIN (scope_" + st + "),") + ports[nn] = pp + else: + nn = "SCOPE_IO_" + ntype + pp = create_signal(nn, ports) + for tk in taps: + trigger = 0 + name = tk + size = eval_node(taps[tk], None) + if name[0] == '!': + name = name[1:] + trigger = 1 + elif name[0] == '?': + name = name[1:] + trigger = 2 + if dic_insert(alltaps, curtaps, name, (size, None, trigger, enabled), True): + pp.append("\toutput wire" + signal_size(size, 1) + " scope_" + name + ',') + + ports[nn] = pp + + return curtaps + + toptaps = {} + + with open(file, 'w') as f: + + top = modules['*'] + snodes = top["submodules"] + + ports = {} + alltaps = {} + + for key in taps: + skey_list = key.split(',') + _taps = taps[key] + for skey in skey_list: + print('processing node: ' + skey + ' ...') + paths = skey.strip().split('/') + spath = paths.pop(0) + if not spath in snodes: + raise Exception("invalid path: " + spath) + snode = snodes[spath] + curtaps = visit_path(alltaps, ports, spath, snode, paths, modules, _taps) + for tk in curtaps: + toptaps[tk] = curtaps[tk] + print(header, file=f) - all_ports = [] - new_taps = [] + for key in ports: + print("`define " + key + ' \\', file=f) + for port in ports[key]: + print(port + ' \\', file=f) + print("", file=f) - for key in taps: - [tclass, tap] = key.split('::') - ports = generate_ports(tclass, tap, taps[key], new_taps) - for port in ports: - all_ports.append(port) - - print("`define SCOPE_SIGNALS_DECL \\", file=f) - i = 0 - for port in all_ports: + print("`define SCOPE_DECL_SIGNALS \\", file=f) + i = 0 + for key in toptaps: + tap = toptaps[key] + name = key.replace('/', '_') + size = tap[0] + asize = tap[1] + enabled = tap[3] + sa = "" + if asize: + for a in asize: + sa += signal_size(a, 0) if i > 0: print(" \\", file=f) - print("\twire" + signal_size(port[1], port[2]) + " " + port[0] + ";", file=f, end='') + if not enabled: + print("`IGNORE_WARNINGS_BEGIN \\", file=f) + print('\t wire' + sa + signal_size(size, 1) + " scope_" + name + '; \\', file=f) + print("`IGNORE_WARNINGS_END", file=f, end='') + else: + print('\t wire' + sa + signal_size(size, 1) + " scope_" + name + ';', file=f, end='') i += 1 print("", file=f) print("", file=f) - print("`define SCOPE_SIGNALS_DATA_LIST \\", file=f) + print("`define SCOPE_DATA_LIST \\", file=f) i = 0 - for port in all_ports: - if port[3]: + for key in toptaps: + tap = toptaps[key] + if tap[2] != 0: continue + name = key.replace('/', '_') if i > 0: print(", \\", file=f) - print("\t" + port[0], file=f, end='') + print("\t scope_" + name, file=f, end='') i += 1 print("", file=f) print("", file=f) - print("`define SCOPE_SIGNALS_UPD_LIST \\", file=f) + print("`define SCOPE_UPDATE_LIST \\", file=f) i = 0 - for port in all_ports: - if not port[3]: + for key in toptaps: + tap = toptaps[key] + if tap[2] == 0: continue + name = key.replace('/', '_') if i > 0: print(", \\", file=f) - print("\t" + port[0], file=f, end='') + print("\t scope_" + name, file=f, end='') i += 1 print("", file=f) print("", file=f) - print("`define SCOPE_TRIGGERS \\", file=f) + print("`define SCOPE_TRIGGER \\", file=f) i = 0 - for trigger in triggers: - arr = trigger_size(trigger[0], all_ports) - if arr is None: + excluded_list = [] + for key in toptaps: + if key in excluded_list: continue - [size, asize] = arr - for prefix in trigger_prefices(asize): + tap = toptaps[key] + if tap[2] != 2: + continue + size = tap[0] + asize = tap[1] + sus = trigger_subscripts(asize) + for su in sus: if i > 0: - print(" | \\", file=f) - print("\t(", file=f, end='') - for j in range(len(trigger)): - if j > 0: - print(" && ", file=f, end='') - print(trigger_name(trigger[j] + prefix, size), file=f, end='') + print(" | \\", file=f) + print("\t(", file=f, end='') + name = trigger_name("scope_" + key.replace('/', '_') + su, size) + if key.endswith("_valid"): + ready_signal = key[:-6] + "_ready" + if ready_signal in toptaps: + rname = trigger_name("scope_" + ready_signal.replace('/', '_') + su, size) + print(name + " && " + rname, file=f, end='') + excluded_list.append(ready_signal) + else: + print(name, file=f, end='') + else: + print(name, file=f, end='') print(")", file=f, end='') i += 1 print("", file=f) @@ -538,69 +682,110 @@ def gen_vl_header(file, taps, triggers): print(footer, file=f) - return all_ports + return toptaps -def gen_cc_header(file, ports): +def gen_cc_header(file, taps): header = ''' -#pragma once\n -struct scope_signal_t { +#pragma once + +struct scope_module_t { + const char* name; + int index; + int parent; +}; + +struct scope_tap_t { int width; const char* name; -};\n -inline constexpr int __clog2(int n) { return (n > 1) ? 1 + __clog2((n + 1) >> 1) : 0; }\n -static constexpr scope_signal_t scope_signals[] = {''' - - footer = "};" - - def eval_macro(text): - expanded = expand_text(text) - if expanded: - text = expanded - text = text.replace('$clog2', '__clog2') - return text - - def asize_name(asize): - def Q(arr, ss, asize, idx, N): - for i in range(asize[idx]): - tmp = ss + "_" + str(i) + int module; +}; +''' + def flatten_path(paths, sizes): + def Q(arr, ss, idx, N, paths, sizes): + size = sizes[idx] + if size != 0: + for i in range(sizes[idx]): + tmp = ss + ('/' if (ss != '') else '') + tmp += paths[idx] + '_' + str(i) + if (idx + 1) < N: + Q(arr, tmp, idx + 1, N, paths, sizes) + else: + arr.append(tmp) + else: + tmp = ss + ('/' if (ss != '') else '') + tmp += paths[idx] if (idx + 1) < N: - Q(arr, tmp, asize, idx + 1, N) + Q(arr, tmp, idx + 1, N, paths, sizes) else: - arr.append(tmp) + arr.append(tmp) - l = len(asize) - if l == 0: - return [""] arr = [] - Q(arr, "", asize, 0, l) - return arr + Q(arr, "", 0, len(asize), paths, asize) + return arr + + # flatten the taps + fdic = {} + for key in taps: + tap = taps[key] + size = str(tap[0]) + paths = key.split('/') + if (len(paths) > 1): + name = paths.pop(-1) + asize = tap[1] + for ss in flatten_path(paths, asize): + fdic[ss + '/' + name ] = [size, -1] + else: + fdic[key] = [size, -1] + + # generate module dic + mdic = {} + for key in fdic: + paths = key.split('/') + if len(paths) == 1: + continue + paths.pop(-1) + parent = -1 + for path in paths: + if not path in mdic: + index = len(mdic) + mdic[path] = (index, parent) + parent = index + else: + parent = mdic[path][0] + fdic[key][1] = parent with open(file, 'w') as f: print(header, file=f) + + print("static constexpr scope_module_t scope_modules[] = {", file=f) i = 0 - for port in ports: - if port[3]: - continue - name = port[0] - size = eval_macro(str(port[1])) - for ss in asize_name(port[2]): - if i > 0: - print(",", file=f) - print("\t{" + size + ", \"" + name + ss + "\"}", file=f, end='') - i += 1 - for port in ports: - if not port[3]: - continue - name = port[0] - size = eval_macro(str(port[1])) - for ss in asize_name(port[2]): - if i > 0: - print(",", file=f) - print("\t{" + size + ", \"" + name + ss + "\"}", file=f, end='') - i += 1 + for key in mdic: + m = mdic[key] + if i > 0: + print(',', file=f) + print("\t{\"" + key + "\", " + str(m[0]) + ", " + str(m[1]) + "}", file=f, end='') + i += 1 print("", file=f) - print(footer, file=f) + print("};", file=f) + + print("", file=f) + print("static constexpr scope_tap_t scope_taps[] = {", file=f) + i = 0 + for key in fdic: + size = fdic[key][0] + parent = fdic[key][1] + paths = key.split('/') + if len(paths) > 1: + name = paths.pop(-1) + else: + name = key + if i > 0: + print(',', file=f) + print("\t{" + size + ", \"" + name + "\", " + str(parent) + "}", file=f, end='') + i += 1 + print("", file=f) + print("};", file=f) def main(): parser = argparse.ArgumentParser(description='Scope headers generator.') @@ -612,7 +797,6 @@ def main(): args = parser.parse_args() print("args=", args) - global parameters global exclude_files global include_dirs global macros @@ -630,13 +814,9 @@ def main(): if "includes" in config: parse_includes(config["includes"]) - - parameters = config["parameters"] - for key in parameters: - parameters[key] = int(eval(expand_text(str(parameters[key])))) - ports = gen_vl_header(args.vl, config["taps"], config["triggers"]) - gen_cc_header(args.cc, ports) + taps = gen_vl_header(args.vl, config["modules"], config["taps"]) + gen_cc_header(args.cc, taps) -if __name__ == "__main__": +if __name__ == '__main__': main() \ No newline at end of file diff --git a/hw/syn/quartus/top/Makefile b/hw/syn/quartus/top/Makefile index 6258682f..544cea65 100644 --- a/hw/syn/quartus/top/Makefile +++ b/hw/syn/quartus/top/Makefile @@ -51,7 +51,7 @@ smart.log: $(PROJECT_FILES) # Project initialization $(PROJECT_FILES): - quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../project.sdc -inc "$(RTL_INCLUDE)" + quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE" syn.chg: $(STAMP) syn.chg diff --git a/hw/syn/yosys/synth.ys b/hw/syn/yosys/synth.ys index 958f0353..f3ac0b0e 100644 --- a/hw/syn/yosys/synth.ys +++ b/hw/syn/yosys/synth.ys @@ -1,17 +1,22 @@ +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_bypass_buffer.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_cam_buffer.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_countones.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_divide.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_encoder_onehot.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_elastic_buffer.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_fair_arbiter.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_fixed_arbiter.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_generic_queue.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_generic_register.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_generic_stack.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_indexable_queue.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_index_queue.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_matrix_arbiter.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_mult.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_multiplier.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_onehot_encooder.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_priority_encoder.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_rr_arbiter.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_scope.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_serial_div.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_shift_register.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_skid_buffer.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_bank.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_bank_core_req_arb.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_cache.v @@ -20,114 +25,72 @@ read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I.. read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_cache_dram_fill_arb.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_cache_dram_req_arb.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_cache_miss_resrv.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_prefetcher.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_snp_forwarder.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_snp_rsp_arb.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_tag_data_access.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_tag_data_structure.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_backend_req_if.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_branch_rsp_if.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_tag_data_store.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_alu_req_if.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_branch_ctl_if.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_cache_core_req_if.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_cache_core_rsp_if.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_cache_dram_req_if.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_cache_dram_rsp_if.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_cache_snp_req_if.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_cache_snp_rsp_if.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_cmt_to_csr_if.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_csr_io_req_if.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_csr_io_rsp_if.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_csr_req_if.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_exec_unit_req_if.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_gpr_read_if.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_gpu_inst_req_if.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_inst_meta_if.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_jal_rsp_if.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_csr_to_issue_if.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_decode_if.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_exu_to_cmt_if.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_fpu_req_if.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_fpu_to_cmt_if.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_fpu_to_csr_if.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_gpr_req_if.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_gpr_rsp_if.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_gpu_req_if.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_ifetch_req_if.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_ifetch_rsp_if.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_join_if.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_lsu_req_if.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_mul_req_if.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_warp_ctl_if.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_wb_if.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_writeback_if.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_wstall_if.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_alu_unit.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_back_end.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_cluster.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_commit.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_core.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_csr_arb.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_csr_data.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_csr_io_arb.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_csr_pipe.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_d_e_reg.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_csr_unit.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_dcache_arb.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_decode.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_exec_unit.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_f_d_reg.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_execute.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_fetch.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_front_end.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_fpu_unit.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_gpr_bypass.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_gpr_fp_ctrl.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_gpr_ram.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_gpr_stage.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_gpr_wrapper.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_gpu_inst.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_i_d_reg.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_gpu_unit.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_ibuffer.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_icache_stage.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_inst_multiplex.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_instr_demux.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_io_arb.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_ipdom_stack.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_issue.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_lsu_unit.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_mem_arb.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_mem_unit.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_mul_unit.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_pipeline.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_scheduler.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_user_config.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_warp.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_scoreboard.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_warp_sched.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_writeback.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/Vortex.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_bank.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_bank_core_req_arb.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_cache.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_cache_core_req_bank_sel.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_cache_core_rsp_merge.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_cache_dram_fill_arb.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_cache_dram_req_arb.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_cache_miss_resrv.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_prefetcher.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_snp_forwarder.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_snp_rsp_arb.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_tag_data_access.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_tag_data_structure.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_backend_req_if.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_branch_rsp_if.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_cache_core_req_if.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_cache_core_rsp_if.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_cache_dram_req_if.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_cache_dram_rsp_if.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_cache_snp_req_if.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_cache_snp_rsp_if.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_csr_io_req_if.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_csr_io_rsp_if.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_csr_req_if.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_exec_unit_req_if.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_gpr_read_if.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_gpu_inst_req_if.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_inst_meta_if.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_jal_rsp_if.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_join_if.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_lsu_req_if.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_warp_ctl_if.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_wb_if.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_wstall_if.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_countones.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_divide.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_encoder_onehot.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_fair_arbiter.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_fixed_arbiter.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_generic_queue.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_generic_register.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_generic_stack.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_indexable_queue.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_matrix_arbiter.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_mult.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_priority_encoder.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_rr_arbiter.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_scope.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/tex_unit/VX_tex_mgr.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/tex_unit/VX_tex_unit.v hierarchy -check -top Vortex add -global_input reset 1 proc -global_arst reset