scope refactoring: adding module definitions to VCD trace

This commit is contained in:
Blaise Tine
2020-10-12 23:26:02 -04:00
parent 309dd48fc6
commit 32da50816f
43 changed files with 1162 additions and 850 deletions

View File

@@ -24,13 +24,6 @@ CXXFLAGS += -fPIC
# Dump perf stats
CXXFLAGS += -DDUMP_PERF_STATS
# Enable scope analyzer
# Enable scope analyzer
ifdef SCOPE
CXXFLAGS += -DSCOPE
SET_SCOPE = SCOPE=1
endif
LDFLAGS += -shared
FPGA_LIBS += -luuid -lopae-c
@@ -53,7 +46,14 @@ PROJECT_VLSIM = $(VLSIM_DIR)/libvortex.so
AFU_JSON_INFO = vortex_afu.h
SRCS = vortex.cpp vx_scope.cpp ../common/vx_utils.cpp
SRCS = vortex.cpp ../common/vx_utils.cpp
# Enable scope analyzer
ifdef SCOPE
CXXFLAGS += -DSCOPE
SRCS += vx_scope.cpp
SET_SCOPE = SCOPE=1
endif
all: vlsim
@@ -64,7 +64,7 @@ json: ../../hw/opae/vortex_afu.json
fpga: $(SRCS)
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) $(FPGA_LIBS) -o $(PROJECT)
ase: $(SRCS) $(ASE_DIR)
asesim: $(SRCS) $(ASE_DIR)
$(CXX) $(CXXFLAGS) -DUSE_ASE $(SRCS) $(LDFLAGS) $(ASE_LIBS) -o $(PROJECT_ASE)
vlsim: $(SRCS) opae-vlsim

View File

@@ -20,10 +20,10 @@ DBG_FLAGS += -DDBG_CORE_REQ_INFO
#CONFIGS += -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=1
CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=1
DEBUG=1
#DEBUG=1
SCOPE=1
CFLAGS += -fPIC

View File

@@ -87,7 +87,7 @@ t_if_ccip_Tx af2cp_sTxPort;
vortex_afu #(
.NUM_LOCAL_MEM_BANKS(NUM_LOCAL_MEM_BANKS)
) vortex_afu (
) afu (
.clk(clk),
.reset(reset),
.cp2af_sRxPort(cp2af_sRxPort),

View File

@@ -509,12 +509,6 @@ extern int vx_start(vx_device_h hdevice) {
// start execution
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_RUN));
/*#ifdef SCOPE
sleep(15);
vx_scope_stop(device->fpga, 0);
exit(0);
#endif*/
return 0;
}

View File

@@ -4,6 +4,9 @@
#include <chrono>
#include <vector>
#include <assert.h>
#include <chrono>
#include <thread>
#include <mutex>
#ifdef USE_VLSIM
#include "vlsim/fpga.h"
@@ -39,14 +42,30 @@
#define CMD_SET_STOP 5
#define CMD_GET_OFFSET 6
static constexpr int num_signals = sizeof(scope_signals) / sizeof(scope_signal_t);
static constexpr int num_modules = sizeof(scope_modules) / sizeof(scope_module_t);
static constexpr int num_signals = sizeof(scope_taps) / sizeof(scope_tap_t);
constexpr int calcFrameWidth(int index = 0) {
return (index < num_signals) ? (scope_signals[index].width + calcFrameWidth(index + 1)) : 0;
return (index < num_signals) ? (scope_taps[index].width + calcFrameWidth(index + 1)) : 0;
}
static constexpr int fwidth = calcFrameWidth();
#ifdef HANG_TIMEOUT
static std::thread g_timeout_thread;
static std::mutex g_timeout_mutex;
// Watchdog body, run on the detached thread started by vx_scope_start().
// Sleeps for HANG_TIMEOUT seconds; if no scope dump has claimed the guard
// mutex by then, it dumps the trace itself, closes the device handle and
// terminates the process.
//
// fpga: device handle forwarded to vx_scope_stop() and fpgaClose().
static void timeout_callback(fpga_handle fpga) {
    // Use the configured timeout instead of a duplicated literal.
    std::this_thread::sleep_for(std::chrono::seconds{HANG_TIMEOUT});

    // Probe the guard: if it is already held, vx_scope_stop() has been entered
    // on another thread and there is nothing left for the watchdog to do.
    if (!g_timeout_mutex.try_lock())
        return;

    // vx_scope_stop() performs its own try_lock() on g_timeout_mutex. Calling
    // it while this thread still owns the (non-recursive) mutex is undefined
    // behaviour and in practice makes it return early without dumping the
    // trace, so release the probe and let vx_scope_stop() take the guard.
    // NOTE(review): this leaves a very small window in which another thread
    // could enter vx_scope_stop() first; closing it fully requires moving the
    // guard out of vx_scope_stop() into its callers.
    g_timeout_mutex.unlock();

    vx_scope_stop(fpga, HANG_TIMEOUT);
    fpgaClose(fpga);
    exit(0);
}
#endif
uint64_t print_clock(std::ofstream& ofs, uint64_t delta, uint64_t timestamp) {
while (delta != 0) {
ofs << '#' << timestamp++ << std::endl;
@@ -58,6 +77,27 @@ uint64_t print_clock(std::ofstream& ofs, uint64_t delta, uint64_t timestamp) {
return timestamp;
}
// Writes one VCD "$var" declaration for every entry of scope_taps whose
// `module` field equals `module`.
//
// ofs:    output stream receiving the VCD header text.
// module: module index to filter on (the caller passes -1 for taps that sit
//         directly under the TOP scope).
//
// The VCD identifier code of a tap is its position in scope_taps plus one
// (code 0 is used for clk). It is therefore counted across ALL taps, not only
// the ones of the requested module: the decoder looks widths up with
// scope_taps[signal_id-1], and restarting the counter per module would hand
// the same identifier to taps living in different modules.
void dump_taps(std::ofstream& ofs, int module) {
    int id = 0;
    for (auto& tap : scope_taps) {
        ++id;  // advance for every tap so the code tracks the global index
        if (tap.module != module)
            continue;
        ofs << "$var reg " << tap.width << " " << id << " " << tap.name << " $end" << std::endl;
    }
}
// Emits the VCD scope hierarchy rooted at `parent`. For each entry of
// scope_modules whose parent matches, a "$scope module" section is opened,
// its child modules are written first (recursively), then its own taps, and
// finally the section is closed with "$upscope".
//
// ofs:    output stream receiving the VCD header text.
// parent: index of the module whose direct children should be written.
void dump_module(std::ofstream& ofs, int parent) {
    for (auto& entry : scope_modules) {
        if (entry.parent == parent) {
            ofs << "$scope module " << entry.name << " $end" << std::endl;
            dump_module(ofs, entry.index);
            dump_taps(ofs, entry.index);
            ofs << "$upscope $end" << std::endl;
        }
    }
}
int vx_scope_start(fpga_handle hfpga, uint64_t delay) {
if (nullptr == hfpga)
return -1;
@@ -69,10 +109,20 @@ int vx_scope_start(fpga_handle hfpga, uint64_t delay) {
std::cout << "scope start delay: " << delay << std::endl;
}
#ifdef HANG_TIMEOUT
g_timeout_thread = std::thread(timeout_callback, hfpga);
g_timeout_thread.detach();
#endif
return 0;
}
int vx_scope_stop(fpga_handle hfpga, uint64_t delay) {
#ifdef HANG_TIMEOUT
if (!g_timeout_mutex.try_lock())
return 0;
#endif
if (nullptr == hfpga)
return -1;
@@ -89,11 +139,8 @@ int vx_scope_stop(fpga_handle hfpga, uint64_t delay) {
ofs << "$timescale 1 ns $end" << std::endl;
ofs << "$scope module TOP $end" << std::endl;
ofs << "$var reg 1 0 clk $end" << std::endl;
for (int i = 0; i < num_signals; ++i) {
ofs << "$var reg " << scope_signals[i].width << " " << (i+1) << " " << scope_signals[i].name << " $end" << std::endl;
}
dump_module(ofs, -1);
dump_taps(ofs, -1);
ofs << "$upscope $end" << std::endl;
ofs << "enddefinitions $end" << std::endl;
@@ -158,7 +205,7 @@ int vx_scope_stop(fpga_handle hfpga, uint64_t delay) {
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &word));
do {
int signal_width = scope_signals[signal_id-1].width;
int signal_width = scope_taps[signal_id-1].width;
int word_offset = frame_offset % 64;
signal_data[signal_width - signal_offset - 1] = ((word >> word_offset) & 0x1) ? '1' : '0';
@@ -183,7 +230,9 @@ int vx_scope_stop(fpga_handle hfpga, uint64_t delay) {
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &delta));
timestamp = print_clock(ofs, delta + 1, timestamp);
signal_id = num_signals;
//std::cout << "*** " << frame_no << " frames, timestamp=" << timestamp << std::endl;
if (0 == (frame_no % 100)) {
std::cout << "*** " << frame_no << " frames, timestamp=" << timestamp << std::endl;
}
}
}

View File

@@ -1,5 +1,7 @@
#pragma once
#define HANG_TIMEOUT 60
int vx_scope_start(fpga_handle hfpga, uint64_t delay = -1);
int vx_scope_stop(fpga_handle hfpga, uint64_t delay = -1);

View File

@@ -60,7 +60,7 @@ qsub-sim
make ase
# tests
./run_ase.sh build_ase_1c ../../driver/tests/basic/basic -n 256
./run_ase.sh build_ase_1c ../../driver/tests/basic/basic -t1 -n1
./run_ase.sh build_ase_1c ../../driver/tests/demo/demo -n 16
./run_ase.sh build_ase_1c ../../driver/tests/dogfood/dogfood -n1 -s4 -e4
./run_ase.sh build_ase_1c ../../benchmarks/opencl/vecadd/vecadd
@@ -81,6 +81,7 @@ tar -zcvf run.log.tar.gz run.log
tar -cvjf vortex.vcd.tar.bz2 build_ase_1c/work/vortex.vcd
tar -zcvf vortex.vcd.tar.gz build_ase_1c/work/vortex.vcd
tar -zcvf run.log.tar.gz build_ase_1c/work/run.log
tar -zcvf vx_scope.vcd.tar.gz vx_scope.vcd
# decompress VCD trace
tar -zxvf /mnt/c/Users/Blaise/Downloads/vortex.vcd.tar.gz

View File

@@ -104,7 +104,7 @@ module ccip_std_afu #(
vortex_afu #(
.NUM_LOCAL_MEM_BANKS(NUM_LOCAL_MEM_BANKS)
) vortex_afu_inst (
) afu (
.clk (clk),
.reset (reset_T1),

View File

@@ -1,7 +1,7 @@
# Analysis & Synthesis Assignments
set_global_assignment -name VERILOG_INPUT_VERSION SYSTEMVERILOG_2009
# set_global_assignment -name ADD_PASS_THROUGH_LOGIC_TO_INFERRED_RAMS ON
set_global_assignment -name ADD_PASS_THROUGH_LOGIC_TO_INFERRED_RAMS OFF
set_global_assignment -name VERILOG_MACRO QUARTUS
set_global_assignment -name VERILOG_MACRO SYNTHESIS
set_global_assignment -name VERILOG_MACRO NDEBUG

View File

@@ -93,7 +93,7 @@ typedef logic [$bits(t_ccip_clData) + $bits(t_cci_rdq_tag)-1:0] t_cci_rdq_data;
state_t state;
`ifdef SCOPE
`SCOPE_SIGNALS_DECL
`SCOPE_DECL_SIGNALS
`endif
// Vortex ports ///////////////////////////////////////////////////////////////
@@ -511,7 +511,7 @@ assign vx_dram_wr_req_fire = vx_dram_wr_req_enable && !avs_waitrequest;
assign vx_dram_rd_rsp_fire = vx_dram_rsp_valid && vx_dram_rsp_ready;
assign avs_pending_reads_next = avs_pending_reads
+ (((cci_dram_rd_req_fire || vx_dram_rd_req_fire) && !avs_rdq_pop) ? 1 :
+ $bits(avs_pending_reads)'(((cci_dram_rd_req_fire || vx_dram_rd_req_fire) && !avs_rdq_pop) ? 1 :
(~(cci_dram_rd_req_fire || vx_dram_rd_req_fire) && avs_rdq_pop) ? -1 : 0);
if (`VX_DRAM_LINE_WIDTH != DRAM_LINE_WIDTH) begin
@@ -573,8 +573,8 @@ begin
end
if (cci_dram_rd_req_fire) begin
cci_dram_rd_req_addr <= cci_dram_rd_req_addr + 1;
cci_dram_rd_req_ctr <= cci_dram_rd_req_ctr - 1;
cci_dram_rd_req_addr <= cci_dram_rd_req_addr + DRAM_ADDR_WIDTH'(1);
cci_dram_rd_req_ctr <= cci_dram_rd_req_ctr - DRAM_ADDR_WIDTH'(1);
`ifdef DBG_PRINT_OPAE
$display("%t: AVS Rd Req: addr=%0h, rem=%0d, pending=%0d", $time, `DRAM_TO_BYTE_ADDR(avs_address), (cci_dram_rd_req_ctr - 1), avs_pending_reads_next);
`endif
@@ -582,7 +582,7 @@ begin
if (cci_dram_wr_req_fire) begin
cci_dram_wr_req_addr <= cci_dram_wr_req_addr + ((t_cci_rdq_tag'(cci_dram_wr_req_ctr) == $bits(t_cci_rdq_tag)'(CCI_RD_WINDOW_SIZE-1)) ? DRAM_ADDR_WIDTH'(CCI_RD_WINDOW_SIZE) : DRAM_ADDR_WIDTH'(0));
cci_dram_wr_req_ctr <= cci_dram_wr_req_ctr + 1;
cci_dram_wr_req_ctr <= cci_dram_wr_req_ctr + DRAM_ADDR_WIDTH'(1);
`ifdef DBG_PRINT_OPAE
$display("%t: AVS Wr Req: addr=%0h, data=%0h, rem=%0d", $time, `DRAM_TO_BYTE_ADDR(avs_address), avs_writedata, (cci_dram_wr_req_ctr + 1));
`endif
@@ -683,14 +683,14 @@ end
assign cci_rd_req_fire = af2cp_sTxPort.c0.valid && !cp2af_sRxPort.c0TxAlmFull;
assign cci_rd_rsp_fire = (STATE_WRITE == state) && cp2af_sRxPort.c0.rspValid;
assign cci_rd_req_ctr_next = cci_rd_req_ctr + (cci_rd_req_fire ? 1 : 0);
assign cci_rd_req_ctr_next = cci_rd_req_ctr + DRAM_ADDR_WIDTH'(cci_rd_req_fire ? 1 : 0);
assign cci_rdq_pop = cci_dram_wr_req_fire;
assign cci_rdq_push = cci_rd_rsp_fire;
assign cci_rdq_din = {cp2af_sRxPort.c0.data, t_cci_rdq_tag'(cp2af_sRxPort.c0.hdr.mdata)};
assign cci_pending_reads_next = cci_pending_reads
+ ((cci_rd_req_fire && !cci_rdq_pop) ? 1 :
+ $bits(cci_pending_reads)'((cci_rd_req_fire && !cci_rdq_pop) ? 1 :
(!cci_rd_req_fire && cci_rdq_pop) ? -1 : 0);
assign af2cp_sTxPort.c0.valid = cci_rd_req_enable && !cci_rd_req_wait;
@@ -734,7 +734,7 @@ begin
end
if (cci_rd_rsp_fire) begin
cci_rd_rsp_ctr <= cci_rd_rsp_ctr + 1;
cci_rd_rsp_ctr <= cci_rd_rsp_ctr + t_cci_rdq_tag'(1);
if (cci_rd_rsp_ctr == $bits(t_cci_rdq_tag)'(CCI_RD_WINDOW_SIZE-1)) begin
cci_rd_req_wait <= 0; // restart new request batch
end
@@ -787,7 +787,7 @@ assign cci_wr_req_fire = af2cp_sTxPort.c1.valid && !cp2af_sRxPort.c1TxAlmFull;
assign cci_wr_rsp_fire = (STATE_READ == state) && cp2af_sRxPort.c1.rspValid;
assign cci_pending_writes_next = cci_pending_writes
+ ((cci_wr_req_fire && !cci_wr_rsp_fire) ? 1 :
+ $bits(cci_pending_writes)'((cci_wr_req_fire && !cci_wr_rsp_fire) ? 1 :
(!cci_wr_req_fire && cci_wr_rsp_fire) ? -1 : 0);
assign cmd_read_done = (0 == cci_wr_req_ctr) && (0 == cci_pending_writes);
@@ -817,8 +817,8 @@ begin
if (cci_wr_req_fire) begin
assert(cci_wr_req_ctr != 0);
cci_wr_req_addr <= cci_wr_req_addr + 1;
cci_wr_req_ctr <= cci_wr_req_ctr - 1;
cci_wr_req_addr <= cci_wr_req_addr + t_ccip_clAddr'(1);
cci_wr_req_ctr <= cci_wr_req_ctr - DRAM_ADDR_WIDTH'(1);
`ifdef DBG_PRINT_OPAE
$display("%t: CCI Wr Req: addr=%0h, rem=%0d, pending=%0d", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes_next);
`endif
@@ -854,8 +854,8 @@ end
assign vx_snp_req_fire = vx_snp_req_valid && vx_snp_req_ready;
assign vx_snp_rsp_fire = vx_snp_rsp_valid && vx_snp_rsp_ready;
assign snp_req_ctr_next = vx_snp_req_fire ? (snp_req_ctr + 1) : snp_req_ctr;
assign snp_rsp_ctr_next = vx_snp_rsp_fire ? (snp_rsp_ctr - 1) : snp_rsp_ctr;
assign snp_req_ctr_next = vx_snp_req_fire ? (snp_req_ctr + `VX_DRAM_ADDR_WIDTH'(1)) : snp_req_ctr;
assign snp_rsp_ctr_next = vx_snp_rsp_fire ? (snp_rsp_ctr - `VX_DRAM_ADDR_WIDTH'(1)) : snp_rsp_ctr;
assign cmd_clflush_done = (0 == snp_rsp_ctr);
@@ -894,7 +894,7 @@ begin
if (vx_snp_req_fire)
begin
assert(snp_req_ctr < snp_req_size);
vx_snp_req_addr <= vx_snp_req_addr + 1;
vx_snp_req_addr <= vx_snp_req_addr + `VX_DRAM_ADDR_WIDTH'(1);
vx_snp_req_tag <= (`VX_SNP_TAG_WIDTH)'(snp_req_ctr_next);
snp_req_ctr <= snp_req_ctr_next;
`ifdef DBG_PRINT_OPAE
@@ -954,15 +954,7 @@ end
assign cmd_run_done = !vx_busy;
Vortex #() vortex (
`SCOPE_SIGNALS_ISTAGE_TOP_BIND
`SCOPE_SIGNALS_LSU_TOP_BIND
`SCOPE_SIGNALS_BANK_L3_TOP_BIND
`SCOPE_SIGNALS_BANK_L2_TOP_BIND
`SCOPE_SIGNALS_BANK_L1D_TOP_BIND
`SCOPE_SIGNALS_BANK_L1I_TOP_BIND
`SCOPE_SIGNALS_BANK_L1S_TOP_BIND
`SCOPE_SIGNALS_ISSUE_TOP_BIND
`SCOPE_SIGNALS_EXECUTE_TOP_BIND
`SCOPE_BIND_vortex_afu_vortex()
.clk (clk),
.reset (reset | vx_reset),
@@ -1001,10 +993,10 @@ Vortex #() vortex (
`UNUSED_PIN (io_req_addr),
`UNUSED_PIN (io_req_data),
`UNUSED_PIN (io_req_tag),
.io_req_ready (1),
.io_req_ready (1'b1),
// I/O response
.io_rsp_valid (0),
.io_rsp_valid (1'b0),
.io_rsp_data (0),
.io_rsp_tag (0),
`UNUSED_PIN (io_rsp_ready),
@@ -1069,20 +1061,20 @@ end
`SCOPE_ASSIGN (scope_busy, vx_busy);
wire scope_changed = `SCOPE_TRIGGERS;
wire scope_changed = `SCOPE_TRIGGER;
VX_scope #(
.DATAW ($bits({`SCOPE_SIGNALS_DATA_LIST,`SCOPE_SIGNALS_UPD_LIST})),
.DATAW ($bits({`SCOPE_DATA_LIST,`SCOPE_UPDATE_LIST})),
.BUSW (64),
.SIZE (4096),
.UPDW ($bits({`SCOPE_SIGNALS_UPD_LIST}))
.UPDW ($bits({`SCOPE_UPDATE_LIST}))
) scope (
.clk (clk),
.reset (reset),
.start (scope_start),
.stop (0),
.stop (1'b0),
.changed (scope_changed),
.data_in ({`SCOPE_SIGNALS_DATA_LIST,`SCOPE_SIGNALS_UPD_LIST}),
.data_in ({`SCOPE_DATA_LIST,`SCOPE_UPDATE_LIST}),
.bus_in (cmd_scope_wdata),
.bus_out (cmd_scope_rdata),
.bus_read (cmd_scope_read),

View File

@@ -3,14 +3,7 @@
module VX_cluster #(
parameter CLUSTER_ID = 0
) (
`SCOPE_SIGNALS_ISTAGE_CLUSTER_IO
`SCOPE_SIGNALS_LSU_CLUSTER_IO
`SCOPE_SIGNALS_BANK_L2_CLUSTER_IO
`SCOPE_SIGNALS_BANK_L1D_CLUSTER_IO
`SCOPE_SIGNALS_BANK_L1I_CLUSTER_IO
`SCOPE_SIGNALS_BANK_L1S_CLUSTER_IO
`SCOPE_SIGNALS_ISSUE_CLUSTER_IO
`SCOPE_SIGNALS_EXECUTE_CLUSTER_IO
`SCOPE_IO_VX_cluster
// Clock
input wire clk,
@@ -141,13 +134,7 @@ module VX_cluster #(
VX_core #(
.CORE_ID(i + (CLUSTER_ID * `NUM_CORES))
) core (
`SCOPE_SIGNALS_ISTAGE_SELECT(i)
`SCOPE_SIGNALS_LSU_SELECT(i)
`SCOPE_SIGNALS_BANK_L1D_CORE_SELECT(i)
`SCOPE_SIGNALS_BANK_L1I_CORE_SELECT(i)
`SCOPE_SIGNALS_BANK_L1S_CORE_SELECT(i)
`SCOPE_SIGNALS_ISSUE_SELECT(i)
`SCOPE_SIGNALS_EXECUTE_SELECT(i)
`SCOPE_BIND_VX_cluster_core(i)
.clk (clk),
.reset (reset),
@@ -385,7 +372,7 @@ module VX_cluster #(
.SNP_REQ_TAG_WIDTH (`L2SNP_TAG_WIDTH),
.SNP_FWD_TAG_WIDTH (`DSNP_TAG_WIDTH)
) l2cache (
`SCOPE_SIGNALS_BANK_L2_CACHE_BIND
`SCOPE_BIND_VX_cluster_l2cache()
.clk (clk),
.reset (reset),

View File

@@ -3,13 +3,7 @@
module VX_core #(
parameter CORE_ID = 0
) (
`SCOPE_SIGNALS_ISTAGE_IO
`SCOPE_SIGNALS_LSU_IO
`SCOPE_SIGNALS_BANK_L1D_CORE_IO
`SCOPE_SIGNALS_BANK_L1I_CORE_IO
`SCOPE_SIGNALS_BANK_L1S_CORE_IO
`SCOPE_SIGNALS_ISSUE_IO
`SCOPE_SIGNALS_EXECUTE_IO
`SCOPE_IO_VX_core
// Clock
input wire clk,
@@ -181,10 +175,7 @@ module VX_core #(
VX_pipeline #(
.CORE_ID(CORE_ID)
) pipeline (
`SCOPE_SIGNALS_ISTAGE_BIND
`SCOPE_SIGNALS_LSU_BIND
`SCOPE_SIGNALS_ISSUE_BIND
`SCOPE_SIGNALS_EXECUTE_BIND
`SCOPE_BIND_VX_core_pipeline()
.clk(clk),
.reset(reset),
@@ -260,9 +251,7 @@ module VX_core #(
VX_mem_unit #(
.CORE_ID(CORE_ID)
) mem_unit (
`SCOPE_SIGNALS_BANK_L1D_CORE_BIND
`SCOPE_SIGNALS_BANK_L1I_CORE_BIND
`SCOPE_SIGNALS_BANK_L1S_CORE_BIND
`SCOPE_BIND_VX_core_mem_unit()
.clk (clk),
.reset (reset),

View File

@@ -3,8 +3,7 @@
module VX_execute #(
parameter CORE_ID = 0
) (
`SCOPE_SIGNALS_LSU_IO
`SCOPE_SIGNALS_EXECUTE_IO
`SCOPE_IO_VX_execute
input wire clk,
input wire reset,
@@ -55,7 +54,7 @@ module VX_execute #(
VX_lsu_unit #(
.CORE_ID(CORE_ID)
) lsu_unit (
`SCOPE_SIGNALS_LSU_BIND
`SCOPE_BIND_VX_execute_lsu_unit()
.clk (clk),
.reset (reset),
.dcache_req_if (dcache_req_if),
@@ -122,6 +121,7 @@ module VX_execute #(
VX_gpu_unit #(
.CORE_ID(CORE_ID)
) gpu_unit (
`SCOPE_BIND_VX_execute_gpu_unit()
.clk (clk),
.reset (reset),
.gpu_req_if (gpu_req_if),

View File

@@ -3,7 +3,7 @@
module VX_fetch #(
parameter CORE_ID = 0
) (
`SCOPE_SIGNALS_ISTAGE_IO
`SCOPE_IO_VX_fetch
input wire clk,
input wire reset,
@@ -29,6 +29,8 @@ module VX_fetch #(
VX_warp_sched #(
.CORE_ID(CORE_ID)
) warp_sched (
`SCOPE_BIND_VX_fetch_warp_sched()
.clk (clk),
.reset (reset),
.warp_ctl_if (warp_ctl_if),
@@ -43,7 +45,7 @@ module VX_fetch #(
VX_icache_stage #(
.CORE_ID(CORE_ID)
) icache_stage (
`SCOPE_SIGNALS_ISTAGE_BIND
`SCOPE_BIND_VX_fetch_icache_stage()
.clk (clk),
.reset (reset),

View File

@@ -46,19 +46,15 @@ module VX_gpr_fp_ctrl (
rsp_pc <= gpr_req_if.PC;
if (read_rs1) begin
rsp_rs1_data <= rs1_data;
rsp_rs1_data <= (gpr_req_if.rs1 == 0) ? (`NUM_THREADS*32)'(0) : rs1_data;
end
rsp_rs2_data <= rs2_data;
rsp_rs3_data <= rs1_data;
rsp_rs2_data <= (gpr_req_if.rs2 == 0) ? (`NUM_THREADS*32)'(0) : rs2_data;
rsp_rs3_data <= (gpr_req_if.rs1 == 0) ? (`NUM_THREADS*32)'(0) : rs1_data;
assert(read_rs1 || rsp_wid == gpr_req_if.wid);
end
end
always @(posedge clk) begin
end
// outputs
wire [`NR_BITS-1:0] rs1 = read_rs1 ? gpr_req_if.rs1 : gpr_req_if.rs3;
assign raddr1 = {gpr_req_if.wid, rs1};

View File

@@ -14,14 +14,6 @@ module VX_gpr_ram (
reg [`NUM_THREADS-1:0][3:0][7:0] ram [(`NUM_WARPS * `NUM_REGS)-1:0];
initial begin // initialize ram: set r0 = 0
for (integer j = 0; j < `NUM_WARPS; j++) begin
for (integer i = 0; i < `NUM_REGS; i++) begin
ram[j * `NUM_REGS + i] = (i == 0) ? {`NUM_THREADS{32'h0}} : {`NUM_THREADS{32'hx}};
end
end
end
always @(posedge clk) begin
for (integer i = 0; i < `NUM_THREADS; i++) begin
if (we[i]) begin

View File

@@ -15,8 +15,7 @@ module VX_gpr_stage #(
);
`UNUSED_VAR (reset)
wire [`NUM_THREADS-1:0][31:0] rs1_data;
wire [`NUM_THREADS-1:0][31:0] rs2_data;
wire [`NUM_THREADS-1:0][31:0] rs1_data, rs2_data;
wire [`NW_BITS+`NR_BITS-1:0] raddr1;
VX_gpr_ram gpr_ram (
@@ -57,8 +56,8 @@ module VX_gpr_stage #(
rsp_valid <= gpr_req_if.valid;
rsp_wid <= gpr_req_if.wid;
rsp_pc <= gpr_req_if.PC;
rsp_rs1_data <= rs1_data;
rsp_rs2_data <= rs2_data;
rsp_rs1_data <= (gpr_req_if.rs1 == 0) ? (`NUM_THREADS*32)'(0) : rs1_data;
rsp_rs2_data <= (gpr_req_if.rs2 == 0) ? (`NUM_THREADS*32)'(0) : rs2_data;
end
end

View File

@@ -3,6 +3,8 @@
module VX_gpu_unit #(
parameter CORE_ID = 0
) (
`SCOPE_IO_VX_gpu_unit
input wire clk,
input wire reset,
@@ -88,4 +90,18 @@ module VX_gpu_unit #(
// can accept new request?
assign gpu_req_if.ready = gpu_commit_if.ready;
`SCOPE_ASSIGN (scope_gpu_req_valid, gpu_req_if.valid);
`SCOPE_ASSIGN (scope_gpu_req_wid, gpu_req_if.wid);
`SCOPE_ASSIGN (scope_gpu_req_tmask, gpu_req_if.tmask);
`SCOPE_ASSIGN (scope_gpu_req_op_type, gpu_req_if.op_type);
`SCOPE_ASSIGN (scope_gpu_req_rs1, gpu_req_if.rs1_data[0]);
`SCOPE_ASSIGN (scope_gpu_req_rs2, gpu_req_if.rs2_data);
`SCOPE_ASSIGN (scope_gpu_req_ready, gpu_req_if.ready);
`SCOPE_ASSIGN (scope_gpu_rsp_valid, warp_ctl_if.valid);
`SCOPE_ASSIGN (scope_gpu_rsp_wid, warp_ctl_if.wid);
`SCOPE_ASSIGN (scope_gpu_rsp_tmc, warp_ctl_if.tmc);
`SCOPE_ASSIGN (scope_gpu_rsp_wspawn, warp_ctl_if.wspawn);
`SCOPE_ASSIGN (scope_gpu_rsp_split, warp_ctl_if.split);
`SCOPE_ASSIGN (scope_gpu_rsp_barrier, warp_ctl_if.barrier);
endmodule

View File

@@ -20,16 +20,13 @@ module VX_ibuffer #(
localparam ADDRW = $clog2(SIZE);
localparam NWARPSW = $clog2(`NUM_WARPS+1);
`USE_FAST_BRAM reg [DATAW-1:0] entries [`NUM_WARPS-1:0][SIZE-1:0];
reg [SIZEW-1:0] size_r [`NUM_WARPS-1:0];
reg [ADDRW:0] rd_ptr_r [`NUM_WARPS-1:0];
reg [ADDRW:0] wr_ptr_r [`NUM_WARPS-1:0];
wire [`NUM_WARPS-1:0] q_full;
wire [`NUM_WARPS-1:0][SIZEW-1:0] q_size;
wire [DATAW-1:0] q_data_in;
wire [`NUM_WARPS-1:0][DATAW-1:0] q_data_prev;
reg [`NUM_WARPS-1:0][DATAW-1:0] q_data_out;
reg [SIZEW-1:0] size_r [`NUM_WARPS-1:0];
wire enq_fire = ibuf_enq_if.valid && ibuf_enq_if.ready;
wire deq_fire = ibuf_deq_if.valid && ibuf_deq_if.ready;
@@ -39,39 +36,48 @@ module VX_ibuffer #(
wire writing = enq_fire && (i == ibuf_enq_if.wid);
wire reading = deq_fire && (i == ibuf_deq_if.wid);
wire [ADDRW-1:0] rd_ptr_a = rd_ptr_r[i][ADDRW-1:0];
wire [ADDRW-1:0] wr_ptr_a = wr_ptr_r[i][ADDRW-1:0];
wire is_slot0 = ((0 == size_r[i]) || ((1 == size_r[i]) && reading));
wire push = writing && !is_slot0;
wire pop = reading && (size_r[i] != 1);
VX_generic_queue #(
.DATAW(DATAW),
.SIZE(SIZE)
) queue (
.clk (clk),
.reset (reset),
.push (push),
.data_in (q_data_in),
.pop (pop),
.data_out (q_data_prev[i]),
`UNUSED_PIN (empty),
`UNUSED_PIN (full),
`UNUSED_PIN (size)
);
always @(posedge clk) begin
if (writing && is_slot0) begin
q_data_out[i] <= q_data_in;
end
if (pop) begin
q_data_out[i] <= q_data_prev[i];
end
end
always @(posedge clk) begin
if (reset) begin
rd_ptr_r[i] <= 0;
wr_ptr_r[i] <= 0;
size_r[i] <= 0;
end else begin
if (writing) begin
if ((0 == size_r[i]) || ((1 == size_r[i]) && reading)) begin
q_data_out[i] <= q_data_in;
end else begin
entries[i][wr_ptr_a] <= q_data_in;
wr_ptr_r[i] <= wr_ptr_r[i] + ADDRW'(1);
end
if (!reading) begin
if (writing && !reading) begin
size_r[i] <= size_r[i] + SIZEW'(1);
end
end
if (reading) begin
if (size_r[i] != 1) begin
q_data_out[i] <= q_data_prev[i];
rd_ptr_r[i] <= rd_ptr_r[i] + ADDRW'(1);
end
if (!writing) begin
if (reading && !writing) begin
size_r[i] <= size_r[i] - SIZEW'(1);
end
end
end
end
assign q_data_prev[i] = entries[i][rd_ptr_a];
assign q_full[i] = (size_r[i] == SIZE);
assign q_size[i] = size_r[i];
end

View File

@@ -3,7 +3,7 @@
module VX_icache_stage #(
parameter CORE_ID = 0
) (
`SCOPE_SIGNALS_ISTAGE_IO
`SCOPE_IO_VX_icache_stage
input wire clk,
input wire reset,

View File

@@ -1,4 +1,3 @@
`include "VX_platform.vh"
module VX_ipdom_stack #(
@@ -17,33 +16,55 @@ module VX_ipdom_stack #(
);
localparam STACK_SIZE = 2 ** DEPTH;
`USE_FAST_BRAM reg [WIDTH-1:0] stack_1 [0:STACK_SIZE-1];
`USE_FAST_BRAM reg [WIDTH-1:0] stack_2 [0:STACK_SIZE-1];
`USE_FAST_BRAM reg is_part [0:STACK_SIZE-1];
reg [WIDTH-1:0] stack_1 [0:STACK_SIZE-1];
reg [WIDTH-1:0] stack_2 [0:STACK_SIZE-1];
reg is_part [0:STACK_SIZE-1];
reg [DEPTH-1:0] rd_ptr, wr_ptr;
reg [WIDTH - 1:0] d1, d2;
reg p;
always @(posedge clk) begin
if (reset) begin
rd_ptr <= 0;
wr_ptr <= 0;
end else begin
if (push) begin
stack_1[wr_ptr] <= q1;
stack_2[wr_ptr] <= q2;
is_part[wr_ptr] <= 0;
rd_ptr <= wr_ptr;
wr_ptr <= wr_ptr + DEPTH'(1);
end else if (pop) begin
wr_ptr <= wr_ptr - DEPTH'(is_part[rd_ptr]);
rd_ptr <= rd_ptr - DEPTH'(is_part[rd_ptr]);
end
end
end
always @(posedge clk) begin
if (push) begin
stack_1[wr_ptr] <= q1;
end
end
assign d1 = stack_1[rd_ptr];
always @(posedge clk) begin
if (push) begin
stack_2[wr_ptr] <= q2;
end
end
assign d2 = stack_2[rd_ptr];
always @(posedge clk) begin
if (push) begin
is_part[wr_ptr] <= 0;
end else if (pop) begin
is_part[rd_ptr] <= 1;
end
end
end
assign p = is_part[rd_ptr];
assign d = is_part[rd_ptr] ? stack_1[rd_ptr] : stack_2[rd_ptr];
assign empty = (0 == wr_ptr);
assign d = p ? d1 : d2;
assign empty = ~(| wr_ptr);
assign full = ((STACK_SIZE-1) == wr_ptr);
endmodule

View File

@@ -3,7 +3,7 @@
module VX_issue #(
parameter CORE_ID = 0
) (
`SCOPE_SIGNALS_ISSUE_IO
`SCOPE_IO_VX_issue
input wire clk,
input wire reset,

View File

@@ -3,7 +3,7 @@
module VX_lsu_unit #(
parameter CORE_ID = 0
) (
`SCOPE_SIGNALS_LSU_IO
`SCOPE_IO_VX_lsu_unit
input wire clk,
input wire reset,

View File

@@ -3,9 +3,7 @@
module VX_mem_unit # (
parameter CORE_ID = 0
) (
`SCOPE_SIGNALS_BANK_L1D_CORE_IO
`SCOPE_SIGNALS_BANK_L1I_CORE_IO
`SCOPE_SIGNALS_BANK_L1S_CORE_IO
`SCOPE_IO_VX_mem_unit
input wire clk,
input wire reset,
@@ -79,7 +77,7 @@ module VX_mem_unit # (
.CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS),
.DRAM_TAG_WIDTH (`SDRAM_TAG_WIDTH)
) smem (
`SCOPE_SIGNALS_BANK_L1S_CACHE_BIND
`SCOPE_BIND_VX_mem_unit_smem()
.clk (clk),
.reset (reset),
@@ -106,7 +104,7 @@ module VX_mem_unit # (
`UNUSED_PIN (dram_req_addr),
`UNUSED_PIN (dram_req_data),
`UNUSED_PIN (dram_req_tag),
.dram_req_ready (0),
.dram_req_ready (1'b0),
// DRAM response
.dram_rsp_valid (0),
@@ -115,7 +113,7 @@ module VX_mem_unit # (
`UNUSED_PIN (dram_rsp_ready),
// Snoop request
.snp_req_valid (0),
.snp_req_valid (1'b0),
.snp_req_addr (0),
.snp_req_invalidate (0),
.snp_req_tag (0),
@@ -124,17 +122,17 @@ module VX_mem_unit # (
// Snoop response
`UNUSED_PIN (snp_rsp_valid),
`UNUSED_PIN (snp_rsp_tag),
.snp_rsp_ready (0),
.snp_rsp_ready (1'b0),
// Snoop forward out
`UNUSED_PIN (snp_fwdout_valid),
`UNUSED_PIN (snp_fwdout_addr),
`UNUSED_PIN (snp_fwdout_invalidate),
`UNUSED_PIN (snp_fwdout_tag),
.snp_fwdout_ready (0),
.snp_fwdout_ready (1'b0),
// Snoop forward in
.snp_fwdin_valid (0),
.snp_fwdin_valid (1'b0),
.snp_fwdin_tag (0),
`UNUSED_PIN (snp_fwdin_ready)
);
@@ -161,7 +159,7 @@ module VX_mem_unit # (
.DRAM_TAG_WIDTH (`DDRAM_TAG_WIDTH),
.SNP_REQ_TAG_WIDTH (`DSNP_TAG_WIDTH)
) dcache (
`SCOPE_SIGNALS_BANK_L1D_CACHE_BIND
`SCOPE_BIND_VX_mem_unit_dcache()
.clk (clk),
.reset (reset),
@@ -213,10 +211,10 @@ module VX_mem_unit # (
`UNUSED_PIN (snp_fwdout_addr),
`UNUSED_PIN (snp_fwdout_invalidate),
`UNUSED_PIN (snp_fwdout_tag),
.snp_fwdout_ready (0),
.snp_fwdout_ready (1'b0),
// Snoop forward in
.snp_fwdin_valid (0),
.snp_fwdin_valid (1'b0),
.snp_fwdin_tag (0),
`UNUSED_PIN (snp_fwdin_ready)
);
@@ -242,7 +240,7 @@ module VX_mem_unit # (
.CORE_TAG_ID_BITS (`ICORE_TAG_ID_BITS),
.DRAM_TAG_WIDTH (`IDRAM_TAG_WIDTH)
) icache (
`SCOPE_SIGNALS_BANK_L1I_CACHE_BIND
`SCOPE_BIND_VX_mem_unit_icache()
.clk (clk),
.reset (reset),
@@ -278,26 +276,26 @@ module VX_mem_unit # (
.dram_rsp_ready (icache_dram_rsp_if.ready),
// Snoop request
.snp_req_valid (0),
.snp_req_valid (1'b0),
.snp_req_addr (0),
.snp_req_invalidate (0),
.snp_req_invalidate (1'b0),
.snp_req_tag (0),
`UNUSED_PIN (snp_req_ready),
// Snoop response
`UNUSED_PIN (snp_rsp_valid),
`UNUSED_PIN (snp_rsp_tag),
.snp_rsp_ready (0),
.snp_rsp_ready (1'b0),
// Snoop forward out
`UNUSED_PIN (snp_fwdout_valid),
`UNUSED_PIN (snp_fwdout_addr),
`UNUSED_PIN (snp_fwdout_invalidate),
`UNUSED_PIN (snp_fwdout_tag),
.snp_fwdout_ready (0),
.snp_fwdout_ready (1'b0),
// Snoop forward in
.snp_fwdin_valid (0),
.snp_fwdin_valid (1'b0),
.snp_fwdin_tag (0),
`UNUSED_PIN (snp_fwdin_ready)
);

View File

@@ -3,10 +3,7 @@
module VX_pipeline #(
parameter CORE_ID = 0
) (
`SCOPE_SIGNALS_ISTAGE_IO
`SCOPE_SIGNALS_LSU_IO
`SCOPE_SIGNALS_ISSUE_IO
`SCOPE_SIGNALS_EXECUTE_IO
`SCOPE_IO_VX_pipeline
// Clock
input wire clk,
@@ -126,7 +123,7 @@ module VX_pipeline #(
VX_fetch #(
.CORE_ID(CORE_ID)
) fetch (
`SCOPE_SIGNALS_ISTAGE_BIND
`SCOPE_BIND_VX_pipeline_fetch()
.clk (clk),
.reset (reset),
.icache_req_if (core_icache_req_if),
@@ -153,7 +150,7 @@ module VX_pipeline #(
VX_issue #(
.CORE_ID(CORE_ID)
) issue (
`SCOPE_SIGNALS_ISSUE_BIND
`SCOPE_BIND_VX_pipeline_issue()
.clk (clk),
.reset (reset),
@@ -173,8 +170,8 @@ module VX_pipeline #(
VX_execute #(
.CORE_ID(CORE_ID)
) execute (
`SCOPE_SIGNALS_LSU_BIND
`SCOPE_SIGNALS_EXECUTE_BIND
`SCOPE_BIND_VX_pipeline_execute()
.clk (clk),
.reset (reset),

View File

@@ -52,7 +52,7 @@
///////////////////////////////////////////////////////////////////////////////
`define USE_FAST_BRAM (* syn_ramstyle = "mlab" *)
`define RELAX_BRAM_RW (* syn_ramstyle = "no_rw_check" *)
`define RELAXED_RW_BRAM (* syn_ramstyle = "no_rw_check" *)
///////////////////////////////////////////////////////////////////////////////

View File

@@ -1,4 +1,3 @@
`ifndef VX_SCOPE
`define VX_SCOPE
@@ -6,86 +5,76 @@
`include "scope-defs.vh"
`define SCOPE_ASSIGN(d,s) \
`IGNORE_WARNINGS_BEGIN \
assign d = s \
`IGNORE_WARNINGS_END
`define SCOPE_ASSIGN(d,s) assign d = s
`else
`define SCOPE_SIGNALS_ISTAGE_TOP_IO
`define SCOPE_SIGNALS_ISTAGE_TOP_BIND
`define SCOPE_SIGNALS_ISTAGE_CLUSTER_IO
`define SCOPE_SIGNALS_ISTAGE_CLUSTER_BIND
`define SCOPE_SIGNALS_ISTAGE_IO
`define SCOPE_SIGNALS_ISTAGE_BIND
`define SCOPE_SIGNALS_ISTAGE_CLUSTER_SELECT(__i__)
`define SCOPE_SIGNALS_ISTAGE_SELECT(__i__)
`define SCOPE_SIGNALS_LSU_TOP_IO
`define SCOPE_SIGNALS_LSU_TOP_BIND
`define SCOPE_SIGNALS_LSU_CLUSTER_IO
`define SCOPE_SIGNALS_LSU_CLUSTER_BIND
`define SCOPE_SIGNALS_LSU_IO
`define SCOPE_SIGNALS_LSU_BIND
`define SCOPE_SIGNALS_LSU_CLUSTER_SELECT(__i__)
`define SCOPE_SIGNALS_LSU_SELECT(__i__)
`define SCOPE_SIGNALS_ISSUE_TOP_IO
`define SCOPE_SIGNALS_ISSUE_TOP_BIND
`define SCOPE_SIGNALS_ISSUE_CLUSTER_IO
`define SCOPE_SIGNALS_ISSUE_CLUSTER_BIND
`define SCOPE_SIGNALS_ISSUE_IO
`define SCOPE_SIGNALS_ISSUE_BIND
`define SCOPE_SIGNALS_ISSUE_CLUSTER_SELECT(__i__)
`define SCOPE_SIGNALS_ISSUE_SELECT(__i__)
`define SCOPE_SIGNALS_EXECUTE_TOP_IO
`define SCOPE_SIGNALS_EXECUTE_TOP_BIND
`define SCOPE_SIGNALS_EXECUTE_CLUSTER_IO
`define SCOPE_SIGNALS_EXECUTE_CLUSTER_BIND
`define SCOPE_SIGNALS_EXECUTE_IO
`define SCOPE_SIGNALS_EXECUTE_BIND
`define SCOPE_SIGNALS_EXECUTE_CLUSTER_SELECT(__i__)
`define SCOPE_SIGNALS_EXECUTE_SELECT(__i__)
`define SCOPE_SIGNALS_BANK_L3_TOP_IO
`define SCOPE_SIGNALS_BANK_L3_TOP_BIND
`define SCOPE_SIGNALS_BANK_L2_TOP_IO
`define SCOPE_SIGNALS_BANK_L2_TOP_BIND
`define SCOPE_SIGNALS_BANK_L1D_TOP_IO
`define SCOPE_SIGNALS_BANK_L1D_TOP_BIND
`define SCOPE_SIGNALS_BANK_L1I_TOP_IO
`define SCOPE_SIGNALS_BANK_L1I_TOP_BIND
`define SCOPE_SIGNALS_BANK_L1S_TOP_IO
`define SCOPE_SIGNALS_BANK_L1S_TOP_BIND
`define SCOPE_SIGNALS_BANK_L2_CLUSTER_IO
`define SCOPE_SIGNALS_BANK_L2_CLUSTER_BIND
`define SCOPE_SIGNALS_BANK_L1D_CLUSTER_IO
`define SCOPE_SIGNALS_BANK_L1D_CLUSTER_BIND
`define SCOPE_SIGNALS_BANK_L1I_CLUSTER_IO
`define SCOPE_SIGNALS_BANK_L1I_CLUSTER_BIND
`define SCOPE_SIGNALS_BANK_L1S_CLUSTER_IO
`define SCOPE_SIGNALS_BANK_L1S_CLUSTER_BIND
`define SCOPE_SIGNALS_BANK_L1D_CORE_IO
`define SCOPE_SIGNALS_BANK_L1D_CORE_BIND
`define SCOPE_SIGNALS_BANK_L1I_CORE_IO
`define SCOPE_SIGNALS_BANK_L1I_CORE_BIND
`define SCOPE_SIGNALS_BANK_L1S_CORE_IO
`define SCOPE_SIGNALS_BANK_L1S_CORE_BIND
`define SCOPE_SIGNALS_BANK_CACHE_IO
`define SCOPE_SIGNALS_BANK_CACHE_BIND
`define SCOPE_SIGNALS_BANK_IO
`define SCOPE_SIGNALS_BANK_BIND
`define SCOPE_SIGNALS_BANK_L2_CLUSTER_SELECT(__i__)
`define SCOPE_SIGNALS_BANK_L1D_CLUSTER_SELECT(__i__)
`define SCOPE_SIGNALS_BANK_L1I_CLUSTER_SELECT(__i__)
`define SCOPE_SIGNALS_BANK_L1S_CLUSTER_SELECT(__i__)
`define SCOPE_SIGNALS_BANK_L1D_CORE_SELECT(__i__)
`define SCOPE_SIGNALS_BANK_L1I_CORE_SELECT(__i__)
`define SCOPE_SIGNALS_BANK_L1S_CORE_SELECT(__i__)
`define SCOPE_SIGNALS_BANK_L3_CACHE_BIND
`define SCOPE_SIGNALS_BANK_L2_CACHE_BIND
`define SCOPE_SIGNALS_BANK_L1D_CACHE_BIND
`define SCOPE_SIGNALS_BANK_L1I_CACHE_BIND
`define SCOPE_SIGNALS_BANK_L1S_CACHE_BIND
`define SCOPE_SIGNALS_BANK_SELECT(__i__)
// Empty stand-ins used when the scope analyzer is disabled: every SCOPE_IO_*
// and SCOPE_BIND_* macro referenced by the RTL must expand to nothing here,
// otherwise a build without SCOPE fails on an undefined macro.
`define SCOPE_IO_vortex_afu
`define SCOPE_IO_VX_icache_stage
`define SCOPE_IO_VX_fetch
`define SCOPE_BIND_VX_fetch_icache_stage()
`define SCOPE_IO_VX_pipeline
`define SCOPE_BIND_VX_pipeline_fetch()
`define SCOPE_IO_VX_core
`define SCOPE_BIND_VX_core_pipeline()
`define SCOPE_IO_VX_cluster
`define SCOPE_BIND_VX_cluster_core(__i__)
`define SCOPE_IO_Vortex
`define SCOPE_BIND_Vortex_cluster(__i__)
`define SCOPE_BIND_vortex_afu_vortex()
`define SCOPE_IO_VX_lsu_unit
`define SCOPE_IO_VX_execute
`define SCOPE_BIND_VX_execute_lsu_unit()
`define SCOPE_BIND_VX_pipeline_execute()
`define SCOPE_IO_VX_issue
`define SCOPE_BIND_VX_pipeline_issue()
`define SCOPE_IO_VX_bank
`define SCOPE_IO_VX_cache
`define SCOPE_BIND_VX_cache_bank(__i__)
`define SCOPE_BIND_Vortex_l3cache()
`define SCOPE_BIND_VX_cluster_l2cache()
`define SCOPE_IO_VX_mem_unit
`define SCOPE_BIND_VX_mem_unit_dcache()
`define SCOPE_BIND_VX_core_mem_unit()
`define SCOPE_BIND_VX_mem_unit_icache()
`define SCOPE_BIND_VX_mem_unit_smem()
// Stubs for the taps used by VX_gpu_unit and VX_warp_sched and by their
// instantiations in VX_execute and VX_fetch.
`define SCOPE_IO_VX_gpu_unit
`define SCOPE_BIND_VX_execute_gpu_unit()
`define SCOPE_IO_VX_warp_sched
`define SCOPE_BIND_VX_fetch_warp_sched()
`define SCOPE_DECL_SIGNALS
`define SCOPE_DATA_LIST
`define SCOPE_UPDATE_LIST
`define SCOPE_TRIGGER
`define SCOPE_ASSIGN(d,s)
`endif

View File

@@ -28,12 +28,16 @@ typedef struct packed {
logic [`NUM_THREADS-1:0] tmask;
} gpu_tmc_t;
`define GPU_TMC_SIZE (1+`NUM_THREADS)
typedef struct packed {
logic valid;
logic [`NUM_WARPS-1:0] wmask;
logic [31:0] pc;
} gpu_wspawn_t;
`define GPU_WSPAWN_SIZE (1+`NUM_WARPS+32)
typedef struct packed {
logic valid;
logic diverged;
@@ -42,10 +46,14 @@ typedef struct packed {
logic [31:0] pc;
} gpu_split_t;
`define GPU_SPLIT_SIZE (1+1+`NUM_THREADS+`NUM_THREADS+32)
typedef struct packed {
logic valid;
logic [`NB_BITS-1:0] id;
logic [`NW_BITS-1:0] size_m1;
} gpu_barrier_t;
// Packed width of gpu_barrier_t: valid (1) + id (`NB_BITS) + size_m1 (`NW_BITS).
`define GPU_BARRIER_SIZE (1+`NB_BITS+`NW_BITS)
`endif

View File

@@ -3,6 +3,8 @@
module VX_warp_sched #(
parameter CORE_ID = 0
) (
`SCOPE_IO_VX_warp_sched
input wire clk,
input wire reset,
@@ -248,4 +250,11 @@ module VX_warp_sched #(
assign busy = (active_warps != 0);
`SCOPE_ASSIGN (scope_wsched_scheduled_warp, scheduled_warp);
`SCOPE_ASSIGN (scope_wsched_active_warps, active_warps);
`SCOPE_ASSIGN (scope_wsched_schedule_table, schedule_table);
`SCOPE_ASSIGN (scope_wsched_schedule_ready, schedule_ready);
`SCOPE_ASSIGN (scope_wsched_warp_to_schedule, warp_to_schedule);
`SCOPE_ASSIGN (scope_wsched_warp_pc, warp_pc);
endmodule

View File

@@ -25,6 +25,7 @@ module VX_writeback #(
wire wb_valid;
wire [`NW_BITS-1:0] wb_wid;
wire [31:0] wb_PC;
wire [`NUM_THREADS-1:0] wb_tmask;
wire [`NR_BITS-1:0] wb_rd;
wire [`NUM_THREADS-1:0][31:0] wb_data;
@@ -43,6 +44,13 @@ module VX_writeback #(
fpu_valid ? fpu_commit_if.wid :
0;
// Select the PC that accompanies the instruction being written back.
// Priority mux over the commit interfaces, checked in the order
// alu, lsu, csr, mul, fpu; drives 0 when none of the valid flags is set.
assign wb_PC = alu_valid ? alu_commit_if.PC :
lsu_valid ? lsu_commit_if.PC :
csr_valid ? csr_commit_if.PC :
mul_valid ? mul_commit_if.PC :
fpu_valid ? fpu_commit_if.PC :
0;
assign wb_tmask = alu_valid ? alu_commit_if.tmask :
lsu_valid ? lsu_commit_if.tmask :
csr_valid ? csr_commit_if.tmask :
@@ -68,14 +76,14 @@ module VX_writeback #(
wire stall = 0/*~writeback_if.ready && writeback_if.valid*/;
VX_generic_register #(
.N(1 + `NW_BITS + `NUM_THREADS + `NR_BITS + (`NUM_THREADS * 32))
.N(1 + `NW_BITS + 32 + `NUM_THREADS + `NR_BITS + (`NUM_THREADS * 32))
) wb_reg (
.clk (clk),
.reset (reset),
.stall (stall),
.flush (1'b0),
.in ({wb_valid, wb_wid, wb_tmask, wb_rd, wb_data}),
.out ({writeback_if.valid, writeback_if.wid, writeback_if.tmask, writeback_if.rd, writeback_if.data})
.in ({wb_valid, wb_wid, wb_PC, wb_tmask, wb_rd, wb_data}),
.out ({writeback_if.valid, writeback_if.wid, writeback_if.PC, writeback_if.tmask, writeback_if.rd, writeback_if.data})
);
assign alu_commit_if.ready = !stall;

View File

@@ -1,15 +1,7 @@
`include "VX_define.vh"
module Vortex (
`SCOPE_SIGNALS_ISTAGE_TOP_IO
`SCOPE_SIGNALS_LSU_TOP_IO
`SCOPE_SIGNALS_BANK_L3_TOP_IO
`SCOPE_SIGNALS_BANK_L2_TOP_IO
`SCOPE_SIGNALS_BANK_L1D_TOP_IO
`SCOPE_SIGNALS_BANK_L1I_TOP_IO
`SCOPE_SIGNALS_BANK_L1S_TOP_IO
`SCOPE_SIGNALS_ISSUE_TOP_IO
`SCOPE_SIGNALS_EXECUTE_TOP_IO
`SCOPE_IO_Vortex
// Clock
input wire clk,
@@ -79,14 +71,7 @@ module Vortex (
VX_cluster #(
.CLUSTER_ID(0)
) cluster (
`SCOPE_SIGNALS_ISTAGE_CLUSTER_SELECT(0)
`SCOPE_SIGNALS_LSU_CLUSTER_SELECT(0)
`SCOPE_SIGNALS_BANK_L2_CLUSTER_SELECT(0)
`SCOPE_SIGNALS_BANK_L1D_CLUSTER_SELECT(0)
`SCOPE_SIGNALS_BANK_L1I_CLUSTER_SELECT(0)
`SCOPE_SIGNALS_BANK_L1S_CLUSTER_SELECT(0)
`SCOPE_SIGNALS_ISSUE_CLUSTER_SELECT(0)
`SCOPE_SIGNALS_EXECUTE_CLUSTER_SELECT(0)
`SCOPE_BIND_Vortex_cluster(0)
.clk (clk),
.reset (reset),
@@ -200,14 +185,7 @@ module Vortex (
VX_cluster #(
.CLUSTER_ID(i)
) cluster (
`SCOPE_SIGNALS_ISTAGE_CLUSTER_SELECT(i)
`SCOPE_SIGNALS_LSU_CLUSTER_SELECT(i)
`SCOPE_SIGNALS_BANK_L2_CLUSTER_SELECT(i)
`SCOPE_SIGNALS_BANK_L1D_CLUSTER_SELECT(i)
`SCOPE_SIGNALS_BANK_L1I_CLUSTER_SELECT(i)
`SCOPE_SIGNALS_BANK_L1S_CLUSTER_SELECT(i)
`SCOPE_SIGNALS_ISSUE_CLUSTER_SELECT(i)
`SCOPE_SIGNALS_EXECUTE_CLUSTER_SELECT(i)
`SCOPE_BIND_Vortex_cluster(i)
.clk (clk),
.reset (reset),
@@ -417,7 +395,7 @@ module Vortex (
.SNP_REQ_TAG_WIDTH (`L3SNP_TAG_WIDTH),
.SNP_FWD_TAG_WIDTH (`L2SNP_TAG_WIDTH)
) l3cache (
`SCOPE_SIGNALS_BANK_L3_CACHE_BIND
`SCOPE_BIND_Vortex_l3cache()
.clk (clk),
.reset (reset),

View File

@@ -50,7 +50,7 @@ module VX_bank #(
// Snooping request tag width
parameter SNP_REQ_TAG_WIDTH = 0
) (
`SCOPE_SIGNALS_BANK_IO
`SCOPE_IO_VX_bank
input wire clk,
input wire reset,
@@ -143,7 +143,7 @@ module VX_bank #(
) snp_req_queue (
.clk (clk),
.reset (reset),
.push (snp_req_valid),
.push (snp_req_valid && snp_req_ready),
.data_in ({snp_req_addr, snp_req_invalidate, snp_req_tag}),
.pop (snrq_pop),
.data_out({snrq_addr_st0, snrq_invalidate_st0, snrq_tag_st0}),
@@ -166,7 +166,7 @@ module VX_bank #(
) dfp_queue (
.clk (clk),
.reset (reset),
.push (dram_fill_rsp_valid),
.push (dram_fill_rsp_valid && dram_fill_rsp_ready),
.data_in ({dram_fill_rsp_addr, dram_fill_rsp_data}),
.pop (dfpq_pop),
.data_out({dfpq_addr_st0, dfpq_filldata_st0}),
@@ -353,7 +353,7 @@ module VX_bank #(
.clk (clk),
.reset (reset),
.stall (stall_bank_pipe),
.flush (0),
.flush (1'b0),
.in ({qual_is_mrvq_st0, qual_is_snp_st0, qual_snp_invalidate_st0, qual_going_to_write_st0, qual_valid_st0, qual_addr_st0, qual_wsel_st0, qual_writeword_st0, qual_inst_meta_st0, qual_is_fill_st0, qual_writedata_st0}),
.out ({is_mrvq_st1 , is_snp_st1, snp_invalidate_st1, going_to_write_st1, valid_st1, addr_st1, wsel_st1, writeword_st1, inst_meta_st1, is_fill_st1, writedata_st1})
);
@@ -480,7 +480,7 @@ module VX_bank #(
.clk (clk),
.reset (reset),
.stall (stall_bank_pipe),
.flush (0),
.flush (1'b0),
.in ({mrvq_recover_ready_state_st1, is_mrvq_st1_st2, mrvq_init_ready_state_st1, snp_to_mrvq_st1, is_snp_st1, snp_invalidate_st1, fill_saw_dirty_st1, is_fill_st1, qual_valid_st1_2, addr_st1, wsel_st1, writeword_st1, readword_st1, readdata_st1, readtag_st1, miss_st1, dirty_st1, dirtyb_st1, inst_meta_st1}),
.out ({mrvq_recover_ready_state_st2 , is_mrvq_st2 , mrvq_init_ready_state_unqual_st2, snp_to_mrvq_st2 , is_snp_st2 , snp_invalidate_st2, fill_saw_dirty_st2 , is_fill_st2 , valid_st2 , addr_st2, wsel_st2, writeword_st2, readword_st2, readdata_st2, readtag_st2, miss_st2, dirty_st2, dirtyb_st2, inst_meta_st2})
);
@@ -722,18 +722,18 @@ module VX_bank #(
end
`endif
`SCOPE_ASSIGN (scope_bank_valid_st0, qual_valid_st0);
`SCOPE_ASSIGN (scope_bank_valid_st1, valid_st1);
`SCOPE_ASSIGN (scope_bank_valid_st2, valid_st2);
`SCOPE_ASSIGN (scope_valid_st0, qual_valid_st0);
`SCOPE_ASSIGN (scope_valid_st1, valid_st1);
`SCOPE_ASSIGN (scope_valid_st2, valid_st2);
`SCOPE_ASSIGN (scope_bank_is_mrvq_st1, is_mrvq_st1);
`SCOPE_ASSIGN (scope_bank_miss_st1, miss_st1);
`SCOPE_ASSIGN (scope_bank_dirty_st1, dirty_st1);
`SCOPE_ASSIGN (scope_bank_force_miss_st1, force_request_miss_st1);
`SCOPE_ASSIGN (scope_bank_stall_pipe, stall_bank_pipe);
`SCOPE_ASSIGN (scope_is_mrvq_st1, is_mrvq_st1);
`SCOPE_ASSIGN (scope_miss_st1, miss_st1);
`SCOPE_ASSIGN (scope_dirty_st1, dirty_st1);
`SCOPE_ASSIGN (scope_force_miss_st1, force_request_miss_st1);
`SCOPE_ASSIGN (scope_stall_pipe, stall_bank_pipe);
`SCOPE_ASSIGN (scope_bank_addr_st0, `LINE_TO_BYTE_ADDR(qual_addr_st0, BANK_ID));
`SCOPE_ASSIGN (scope_bank_addr_st1, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID));
`SCOPE_ASSIGN (scope_bank_addr_st2, `LINE_TO_BYTE_ADDR(addr_st2, BANK_ID));
`SCOPE_ASSIGN (scope_addr_st0, `LINE_TO_BYTE_ADDR(qual_addr_st0, BANK_ID));
`SCOPE_ASSIGN (scope_addr_st1, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID));
`SCOPE_ASSIGN (scope_addr_st2, `LINE_TO_BYTE_ADDR(addr_st2, BANK_ID));
endmodule

View File

@@ -51,15 +51,15 @@ module VX_cache #(
parameter DRAM_TAG_WIDTH = 28,
// Number of snoop forwarding requests
parameter NUM_SNP_REQUESTS = 2,
parameter NUM_SNP_REQUESTS = 1,
// Snooping request tag width
parameter SNP_REQ_TAG_WIDTH = 28,
parameter SNP_REQ_TAG_WIDTH = 1,
// Snooping forward tag width
parameter SNP_FWD_TAG_WIDTH = 1
) (
`SCOPE_SIGNALS_BANK_CACHE_IO
`SCOPE_IO_VX_cache
input wire clk,
input wire reset,
@@ -365,7 +365,7 @@ module VX_cache #(
.CORE_TAG_ID_BITS (CORE_TAG_ID_BITS),
.SNP_REQ_TAG_WIDTH (SNP_REQ_TAG_WIDTH)
) bank (
`SCOPE_SIGNALS_BANK_SELECT(i)
`SCOPE_BIND_VX_cache_bank(i)
.clk (clk),
.reset (reset),

View File

@@ -91,7 +91,7 @@ module VX_cache_core_rsp_merge #(
.clk (clk),
.reset (reset),
.stall (stall),
.flush (0),
.flush (1'b0),
.in ({core_rsp_valid_unqual, core_rsp_data_unqual, core_rsp_tag_unqual}),
.out ({core_rsp_valid, core_rsp_data, core_rsp_tag})
);

View File

@@ -125,12 +125,12 @@ module VX_cache_miss_resrv #(
ready_table[enqueue_index] <= mrvq_init_ready_state;
addr_table[enqueue_index] <= miss_add_addr;
metadata_table[enqueue_index] <= {miss_add_data, miss_add_tid, miss_add_tag, miss_add_rw, miss_add_byteen, miss_add_wsel, miss_add_is_snp, miss_add_snp_invalidate};
tail_ptr <= tail_ptr + 1;
tail_ptr <= tail_ptr + $bits(tail_ptr)'(1);
end else if (increment_head) begin
valid_table[head_ptr] <= 0;
head_ptr <= head_ptr + 1;
head_ptr <= head_ptr + $bits(head_ptr)'(1);
end else if (recover_state) begin
schedule_ptr <= schedule_ptr - 1;
schedule_ptr <= schedule_ptr - $bits(schedule_ptr)'(1);
end
// update entry as 'ready' during DRAM fill response
@@ -140,15 +140,15 @@ module VX_cache_miss_resrv #(
if (mrvq_pop) begin
ready_table[dequeue_index] <= 0;
schedule_ptr <= schedule_ptr + 1;
schedule_ptr <= schedule_ptr + $bits(schedule_ptr)'(1);
end
if (!(mrvq_push && increment_head)) begin
if (mrvq_push) begin
size <= size + 1;
size <= size + $bits(size)'(1);
end
if (increment_head) begin
size <= size - 1;
size <= size - $bits(size)'(1);
end
end
end

View File

@@ -19,15 +19,14 @@ module VX_generic_queue #(
);
`STATIC_ASSERT(`ISPOW2(SIZE), ("must be 0 or power of 2!"))
reg [SIZEW-1:0] size_r;
wire reading;
wire writing;
assign reading = pop && !empty;
assign writing = push && !full;
always @(*) begin
assert(!pop || !empty);
assert(!push || !full);
end
if (SIZE == 1) begin // (SIZE == 1)
reg [SIZEW-1:0] size_r;
reg [DATAW-1:0] head_r;
always @(posedge clk) begin
@@ -35,12 +34,12 @@ module VX_generic_queue #(
head_r <= 0;
size_r <= 0;
end else begin
if (writing && !reading) begin
if (push && !pop) begin
size_r <= 1;
end else if (reading && !writing) begin
end else if (pop && !push) begin
size_r <= 0;
end
if (writing) begin
if (push) begin
head_r <= data_in;
end
end
@@ -53,10 +52,58 @@ module VX_generic_queue #(
end else begin // (SIZE > 1)
`ifdef QUARTUS
// Vendor FIFO used in place of the hand-written queue: an instance of the
// "scfifo" primitive, clocked by clk and synchronously cleared by reset.
// push/pop drive wrreq/rdreq directly, and empty/full/data_out come straight
// from the primitive.
scfifo scfifo_component (
.clock (clk),
.data (data_in),
.rdreq (pop),
.wrreq (push),
.empty (empty),
.full (full),
.q (data_out),
.sclr (reset),
// The remaining ports are intentionally left unconnected.
.usedw (),
.aclr (),
.almost_empty (),
.almost_full (),
.eccstatus ()
);
// Primitive configuration: depth = SIZE words, word width = DATAW bits,
// targeting the "Arria 10" device family.
defparam
scfifo_component.lpm_type = "scfifo",
scfifo_component.intended_device_family = "Arria 10",
scfifo_component.lpm_numwords = SIZE,
scfifo_component.lpm_width = DATAW,
scfifo_component.lpm_widthu = $clog2(SIZE),
// NOTE(review): show-ahead is presumably required so that data_out presents
// the head entry before pop is asserted, like the non-vendor implementation
// - confirm against the scfifo documentation.
scfifo_component.lpm_showahead = "ON",
// NOTE(review): both arms of this conditional are "ON", so BUFFERED has no
// effect on the output register here - confirm whether the false arm was
// meant to be "OFF".
scfifo_component.add_ram_output_register = (BUFFERED ? "ON" : "ON"),
scfifo_component.use_eab = "ON";
// Occupancy counter maintained alongside the primitive (its usedw output is
// not connected above).
reg [SIZEW-1:0] size_r;
always @(posedge clk) begin
if (reset) begin
size_r <= 0;
end else begin
// A push without a pop grows the count by one.
if (push && !pop) begin
size_r <= size_r + SIZEW'(1);
end
// A pop without a push shrinks the count by one; a simultaneous
// push and pop leaves it unchanged.
if (pop && !push) begin
size_r <= size_r - SIZEW'(1);
end
end
end
assign size = size_r;
`else
`USE_FAST_BRAM reg [DATAW-1:0] data [SIZE-1:0];
if (0 == BUFFERED) begin
reg [SIZEW-1:0] size_r;
reg [ADDRW:0] rd_ptr_r;
reg [ADDRW:0] wr_ptr_r;
@@ -69,20 +116,24 @@ module VX_generic_queue #(
wr_ptr_r <= 0;
size_r <= 0;
end else begin
if (writing) begin
data[wr_ptr_a] <= data_in;
wr_ptr_r <= wr_ptr_r + 1;
if (!reading) begin
size_r <= size_r + 1;
if (push) begin
wr_ptr_r <= wr_ptr_r + (ADDRW+1)'(1);
if (!pop) begin
size_r <= size_r + SIZEW'(1);
end
end
if (pop) begin
rd_ptr_r <= rd_ptr_r + (ADDRW+1)'(1);
if (!push) begin
size_r <= size_r - SIZEW'(1);
end
end
end
end
if (reading) begin
rd_ptr_r <= rd_ptr_r + 1;
if (!writing) begin
size_r <= size_r - 1;
end
end
always @(posedge clk) begin
if (push) begin
data[wr_ptr_a] <= data_in;
end
end
@@ -93,6 +144,7 @@ module VX_generic_queue #(
end else begin
reg [SIZEW-1:0] size_r;
reg [DATAW-1:0] head_r;
reg [DATAW-1:0] curr_r;
reg [ADDRW-1:0] wr_ptr_r;
@@ -105,7 +157,6 @@ module VX_generic_queue #(
always @(posedge clk) begin
if (reset) begin
size_r <= 0;
head_r <= 0;
curr_r <= 0;
wr_ptr_r <= 0;
rd_ptr_r <= 0;
@@ -113,43 +164,50 @@ module VX_generic_queue #(
empty_r <= 1;
full_r <= 0;
end else begin
if (writing) begin
data[wr_ptr_r] <= data_in;
wr_ptr_r <= wr_ptr_r + 1;
if (push) begin
wr_ptr_r <= wr_ptr_r + ADDRW'(1);
if (!reading) begin
if (!pop) begin
empty_r <= 0;
if (size_r == ($bits(size_r)'(SIZE-1))) begin
if (size_r == SIZEW'(SIZE-1)) begin
full_r <= 1;
end
size_r <= size_r + 1;
size_r <= size_r + SIZEW'(1);
end
end
if (reading) begin
if (pop) begin
rd_ptr_r <= rd_ptr_next_r;
if (SIZE > 2) begin
rd_ptr_next_r <= rd_ptr_r + $bits(rd_ptr_r)'(2);
rd_ptr_next_r <= rd_ptr_r + ADDRW'(2);
end else begin // (SIZE == 2);
rd_ptr_next_r <= ~rd_ptr_next_r;
end
if (!writing) begin
if (size_r == 1) begin
if (!push) begin
if (size_r == SIZEW'(1)) begin
assert(rd_ptr_next_r == wr_ptr_r);
empty_r <= 1;
end;
full_r <= 0;
size_r <= size_r - 1;
size_r <= size_r - SIZEW'(1);
end
end
bypass_r <= writing
&& (empty_r || ((1 == size_r) && reading)); // empty or about to go empty
bypass_r <= push && (empty_r || ((size_r == SIZEW'(1)) && pop));
curr_r <= data_in;
head_r <= data[reading ? rd_ptr_next_r : rd_ptr_r];
end
end
always @(posedge clk) begin
if (reset) begin
head_r <= 0;
end else begin
if (push) begin
data[wr_ptr_r] <= data_in;
end
head_r <= data[pop ? rd_ptr_next_r : rd_ptr_r];
end
end
@@ -158,6 +216,9 @@ module VX_generic_queue #(
assign full = full_r;
assign size = size_r;
end
`endif
end
endmodule

View File

@@ -28,9 +28,13 @@ module VX_index_queue #(
assign empty = (wr_ptr == rd_ptr);
assign full = (wr_a == rd_a) && (wr_ptr[`LOG2UP(SIZE)] != rd_ptr[`LOG2UP(SIZE)]);
assign enqueue = push && !full;
assign enqueue = push;
assign dequeue = !empty && !valid[rd_a]; // auto-remove when head is invalid
always @(*) begin
assert(!push || !full);
end
always @(posedge clk) begin
if (reset) begin
rd_ptr <= 0;

View File

@@ -126,11 +126,11 @@ module VX_scope #(
|| (trigger_id != prev_trigger_id)) begin
delta_store[waddr] <= delta;
data_store[waddr] <= data_in;
waddr <= waddr + 1;
waddr <= waddr + $bits(waddr)'(1);
delta <= 0;
delta_flush <= 0;
end else begin
delta <= delta + 1;
delta <= delta + DELTAW'(1);
delta_flush <= (delta == (MAX_DELTA-1));
end
prev_trigger_id <= trigger_id;
@@ -159,7 +159,7 @@ module VX_scope #(
if (read_offset < $bits(read_offset)'(DATAW-BUSW)) begin
read_offset <= read_offset + $bits(read_offset)'(BUSW);
end else begin
raddr <= raddr + 1;
raddr <= raddr + $bits(raddr)'(1);
read_offset <= 0;
read_delta <= 1;
if (raddr == waddr) begin

View File

@@ -6,125 +6,196 @@
"../rtl/VX_define.vh",
"../rtl/cache/VX_cache_config.vh"
],
"parameters": {
"L3_ENABLE": "`L3_ENABLE",
"L2_ENABLE": "`L2_ENABLE",
"NUM_CLUSTERS": "`NUM_CLUSTERS",
"NUM_CORES": "`NUM_CORES",
"DNUM_BANKS": "`DNUM_BANKS",
"INUM_BANKS": "`INUM_BANKS",
"SNUM_BANKS": "`SNUM_BANKS",
"L2NUM_BANKS": "`L2NUM_BANKS",
"L3NUM_BANKS": "`L3NUM_BANKS"
"modules": {
"*": {
"enabled": "(`NUM_CLUSTERS > 0)",
"submodules": {
"afu": {"type":"vortex_afu"}
}
},
"vortex_afu": {
"submodules": {
"vortex": {"type":"Vortex"}
}
},
"Vortex": {
"submodules": {
"cluster": {"type":"VX_cluster", "count":"`NUM_CLUSTERS"},
"l3cache": {"type":"VX_cache", "enabled":"`L3_ENABLE", "params":{"NUM_BANKS":"`L3NUM_BANKS"}}
}
},
"VX_cluster": {
"submodules": {
"core": {"type":"VX_core", "count":"`NUM_CORES"},
"l2cache": {"type":"VX_cache", "enabled":"`L2_ENABLE", "params":{"NUM_BANKS":"`L2NUM_BANKS"}}
}
},
"VX_core": {
"submodules": {
"pipeline": {"type":"VX_pipeline", "enabled":false},
"mem_unit": {"type":"VX_mem_unit", "enabled":true}
}
},
"VX_pipeline": {
"submodules": {
"fetch": {"type":"VX_fetch", "enabled":true},
"decode": {"type":"VX_decode", "enabled":true},
"issue": {"type":"VX_issue", "enabled":true},
"execute": {"type":"VX_execute", "enabled":true},
"commit": {"type":"VX_commit", "enabled":true}
}
},
"VX_fetch": {
"submodules": {
"warp_sched": {"type":"VX_warp_sched"},
"icache_stage": {"type":"VX_icache_stage"}
}
},
"VX_warp_sched": {},
"VX_icache_stage": {},
"VX_decode": {},
"VX_issue": {},
"VX_execute": {
"submodules": {
"lsu_unit": {"type":"VX_lsu_unit"},
"gpu_unit": {"type":"VX_gpu_unit"}
}
},
"VX_commit": {},
"VX_lsu_unit": {},
"VX_gpu_unit": {},
"VX_mem_unit": {
"submodules": {
"smem": {"type":"VX_cache", "params":{"NUM_BANKS":"`SNUM_BANKS"}},
"dcache": {"type":"VX_cache", "params":{"NUM_BANKS":"`DNUM_BANKS"}},
"icache": {"type":"VX_cache", "params":{"NUM_BANKS":"`INUM_BANKS"}}
}
},
"VX_cache": {
"submodules": {
"bank": {"type":"VX_bank", "count":"NUM_BANKS"}
}
},
"VX_bank": {}
},
"taps": {
"top::SCOPE_SIGNALS_AFU": {
"!scope_dram_req_valid": 1,
"scope_dram_req_addr": 32,
"scope_dram_req_rw": 1,
"scope_dram_req_byteen": "`VX_DRAM_BYTEEN_WIDTH",
"scope_dram_req_data": "`VX_DRAM_LINE_WIDTH",
"scope_dram_req_tag": "`VX_DRAM_TAG_WIDTH",
"!scope_dram_req_ready": 1,
"!scope_dram_rsp_valid": 1,
"scope_dram_rsp_data": "`VX_DRAM_LINE_WIDTH",
"scope_dram_rsp_tag": "`VX_DRAM_TAG_WIDTH",
"!scope_dram_rsp_ready": 1,
"!scope_snp_req_valid": 1,
"scope_snp_req_addr": 32,
"scope_snp_req_invalidate": 1,
"scope_snp_req_tag": "`VX_SNP_TAG_WIDTH",
"!scope_snp_req_ready": 1,
"!scope_snp_rsp_valid": 1,
"scope_snp_rsp_tag": "`VX_SNP_TAG_WIDTH",
"!scope_snp_rsp_ready": 1,
"scope_busy": 1
"afu": {
"!reset": 1,
"?dram_req_valid": 1,
"dram_req_addr": 32,
"dram_req_rw": 1,
"dram_req_byteen":"`VX_DRAM_BYTEEN_WIDTH",
"dram_req_data":"`VX_DRAM_LINE_WIDTH",
"dram_req_tag":"`VX_DRAM_TAG_WIDTH",
"?dram_req_ready": 1,
"?dram_rsp_valid": 1,
"dram_rsp_data":"`VX_DRAM_LINE_WIDTH",
"dram_rsp_tag":"`VX_DRAM_TAG_WIDTH",
"?dram_rsp_ready": 1,
"?snp_req_valid": 1,
"snp_req_addr": 32,
"snp_req_invalidate": 1,
"snp_req_tag":"`VX_SNP_TAG_WIDTH",
"?snp_req_ready": 1,
"?snp_rsp_valid": 1,
"snp_rsp_tag":"`VX_SNP_TAG_WIDTH",
"?snp_rsp_ready": 1,
"busy": 1
},
"core::SCOPE_SIGNALS_ISTAGE": {
"!scope_icache_req_valid": 1,
"scope_icache_req_wid": "`NW_BITS",
"scope_icache_req_addr": 32,
"scope_icache_req_tag": "`ICORE_TAG_ID_BITS",
"!scope_icache_req_ready": 1,
"!scope_icache_rsp_valid": 1,
"scope_icache_rsp_data": 32,
"scope_icache_rsp_tag": "`ICORE_TAG_ID_BITS",
"!scope_icache_rsp_ready": 1
"afu/vortex/cluster/core/pipeline/fetch/icache_stage": {
"?icache_req_valid": 1,
"icache_req_wid":"`NW_BITS",
"icache_req_addr": 32,
"icache_req_tag":"`ICORE_TAG_ID_BITS",
"?icache_req_ready": 1,
"?icache_rsp_valid": 1,
"icache_rsp_data": 32,
"icache_rsp_tag":"`ICORE_TAG_ID_BITS",
"?icache_rsp_ready": 1
},
"core::SCOPE_SIGNALS_LSU": {
"!scope_dcache_req_valid": "`NUM_THREADS",
"scope_dcache_req_wid": "`NW_BITS",
"scope_dcache_req_pc": 32,
"scope_dcache_req_addr": "`NUM_THREADS * 32",
"scope_dcache_req_rw": 1,
"scope_dcache_req_byteen": "`NUM_THREADS * 4",
"scope_dcache_req_data": "`NUM_THREADS * 32",
"scope_dcache_req_tag": "`DCORE_TAG_ID_BITS",
"!scope_dcache_req_ready": 1,
"!scope_dcache_rsp_valid": "`NUM_THREADS",
"scope_dcache_rsp_data": "`NUM_THREADS * 32",
"scope_dcache_rsp_tag": "`DCORE_TAG_ID_BITS",
"!scope_dcache_rsp_ready": 1
"afu/vortex/cluster/core/pipeline/fetch/warp_sched": {
"?wsched_scheduled_warp": 1,
"wsched_active_warps": "`NUM_WARPS",
"wsched_schedule_table": "`NUM_WARPS",
"wsched_schedule_ready": "`NUM_WARPS",
"wsched_warp_to_schedule": "`NW_BITS",
"wsched_warp_pc": "32"
},
"core::SCOPE_SIGNALS_ISSUE": {
"!scope_issue_valid": 1,
"scope_issue_wid": "`NW_BITS",
"scope_issue_tmask": "`NUM_THREADS",
"scope_issue_pc": 32,
"scope_issue_ex_type": "`EX_BITS",
"scope_issue_op_type": "`OP_BITS",
"scope_issue_op_mod": "`MOD_BITS",
"scope_issue_wb": 1,
"scope_issue_rd": "`NR_BITS",
"scope_issue_rs1": "`NR_BITS",
"scope_issue_rs2": "`NR_BITS",
"scope_issue_rs3": "`NR_BITS",
"scope_issue_imm": 32,
"scope_issue_rs1_is_pc": 1,
"scope_issue_rs2_is_imm": 1,
"!scope_issue_ready": 1,
"scope_gpr_rsp_wid": "`NW_BITS",
"scope_gpr_rsp_pc": 32,
"scope_gpr_rsp_a": "`NUM_THREADS * 32",
"scope_gpr_rsp_b": "`NUM_THREADS * 32",
"scope_gpr_rsp_c": "`NUM_THREADS * 32",
"!scope_gpr_delay": 1,
"!scope_writeback_valid": 1,
"scope_writeback_wid": "`NW_BITS",
"scope_writeback_pc": 32,
"scope_writeback_rd": "`NR_BITS",
"scope_writeback_data": "`NUM_THREADS * 32",
"!scope_scoreboard_delay": 1,
"!scope_execute_delay": 1
"afu/vortex/cluster/core/pipeline/execute/gpu_unit": {
"?gpu_req_valid": 1,
"gpu_req_wid": "`NW_BITS",
"gpu_req_tmask": "`NUM_THREADS",
"gpu_req_op_type": "`GPU_BITS",
"gpu_req_rs1": "32",
"gpu_req_rs2": "32",
"?gpu_req_ready": 1,
"?gpu_rsp_valid": 1,
"gpu_rsp_wid": "`NW_BITS",
"gpu_rsp_tmc": "`GPU_TMC_SIZE",
"gpu_rsp_wspawn": "`GPU_WSPAWN_SIZE",
"gpu_rsp_split": "`GPU_SPLIT_SIZE",
"gpu_rsp_barrier": "`GPU_BARRIER_SIZE"
},
"core::SCOPE_SIGNALS_EXECUTE": {},
"bank::SCOPE_SIGNALS_BANK": {
"!scope_bank_valid_st0": 1,
"!scope_bank_valid_st1": 1,
"!scope_bank_valid_st2": 1,
"scope_bank_addr_st0": 32,
"scope_bank_addr_st1": 32,
"scope_bank_addr_st2": 32,
"scope_bank_is_mrvq_st1": 1,
"scope_bank_miss_st1": 1,
"scope_bank_dirty_st1": 1,
"!scope_bank_force_miss_st1": 1,
"!scope_bank_stall_pipe": 1
"afu/vortex/cluster/core/pipeline/execute/lsu_unit": {
"?dcache_req_valid":"`NUM_THREADS",
"dcache_req_wid":"`NW_BITS",
"dcache_req_pc": 32,
"dcache_req_addr":"`NUM_THREADS * 32",
"dcache_req_rw": 1,
"dcache_req_byteen":"`NUM_THREADS * 4",
"dcache_req_data": "`NUM_THREADS * 32",
"dcache_req_tag":"`DCORE_TAG_ID_BITS",
"?dcache_req_ready": 1,
"?dcache_rsp_valid":"`NUM_THREADS",
"dcache_rsp_data":"`NUM_THREADS * 32",
"dcache_rsp_tag":"`DCORE_TAG_ID_BITS",
"?dcache_rsp_ready": 1
},
"afu/vortex/cluster/core/pipeline/issue": {
"?issue_valid": 1,
"issue_wid":"`NW_BITS",
"issue_tmask":"`NUM_THREADS",
"issue_pc": 32,
"issue_ex_type":"`EX_BITS",
"issue_op_type":"`OP_BITS",
"issue_op_mod":"`MOD_BITS",
"issue_wb": 1,
"issue_rd":"`NR_BITS",
"issue_rs1":"`NR_BITS",
"issue_rs2":"`NR_BITS",
"issue_rs3":"`NR_BITS",
"issue_imm": 32,
"issue_rs1_is_pc": 1,
"issue_rs2_is_imm": 1,
"?issue_ready": 1,
"?gpr_rsp_valid": 1,
"gpr_rsp_wid":"`NW_BITS",
"gpr_rsp_pc": 32,
"gpr_rsp_a":"`NUM_THREADS * 32",
"gpr_rsp_b":"`NUM_THREADS * 32",
"gpr_rsp_c":"`NUM_THREADS * 32",
"!gpr_delay": 1,
"?writeback_valid": 1,
"writeback_wid":"`NW_BITS",
"writeback_pc": 32,
"writeback_rd":"`NR_BITS",
"writeback_data":"`NUM_THREADS * 32",
"!scoreboard_delay": 1,
"!execute_delay": 1
},
"afu/vortex/l3cache/bank, afu/vortex/cluster/l2cache/bank, afu/vortex/cluster/core/mem_unit/dcache/bank, afu/vortex/cluster/core/mem_unit/icache/bank, afu/vortex/cluster/core/mem_unit/smem/bank": {
"?valid_st0": 1,
"?valid_st1": 1,
"?valid_st2": 1,
"addr_st0": 32,
"addr_st1": 32,
"addr_st2": 32,
"is_mrvq_st1": 1,
"miss_st1": 1,
"dirty_st1": 1,
"!force_miss_st1": 1,
"!stall_pipe": 1
}
}
},
"triggers": [
["scope_dram_req_valid", "scope_dram_req_ready"],
["scope_dram_rsp_valid", "scope_dram_rsp_ready"],
["scope_snp_req_valid", "scope_snp_req_ready"],
["scope_snp_rsp_valid", "scope_snp_rsp_ready"],
["scope_icache_req_valid_top", "scope_icache_req_ready_top"],
["scope_icache_rsp_valid_top", "scope_icache_rsp_ready_top"],
["scope_dcache_req_valid_top", "scope_dcache_req_ready_top"],
["scope_dcache_rsp_valid_top", "scope_dcache_rsp_ready_top"],
["scope_issue_valid_top", "scope_issue_ready_top"]
]
}

View File

@@ -11,12 +11,89 @@ vl_ifdef_re = re.compile(r"^\s*`(ifdef|ifndef|elsif)\s+(\w+)\s*$")
vl_endif_re = re.compile(r"^\s*`(endif|else)\s*$")
vl_expand_re = re.compile(r"`([0-9a-zA-Z_]+)")
parameters = []
exclude_files = []
include_dirs = []
macros = []
br_stack = []
def translate_ternary(text):
    """Rewrite C/Verilog ternaries (``cond ? a : b``) as Python ones (``a if cond else b``).

    The text is rewritten repeatedly, one ``?`` per pass, until no ``?``
    remains at index 1 or later (a ``?`` at index 0 is never examined).

    Operand scanning is deliberately simple: an operand is a run of
    identifier characters (alphanumerics and ``_``) that may include one
    balanced parenthesized group. Anything more complex (e.g. ``a + b`` as a
    condition) must be wrapped in parentheses by the caller.

    Args:
        text: expression string to translate.

    Returns:
        The translated string; unchanged if it contains no ternary.

    Raises:
        Exception: if an operand or the ``:`` separator is missing.
    """

    def skip_space(text, i, ln, step):
        # Advance i in direction `step` until a non-space character is found
        # or i leaves the range [0, ln).
        while (i >= 0) and (i < ln):
            if not text[i].isspace():
                break
            i += step
        return i

    def skip_expr(text, i, ln, step):
        # Walk over a single operand starting at i in direction `step` and
        # return the index of its last character in that direction.
        paren = 0
        checkparen = True
        opener = ')' if step < 0 else '('
        closer = '(' if step < 0 else ')'
        while (i >= 0) and (i < ln):
            c = text[i]
            if checkparen and (c == opener):
                paren += 1
            elif checkparen and (c == closer):
                if paren == 0:
                    # unmatched closer: belongs to an enclosing expression
                    break
                paren -= 1
                if paren == 0:
                    # the group is complete; only identifier characters
                    # adjacent to it (e.g. a function name) are accepted now
                    i = skip_space(text, i + step, ln, step)
                    checkparen = False
                    continue
            elif (paren == 0) and not (c.isalnum() or (c == '_')):
                break
            i += step
        return i - step

    def parse_ternary(text):
        # Locate the first '?' and return the index bounds of its three
        # operands, or None if there is no '?'.
        ln = len(text)
        for i in range(1, ln):
            if text[i] != '?':
                continue
            # condition expression (scanned backwards from the '?')
            cond_end = skip_space(text, i - 1, ln, -1)
            # Fix: validate the scanned position (cond_end), not the loop
            # index, which can never be negative.
            if cond_end < 0:
                raise Exception("invalid condition expression")
            cond_start = skip_expr(text, cond_end, ln, -1)
            if cond_start > cond_end:
                raise Exception("invalid condition expression")
            # true expression
            true_start = skip_space(text, i + 1, ln, 1)
            if true_start >= ln:
                raise Exception("invalid true expression")
            true_end = skip_expr(text, true_start, ln, 1)
            if true_end < true_start:
                raise Exception("invalid true expression")
            # colon
            colon = skip_space(text, true_end + 1, ln, 1)
            if colon >= ln:
                raise Exception("invalid colon")
            if text[colon] != ':':
                raise Exception("missing colon")
            # false expression
            false_start = skip_space(text, colon + 1, ln, 1)
            if false_start >= ln:
                raise Exception("invalid false expression")
            false_end = skip_expr(text, false_start, ln, 1)
            if false_end < false_start:
                raise Exception("invalid false expression")
            return (cond_end, cond_start, true_start, true_end, false_start, false_end)
        return None

    while True:
        pos = parse_ternary(text)
        if pos is None:
            break
        cond_end, cond_start, true_start, true_end, false_start, false_end = pos
        # convert to python ternary: <true> if <cond> else <false>
        text = (text[:cond_start]
                + text[true_start:true_end + 1]
                + " if " + text[cond_start:cond_end + 1]
                + " else " + text[false_start:false_end + 1]
                + text[false_end + 1:])
    return text
def parse_func_args(text):
args = []
arg = ''
@@ -26,7 +103,6 @@ def parse_func_args(text):
paren = 1
for i in range(1, l):
c = text[i]
if c == '(':
paren += 1
elif c == ')':
@@ -36,17 +112,14 @@ def parse_func_args(text):
if paren == 0:
l = i
break
if c == ',' and paren == 1:
if arg.strip():
args.append(arg)
arg = ''
else:
arg += c
if paren != 0:
raise Exception("missing closing parenthesis: " + text)
if arg.strip():
args.append(arg)
@@ -90,9 +163,29 @@ def find_macro(name):
return macro
return None
def expand_text(text):
def expand_text(text, params):
class DoRepl(object):
def re_pattern_args(args):
    """Build a regex that matches any name in `args` as a whole identifier.

    The names are joined into one capturing alternation group, guarded on
    both sides so that a name embedded in a longer identifier does not match.
    Names are inserted verbatim (not regex-escaped).
    """
    alternatives = "|".join(name for name in args)
    return "(?<![0-9a-zA-Z_])(" + alternatives + ")(?![0-9a-zA-Z_])"
class DoReplParam(object):
    """Replacement callback for re.sub that substitutes parameter names.

    `params` maps a name to its replacement text. `expanded` becomes True
    once the callback has been invoked at least once.
    """

    def __init__(self, params):
        self.expanded = False
        self.params = params

    def __call__(self, match):
        # group(1) holds the matched parameter name
        key = match.group(1)
        self.expanded = True
        return self.params[key]
class DoReplMacro(object):
def __init__(self):
self.expanded = False
self.has_func = False
@@ -107,17 +200,6 @@ def expand_text(text):
return macro[2]
return "`" + name
class DoRepl2(object):
def __init__(self, args, f_args):
map = {}
for i in range(len(args)):
map[args[i]] = f_args[i]
self.map = map
def __call__(self, match):
for key in match.groups():
return self.map[key]
return group
def repl_func_macro(text):
expanded = False
match = re.search(vl_expand_re, text)
@@ -137,14 +219,11 @@ def expand_text(text):
if len(args) != len(f_args[0]):
raise Exception("mismatch number of argments for macro '" + name + "': actual=" + len(f_args[0]) + ", expected=" + len(args))
pattern = "(?<![0-9a-zA-Z_])("
pattern = re_pattern_args(args)
params = {}
for i in range(len(args)):
if i > 0:
pattern += "|"
pattern += args[i]
pattern += ")(?![0-9a-zA-Z_])"
dorepl = DoRepl2(args, f_args[0])
params[args[i]] = f_args[0][i]
dorepl = DoReplParam(params)
value = re.sub(pattern, dorepl, value)
str_head = text[0:match.start()]
@@ -163,10 +242,18 @@ def expand_text(text):
raise Exception("Macro recursion!")
has_func = False
while True:
do_repl = DoRepl()
params_updated = False
if not params is None:
do_repl = DoReplParam(params)
pattern = re_pattern_args(params)
new_text = re.sub(pattern, do_repl, text)
if do_repl.expanded:
text = new_text
params_updated = True
do_repl = DoReplMacro()
new_text = re.sub(vl_expand_re, do_repl, text)
has_func = do_repl.has_func
if not do_repl.expanded:
if not (params_updated or do_repl.expanded):
break
text = new_text
changed = True
@@ -291,7 +378,28 @@ def load_config(filename):
print("condfig=", config)
return config
def gen_vl_header(file, taps, triggers):
def eval_node(text, params):
    """Evaluate a configuration value to a Python object where possible.

    Non-string values are returned untouched. Strings are first expanded
    with expand_text(text, params); `$clog2` calls are mapped to a local
    helper, C-style ternaries are translated to Python, and the result is
    evaluated. If evaluation fails with NameError or SyntaxError, the
    expanded text is returned as-is.

    NOTE(review): this calls eval() on configuration text - only safe as
    long as the configuration file is trusted.
    """
    def clog2(x):
        # ceiling of log2, as an int
        return int(math.ceil(math.log2(x)))

    if type(text) != str:
        return text

    # keep the original text when expansion yields nothing
    text = expand_text(text, params) or text

    try:
        expr = translate_ternary(text.replace('$clog2', '__clog2'))
        return eval(expr, {'__clog2': clog2})
    except (NameError, SyntaxError):
        return text
def gen_vl_header(file, modules, taps):
header = '''
`ifndef VX_SCOPE_DEFS
@@ -299,238 +407,274 @@ def gen_vl_header(file, taps, triggers):
'''
footer = '`endif'
def signal_size(size, asize):
str_asize = ""
for s in asize:
if type(s) == int:
str_asize += "[" + str(s-1) + ":0]"
else:
str_asize += "[" + str(s) + "-1:0]"
def signal_size(size, mn):
if type(size) == int:
size1 = (size-1)
if size1 != 0:
return str_asize + "[" + str(size1) + ":0]"
if (size != mn):
return "[" + str(size-1) + ":0]"
else:
return str_asize
return ""
else:
return str_asize + "[(" + size + ")-1:0]"
return "[" + size + "-1:0]"
def generate_ports(tclass, tap, ports, new_taps):
def create_signal(key, ports):
if not key in ports:
ports[key] = []
return ports[key]
def emit_io(tap, ports, prefix, asize, return_list, new_taps, is_enabled):
stap = tap + "_IO"
new_taps.append(stap)
print("`define " + stap + " \\", file=f)
if is_enabled:
for key in ports:
size = ports[key]
name = key
is_trigger = False
if name[0] == '!':
name = name[1:]
is_trigger = True
if not return_list is None:
return_list.append((name + prefix, size, asize, is_trigger))
print("\toutput wire" + signal_size(size, asize) + " " + name + prefix + ", \\", file=f)
print("", file=f)
emit_bind(tap, ports, prefix, prefix, new_taps, is_enabled)
def emit_bind(tap, ports, from_prefix, to_prefix, new_taps, is_enabled):
stap = tap + "_BIND"
new_taps.append(stap)
print("`define " + stap + " \\", file=f)
for key in ports:
name = key
if name[0] == '!':
name = name[1:]
if is_enabled:
print("\t." + name + to_prefix + " (" + name + from_prefix + "), \\", file=f)
else:
if (from_prefix != to_prefix):
print("\t`UNUSED_PIN (" + name + to_prefix + "), \\", file=f)
print("", file=f)
def emit_select(tap, ports, from_prefix, to_prefix, new_taps, is_enabled):
stap = tap + "_SELECT(__i__)"
new_taps.append(stap)
print("`define " + stap + " \\", file=f)
if is_enabled:
for key in ports:
name = key
if name[0] == '!':
name = name[1:]
print("\t." + name + to_prefix + " (" + name + from_prefix + "[__i__]), \\", file=f)
print("", file=f)
def do_top(tap, ports, new_taps):
out_ports = []
for p in ports:
name = p
is_trigger = False
if name[0] == '!':
name = name[1:]
is_trigger = True
out_ports.append((name, ports[p], [], is_trigger))
return out_ports
def do_core(tap, ports, new_taps):
out_ports = []
nclusters = parameters["NUM_CLUSTERS"]
ncores = parameters["NUM_CORES"]
emit_io(tap + "_TOP", ports, "_top", [nclusters, ncores], out_ports, new_taps, True)
emit_io(tap + "_CLUSTER", ports, "_cluster", [ncores], None, new_taps, True)
emit_io(tap + "", ports, "", [], None, new_taps, True)
emit_select(tap + "_CLUSTER", ports, "_top", "_cluster", new_taps, True)
emit_select(tap + "", ports, "_cluster", "", new_taps, True)
return out_ports
def do_bank(tap, ports, new_taps):
out_ports = []
nclusters = parameters["NUM_CLUSTERS"]
ncores = parameters["NUM_CORES"]
has_l3 = (parameters["L3_ENABLE"] != 0)
has_l2 = (parameters["L2_ENABLE"] != 0)
emit_io(tap + "_L3_TOP", ports, "_l3_cache", [parameters["L3NUM_BANKS"]], out_ports, new_taps, has_l3)
emit_io(tap + "_L2_TOP", ports, "_l2_top", [nclusters, parameters["L2NUM_BANKS"]], out_ports, new_taps, has_l2)
emit_io(tap + "_L1D_TOP", ports, "_l1d_top", [nclusters, ncores, parameters["DNUM_BANKS"]], out_ports, new_taps, True)
emit_io(tap + "_L1I_TOP", ports, "_l1i_top", [nclusters, ncores, parameters["INUM_BANKS"]], out_ports, new_taps, True)
emit_io(tap + "_L1S_TOP", ports, "_l1s_top", [nclusters, ncores, parameters["SNUM_BANKS"]], out_ports, new_taps, True)
emit_io(tap + "_L2_CLUSTER", ports, "_l2_cache", [parameters["L2NUM_BANKS"]], None, new_taps, has_l2)
emit_io(tap + "_L1D_CLUSTER", ports, "_l1d_cluster", [ncores, parameters["DNUM_BANKS"]], None, new_taps, True)
emit_io(tap + "_L1I_CLUSTER", ports, "_l1i_cluster", [ncores, parameters["INUM_BANKS"]], None, new_taps, True)
emit_io(tap + "_L1S_CLUSTER", ports, "_l1s_cluster", [ncores, parameters["SNUM_BANKS"]], None, new_taps, True)
emit_io(tap + "_L1D_CORE", ports, "_l1d_cache", [parameters["DNUM_BANKS"]], None, new_taps, True)
emit_io(tap + "_L1I_CORE", ports, "_l1i_cache", [parameters["INUM_BANKS"]], None, new_taps, True)
emit_io(tap + "_L1S_CORE", ports, "_l1s_cache", [parameters["SNUM_BANKS"]], None, new_taps, True)
emit_io(tap + "_CACHE", ports, "_cache", ["NUM_BANKS"], None, new_taps, True)
emit_io(tap + "", ports, "", [], None, new_taps, True)
emit_select(tap + "_L2_CLUSTER", ports, "_l2_top", "_l2_cache", new_taps, has_l2)
emit_select(tap + "_L1D_CLUSTER", ports, "_l1d_top", "_l1d_cluster", new_taps, True)
emit_select(tap + "_L1I_CLUSTER", ports, "_l1i_top", "_l1i_cluster", new_taps, True)
emit_select(tap + "_L1S_CLUSTER", ports, "_l1s_top", "_l1s_cluster", new_taps, True)
emit_select(tap + "_L1D_CORE", ports, "_l1d_cluster", "_l1d_cache", new_taps, True)
emit_select(tap + "_L1I_CORE", ports, "_l1i_cluster", "_l1i_cache", new_taps, True)
emit_select(tap + "_L1S_CORE", ports, "_l1s_cluster", "_l1s_cache", new_taps, True)
emit_bind(tap + "_L3_CACHE", ports, "_l3_cache", "_cache", new_taps, has_l3)
emit_bind(tap + "_L2_CACHE", ports, "_l2_cache", "_cache", new_taps, has_l2)
emit_bind(tap + "_L1D_CACHE", ports, "_l1d_cache", "_cache", new_taps, True)
emit_bind(tap + "_L1I_CACHE", ports, "_l1i_cache", "_cache", new_taps, True)
emit_bind(tap + "_L1S_CACHE", ports, "_l1s_cache", "_cache", new_taps, True)
emit_select(tap + "", ports, "_cache", "", new_taps, True)
return out_ports
callbacks = {
"top": do_top,
"core": do_core,
"bank": do_bank
}
return callbacks[tclass](tap, ports, new_taps)
def trigger_size(name, ports):
    """Look up a port by name and return its two size fields.

    Each entry of `ports` is indexed positionally: element 0 is compared
    against `name`, and elements 1 and 2 of the first matching entry are
    returned as a tuple. Returns None when no entry matches.
    """
    hits = ((entry[1], entry[2]) for entry in ports if entry[0] == name)
    # Only the first match matters; None signals "not found".
    return next(hits, None)
def trigger_prefices(asize):
    """Enumerate every subscript string for an array of dimensions `asize`.

    For dimensions [2, 3] the result is "[0][0]", "[0][1]", ... "[1][2]",
    with the first dimension varying slowest. An empty `asize` yields a
    single empty string; any zero-sized dimension yields an empty list.
    """
    if not len(asize):
        return [""]
    subscripts = [""]
    for extent in asize:
        # Extend every partial subscript with each index of this dimension.
        subscripts = [
            partial + '[' + str(index) + ']'
            for partial in subscripts
            for index in range(extent)
        ]
    return subscripts
def dic_insert(gdic, ldic, key, value, enabled):
    """Record `key` locally and report whether it is new to `gdic`.

    When `enabled` is truthy, `ldic[key]` is set to `value` (even if the key
    is already present in `gdic`). `gdic` only tracks key membership: a new
    key is stored there with a None value, and only when `enabled` is truthy.

    Returns True if `key` was not yet in `gdic`, False otherwise.
    """
    if enabled:
        ldic[key] = value
    first_seen = key not in gdic
    if first_seen and enabled:
        gdic[key] = None
    return first_seen
def trigger_name(name, size):
if type(size) == int:
size1 = (size-1)
if size1 != 0:
if size != 1:
return "(| " + name + ")"
else:
return name
else:
return "(| " + name + ")"
with open(file, 'w') as f:
print(header, file=f)
def trigger_subscripts(asize):
def Q(arr, ss, asize, idx, N):
a = asize[idx]
if (a != 0):
for i in range(a):
tmp = ss + '[' + str(i) + ']'
if (idx + 1) < N:
Q(arr, tmp, asize, idx + 1, N)
else:
arr.append(tmp)
else:
if (idx + 1) < N:
Q(arr, ss, asize, idx + 1, N)
else:
arr.append(ss)
all_ports = []
new_taps = []
if asize is None:
return [""]
ln = len(asize)
if (0 == ln):
return [""]
arr = []
Q(arr, "", asize, 0, ln)
return arr
# Recursively walk one tap path through the module tree and collect its taps.
#   alltaps - dict passed to dic_insert as the global key registry
#   ports   - dict of macro name -> list of generated text lines (updated in place)
#   path    - name of the current path element (used in the error message)
#   node    - description of the current node; its "type" selects modules[ntype]
#   paths   - remaining path elements below this node (consumed with pop(0))
#   modules - dict of module type -> definition holding a "submodules" dict
#   taps    - dict of tap name -> size expression, used once `paths` is exhausted
# Returns the dict of taps recorded at this level; each value is a tuple
# (size, array sizes, trigger kind, enabled).
# NOTE(review): indentation was lost in this listing, so the exact nesting of
# some statements below cannot be confirmed from here.
def visit_path(alltaps, ports, path, node, paths, modules, taps):
ntype = node["type"]
# The node is enabled unless it carries an "enabled" expression saying otherwise.
enabled = True
if "enabled" in node:
enabled = eval_node(node["enabled"], None)
curtaps = {}
# Intermediate node: descend into the next path element, then re-export the
# taps returned by the child as ports of this module type.
if (len(paths) != 0):
spath = paths.pop(0)
snodes = modules[ntype]["submodules"]
if not spath in snodes:
raise Exception("invalid path: " + spath + " in " + path)
snode = snodes[spath]
subtaps = visit_path(alltaps, ports, spath, snode, paths, modules, taps)
# "count" is the submodule's instance count; 0 is the default when absent.
scount = 0
if "count" in snode:
scount = eval_node(snode["count"], None)
# Optional "params" of the submodule, handed to eval_node below.
params = None
if "params" in snode:
params = snode["params"]
new_staps = []
nn = "SCOPE_IO_" + ntype
pp = create_signal(nn, ports)
for key in subtaps:
# Unpack the child tap tuple: size, array sizes, trigger kind, enabled.
subtap = subtaps[key]
s = subtap[0]
a = subtap[1]
t = subtap[2]
e = subtap[3]
s = eval_node(s, params)
e = eval_node(e, params)
# Combine child and node enables; if either is still a string the result is
# kept as a textual "and" expression instead of being evaluated here.
if type(e) == str or type(enabled) == str:
me = str(e) + " and " + str(enabled)
else:
me = e and enabled
# Array sizes start with this level's instance count, followed by the
# child's own evaluated sizes.
aa = [scount]
sa = signal_size(scount, 0)
if a:
for i in a:
x = eval_node(i, params)
aa.append(x)
sa += signal_size(x, 0)
# dic_insert returns True only for keys not yet present in alltaps.
if dic_insert(alltaps, curtaps, spath + '/' + key, (s, aa, t, me), e):
skey = key.replace('/', '_')
if e:
pp.append("\toutput wire" + sa + signal_size(s, 1) + " scope_" + spath + '_' + skey + ',')
new_staps.append(skey)
ports[nn] = pp
# Emit the bind macro for the submodule: "()" form when the count is 0,
# "(__i__)" form with an index subscript otherwise.
# NOTE(review): `e` in the two loops below is whatever the last iteration of
# the `for key in subtaps` loop left behind, not a per-tap value - confirm
# this is intended.
if (0 == scount):
nn = "SCOPE_BIND_" + ntype + '_' + spath + "()"
pp = create_signal(nn, ports)
for st in new_staps:
if e:
pp.append("\t.scope_" + st + "(scope_" + spath + '_' + st + "),")
else:
pp.append("\t`UNUSED_PIN (scope_" + st + "),")
ports[nn] = pp
else:
nn = "SCOPE_BIND_" + ntype + '_' + spath + "(__i__)"
pp = create_signal(nn, ports)
for st in new_staps:
if e:
pp.append("\t.scope_" + st + "(scope_" + spath + '_' + st + "[__i__]),")
else:
pp.append("\t`UNUSED_PIN (scope_" + st + "),")
ports[nn] = pp
# Leaf node: declare the requested taps as outputs of this module type.
else:
nn = "SCOPE_IO_" + ntype
pp = create_signal(nn, ports)
for tk in taps:
# A leading '!' sets trigger kind 1 and a leading '?' sets trigger kind 2;
# the marker is stripped from the tap name. Otherwise the kind stays 0.
trigger = 0
name = tk
size = eval_node(taps[tk], None)
if name[0] == '!':
name = name[1:]
trigger = 1
elif name[0] == '?':
name = name[1:]
trigger = 2
if dic_insert(alltaps, curtaps, name, (size, None, trigger, enabled), True):
pp.append("\toutput wire" + signal_size(size, 1) + " scope_" + name + ',')
ports[nn] = pp
return curtaps
toptaps = {}
with open(file, 'w') as f:
top = modules['*']
snodes = top["submodules"]
ports = {}
alltaps = {}
for key in taps:
[tclass, tap] = key.split('::')
ports = generate_ports(tclass, tap, taps[key], new_taps)
for port in ports:
all_ports.append(port)
skey_list = key.split(',')
_taps = taps[key]
for skey in skey_list:
print('processing node: ' + skey + ' ...')
paths = skey.strip().split('/')
spath = paths.pop(0)
if not spath in snodes:
raise Exception("invalid path: " + spath)
snode = snodes[spath]
curtaps = visit_path(alltaps, ports, spath, snode, paths, modules, _taps)
for tk in curtaps:
toptaps[tk] = curtaps[tk]
print("`define SCOPE_SIGNALS_DECL \\", file=f)
print(header, file=f)
for key in ports:
print("`define " + key + ' \\', file=f)
for port in ports[key]:
print(port + ' \\', file=f)
print("", file=f)
print("`define SCOPE_DECL_SIGNALS \\", file=f)
i = 0
for port in all_ports:
for key in toptaps:
tap = toptaps[key]
name = key.replace('/', '_')
size = tap[0]
asize = tap[1]
enabled = tap[3]
sa = ""
if asize:
for a in asize:
sa += signal_size(a, 0)
if i > 0:
print(" \\", file=f)
print("\twire" + signal_size(port[1], port[2]) + " " + port[0] + ";", file=f, end='')
if not enabled:
print("`IGNORE_WARNINGS_BEGIN \\", file=f)
print('\t wire' + sa + signal_size(size, 1) + " scope_" + name + '; \\', file=f)
print("`IGNORE_WARNINGS_END", file=f, end='')
else:
print('\t wire' + sa + signal_size(size, 1) + " scope_" + name + ';', file=f, end='')
i += 1
print("", file=f)
print("", file=f)
print("`define SCOPE_SIGNALS_DATA_LIST \\", file=f)
print("`define SCOPE_DATA_LIST \\", file=f)
i = 0
for port in all_ports:
if port[3]:
for key in toptaps:
tap = toptaps[key]
if tap[2] != 0:
continue
name = key.replace('/', '_')
if i > 0:
print(", \\", file=f)
print("\t" + port[0], file=f, end='')
print("\t scope_" + name, file=f, end='')
i += 1
print("", file=f)
print("", file=f)
print("`define SCOPE_SIGNALS_UPD_LIST \\", file=f)
print("`define SCOPE_UPDATE_LIST \\", file=f)
i = 0
for port in all_ports:
if not port[3]:
for key in toptaps:
tap = toptaps[key]
if tap[2] == 0:
continue
name = key.replace('/', '_')
if i > 0:
print(", \\", file=f)
print("\t" + port[0], file=f, end='')
print("\t scope_" + name, file=f, end='')
i += 1
print("", file=f)
print("", file=f)
print("`define SCOPE_TRIGGERS \\", file=f)
print("`define SCOPE_TRIGGER \\", file=f)
i = 0
for trigger in triggers:
arr = trigger_size(trigger[0], all_ports)
if arr is None:
excluded_list = []
for key in toptaps:
if key in excluded_list:
continue
[size, asize] = arr
for prefix in trigger_prefices(asize):
tap = toptaps[key]
if tap[2] != 2:
continue
size = tap[0]
asize = tap[1]
sus = trigger_subscripts(asize)
for su in sus:
if i > 0:
print(" | \\", file=f)
print("\t(", file=f, end='')
for j in range(len(trigger)):
if j > 0:
print(" && ", file=f, end='')
print(trigger_name(trigger[j] + prefix, size), file=f, end='')
name = trigger_name("scope_" + key.replace('/', '_') + su, size)
if key.endswith("_valid"):
ready_signal = key[:-6] + "_ready"
if ready_signal in toptaps:
rname = trigger_name("scope_" + ready_signal.replace('/', '_') + su, size)
print(name + " && " + rname, file=f, end='')
excluded_list.append(ready_signal)
else:
print(name, file=f, end='')
else:
print(name, file=f, end='')
print(")", file=f, end='')
i += 1
print("", file=f)
@@ -538,69 +682,110 @@ def gen_vl_header(file, taps, triggers):
print(footer, file=f)
return all_ports
return toptaps
def gen_cc_header(file, ports):
def gen_cc_header(file, taps):
header = '''
#pragma once\n
struct scope_signal_t {
#pragma once
struct scope_module_t {
const char* name;
int index;
int parent;
};
struct scope_tap_t {
int width;
const char* name;
};\n
inline constexpr int __clog2(int n) { return (n > 1) ? 1 + __clog2((n + 1) >> 1) : 0; }\n
static constexpr scope_signal_t scope_signals[] = {'''
footer = "};"
def eval_macro(text):
expanded = expand_text(text)
if expanded:
text = expanded
text = text.replace('$clog2', '__clog2')
return text
def asize_name(asize):
def Q(arr, ss, asize, idx, N):
for i in range(asize[idx]):
tmp = ss + "_" + str(i)
int module;
};
'''
def flatten_path(paths, sizes):
def Q(arr, ss, idx, N, paths, sizes):
size = sizes[idx]
if size != 0:
for i in range(sizes[idx]):
tmp = ss + ('/' if (ss != '') else '')
tmp += paths[idx] + '_' + str(i)
if (idx + 1) < N:
Q(arr, tmp, asize, idx + 1, N)
Q(arr, tmp, idx + 1, N, paths, sizes)
else:
arr.append(tmp)
else:
tmp = ss + ('/' if (ss != '') else '')
tmp += paths[idx]
if (idx + 1) < N:
Q(arr, tmp, idx + 1, N, paths, sizes)
else:
arr.append(tmp)
l = len(asize)
if l == 0:
return [""]
arr = []
Q(arr, "", asize, 0, l)
Q(arr, "", 0, len(asize), paths, asize)
return arr
# flatten the taps
fdic = {}
for key in taps:
tap = taps[key]
size = str(tap[0])
paths = key.split('/')
if (len(paths) > 1):
name = paths.pop(-1)
asize = tap[1]
for ss in flatten_path(paths, asize):
fdic[ss + '/' + name ] = [size, -1]
else:
fdic[key] = [size, -1]
# generate module dic
mdic = {}
for key in fdic:
paths = key.split('/')
if len(paths) == 1:
continue
paths.pop(-1)
parent = -1
for path in paths:
if not path in mdic:
index = len(mdic)
mdic[path] = (index, parent)
parent = index
else:
parent = mdic[path][0]
fdic[key][1] = parent
with open(file, 'w') as f:
print(header, file=f)
print("static constexpr scope_module_t scope_modules[] = {", file=f)
i = 0
for port in ports:
if port[3]:
continue
name = port[0]
size = eval_macro(str(port[1]))
for ss in asize_name(port[2]):
for key in mdic:
m = mdic[key]
if i > 0:
print(",", file=f)
print("\t{" + size + ", \"" + name + ss + "\"}", file=f, end='')
i += 1
for port in ports:
if not port[3]:
continue
name = port[0]
size = eval_macro(str(port[1]))
for ss in asize_name(port[2]):
if i > 0:
print(",", file=f)
print("\t{" + size + ", \"" + name + ss + "\"}", file=f, end='')
print(',', file=f)
print("\t{\"" + key + "\", " + str(m[0]) + ", " + str(m[1]) + "}", file=f, end='')
i += 1
print("", file=f)
print(footer, file=f)
print("};", file=f)
print("", file=f)
print("static constexpr scope_tap_t scope_taps[] = {", file=f)
i = 0
for key in fdic:
size = fdic[key][0]
parent = fdic[key][1]
paths = key.split('/')
if len(paths) > 1:
name = paths.pop(-1)
else:
name = key
if i > 0:
print(',', file=f)
print("\t{" + size + ", \"" + name + "\", " + str(parent) + "}", file=f, end='')
i += 1
print("", file=f)
print("};", file=f)
def main():
parser = argparse.ArgumentParser(description='Scope headers generator.')
@@ -612,7 +797,6 @@ def main():
args = parser.parse_args()
print("args=", args)
global parameters
global exclude_files
global include_dirs
global macros
@@ -631,12 +815,8 @@ def main():
if "includes" in config:
parse_includes(config["includes"])
parameters = config["parameters"]
for key in parameters:
parameters[key] = int(eval(expand_text(str(parameters[key]))))
taps = gen_vl_header(args.vl, config["modules"], config["taps"])
gen_cc_header(args.cc, taps)
ports = gen_vl_header(args.vl, config["taps"], config["triggers"])
gen_cc_header(args.cc, ports)
if __name__ == "__main__":
if __name__ == '__main__':
main()

View File

@@ -51,7 +51,7 @@ smart.log: $(PROJECT_FILES)
# Project initialization
$(PROJECT_FILES):
quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../project.sdc -inc "$(RTL_INCLUDE)"
quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE"
syn.chg:
$(STAMP) syn.chg

View File

@@ -1,17 +1,22 @@
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_bypass_buffer.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_cam_buffer.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_countones.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_divide.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_encoder_onehot.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_elastic_buffer.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_fair_arbiter.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_fixed_arbiter.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_generic_queue.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_generic_register.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_generic_stack.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_indexable_queue.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_index_queue.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_matrix_arbiter.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_mult.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_multiplier.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_onehot_encooder.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_priority_encoder.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_rr_arbiter.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_scope.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_serial_div.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_shift_register.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_skid_buffer.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_bank.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_bank_core_req_arb.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_cache.v
@@ -20,114 +25,72 @@ read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I..
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_cache_dram_fill_arb.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_cache_dram_req_arb.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_cache_miss_resrv.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_prefetcher.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_snp_forwarder.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_snp_rsp_arb.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_tag_data_access.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_tag_data_structure.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_backend_req_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_branch_rsp_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_tag_data_store.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_alu_req_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_branch_ctl_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_cache_core_req_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_cache_core_rsp_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_cache_dram_req_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_cache_dram_rsp_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_cache_snp_req_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_cache_snp_rsp_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_cmt_to_csr_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_csr_io_req_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_csr_io_rsp_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_csr_req_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_exec_unit_req_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_gpr_read_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_gpu_inst_req_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_inst_meta_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_jal_rsp_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_csr_to_issue_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_decode_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_exu_to_cmt_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_fpu_req_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_fpu_to_cmt_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_fpu_to_csr_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_gpr_req_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_gpr_rsp_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_gpu_req_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_ifetch_req_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_ifetch_rsp_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_join_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_lsu_req_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_mul_req_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_warp_ctl_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_wb_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_writeback_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_wstall_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_alu_unit.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_back_end.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_cluster.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_commit.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_core.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_csr_arb.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_csr_data.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_csr_io_arb.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_csr_pipe.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_d_e_reg.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_csr_unit.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_dcache_arb.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_decode.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_exec_unit.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_f_d_reg.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_execute.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_fetch.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_front_end.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_fpu_unit.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_gpr_bypass.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_gpr_fp_ctrl.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_gpr_ram.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_gpr_stage.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_gpr_wrapper.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_gpu_inst.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_i_d_reg.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_gpu_unit.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_ibuffer.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_icache_stage.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_inst_multiplex.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_instr_demux.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_io_arb.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_ipdom_stack.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_issue.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_lsu_unit.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_mem_arb.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_mem_unit.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_mul_unit.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_pipeline.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_scheduler.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_user_config.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_warp.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_scoreboard.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_warp_sched.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_writeback.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/Vortex.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_bank.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_bank_core_req_arb.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_cache.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_cache_core_req_bank_sel.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_cache_core_rsp_merge.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_cache_dram_fill_arb.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_cache_dram_req_arb.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_cache_miss_resrv.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_prefetcher.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_snp_forwarder.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_snp_rsp_arb.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_tag_data_access.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_tag_data_structure.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_backend_req_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_branch_rsp_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_cache_core_req_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_cache_core_rsp_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_cache_dram_req_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_cache_dram_rsp_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_cache_snp_req_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_cache_snp_rsp_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_csr_io_req_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_csr_io_rsp_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_csr_req_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_exec_unit_req_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_gpr_read_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_gpu_inst_req_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_inst_meta_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_jal_rsp_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_join_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_lsu_req_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_warp_ctl_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_wb_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_wstall_if.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_countones.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_divide.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_encoder_onehot.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_fair_arbiter.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_fixed_arbiter.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_generic_queue.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_generic_register.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_generic_stack.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_indexable_queue.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_matrix_arbiter.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_mult.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_priority_encoder.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_rr_arbiter.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_scope.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/tex_unit/VX_tex_mgr.v
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/tex_unit/VX_tex_unit.v
hierarchy -check -top Vortex
add -global_input reset 1
proc -global_arst reset