diff --git a/driver/opae/Makefile b/driver/opae/Makefile index b13b897d..34991e2f 100644 --- a/driver/opae/Makefile +++ b/driver/opae/Makefile @@ -18,7 +18,10 @@ CXXFLAGS +=-fstack-protector CXXFLAGS += -fPIC # Enable scope analyzer -#CXXFLAGS += -DSCOPE +CXXFLAGS += -DSCOPE + +# config parameters +CXXFLAGS += -DNUM_WARPS=2 -DNUM_THREADS=2 LDFLAGS += -luuid diff --git a/driver/opae/scope.cpp b/driver/opae/scope.cpp index d3051d7e..78d01303 100644 --- a/driver/opae/scope.cpp +++ b/driver/opae/scope.cpp @@ -25,31 +25,34 @@ struct scope_signal_t { const char* name; }; +constexpr int ilog2(int n) { + return (n > 1) ? 1 + ilog2(n >> 1) : 0; +} + +static constexpr int NW_BITS = ilog2(NUM_WARPS); + static const scope_signal_t scope_signals[] = { - { 2, "icache_req_warp_num" }, + { NW_BITS, "icache_req_warp_num" }, { 32, "icache_req_addr" }, - { 2, "icache_req_tag" }, - + { NW_BITS, "icache_req_tag" }, { 32, "icache_rsp_data" }, - { 2, "icache_rsp_tag" }, + { NW_BITS, "icache_rsp_tag" }, - { 2, "dcache_req_warp_num" }, + { NW_BITS, "dcache_req_warp_num" }, { 32, "dcache_req_curr_PC" }, { 32, "dcache_req_addr" }, { 1, "dcache_req_rw" }, { 4, "dcache_req_byteen" }, { 32, "dcache_req_data" }, - { 2, "dcache_req_tag" }, - + { NW_BITS, "dcache_req_tag" }, { 32, "dcache_rsp_data" }, - { 2 , "dcache_rsp_tag" }, + { NW_BITS, "dcache_rsp_tag" }, { 32, "dram_req_addr" }, { 1, "dram_req_rw" }, { 16, "dram_req_byteen" }, { 32, "dram_req_data" }, { 29, "dram_req_tag" }, - { 32, "dram_rsp_data" }, { 29, "dram_rsp_tag" }, @@ -58,30 +61,32 @@ static const scope_signal_t scope_signals[] = { { 16, "snp_req_tag" }, { 16, "snp_rsp_tag" }, - { 2, "decode_warp_num" }, + { NW_BITS, "decode_warp_num" }, { 32, "decode_curr_PC" }, - { 1, "decode_is_jal" }, - { 5, "decode_rs1" }, - { 5, "decode_rs2" }, + { 1, "decode_is_jal" }, + { 5, "decode_rs1" }, + { 5, "decode_rs2" }, - { 2, "execute_warp_num" }, + { NW_BITS, "execute_warp_num" }, { 5, "execute_rd" }, { 32, "execute_a" }, { 32, "execute_b" }, - 
{ 2, "writeback_warp_num" }, + { NW_BITS, "writeback_warp_num" }, { 2, "writeback_wb" }, { 5, "writeback_rd" }, { 32, "writeback_data" }, + /////////////////////////////////////////////////////////////////////////// + { 1, "icache_req_valid" }, { 1, "icache_req_ready" }, { 1, "icache_rsp_valid" }, { 1, "icache_rsp_ready" }, - { 4, "dcache_req_valid" }, + { NUM_THREADS, "dcache_req_valid" }, { 1, "dcache_req_ready" }, - { 4, "dcache_rsp_valid" }, + { NUM_THREADS, "dcache_rsp_valid" }, { 1, "dcache_rsp_ready" }, { 1, "dram_req_valid" }, @@ -94,14 +99,19 @@ static const scope_signal_t scope_signals[] = { { 1, "snp_rsp_valid" }, { 1, "snp_rsp_ready" }, - { 4, "decode_valid" }, - { 4, "execute_valid" }, - { 4, "writeback_valid" }, + { NUM_THREADS, "decode_valid" }, + { NUM_THREADS, "execute_valid" }, + { NUM_THREADS, "writeback_valid" }, { 1, "schedule_delay" }, { 1, "memory_delay" }, { 1, "exec_delay" }, { 1, "gpr_stage_delay" }, { 1, "busy" }, + + { 1, "idram_req_valid" }, + { 1, "idram_req_ready" }, + { 1, "idram_rsp_valid" }, + { 1, "idram_rsp_ready" }, }; static const int num_signals = sizeof(scope_signals) / sizeof(scope_signal_t); @@ -161,7 +171,10 @@ int vx_scope_stop(fpga_handle hfpga, uint64_t delay) { CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_CSR_SCOPE_CMD, 1)); - assert(fwidth == (int)frame_width); + if (fwidth != (int)frame_width) { + std::cerr << "invalid frame_width: expecting " << std::dec << fwidth << "!" 
<< std::endl; + std::abort(); + } std::vector signal_data(frame_width+1); uint64_t frame_offset = 0; diff --git a/driver/rtlsim/Makefile b/driver/rtlsim/Makefile index b7548092..7e9761ff 100644 --- a/driver/rtlsim/Makefile +++ b/driver/rtlsim/Makefile @@ -13,18 +13,19 @@ DBG_PRINT_FLAGS = -DDBG_PRINT_CORE_ICACHE \ -DDBG_PRINT_WB \ -DDBG_PRINT_OPAE -#DBG_PRINT=$(DBG_PRINT_FLAGS) +DBG_PRINT=$(DBG_PRINT_FLAGS) -#MULTICORE += -DNUM_CLUSTERS=2 -DNUM_CORES=4 -#MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=4 -#MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=2 +#CONFIGS += -DNUM_CLUSTERS=2 -DNUM_CORES=4 +#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=4 +#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=2 +CONFIGS += -DNUM_WARPS=2 -DNUM_THREADS=2 -#DEBUG=1 -#AFU=1 +DEBUG=1 +AFU=1 CFLAGS += -fPIC -CFLAGS += -DUSE_RTLSIM $(MULTICORE) +CFLAGS += -DUSE_RTLSIM $(CONFIGS) LDFLAGS += -shared -pthread # LDFLAGS += -dynamiclib -pthread @@ -35,7 +36,7 @@ SRCS = vortex.cpp ../common/vx_utils.cpp ../../hw/simulate/simulator.cpp RTL_INCLUDE = -I../../hw/rtl -I../../hw/rtl/libs -I../../hw/rtl/interfaces -I../../hw/rtl/pipe_regs -I../../hw/rtl/cache -VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic $(MULTICORE) +VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic $(CONFIGS) VL_FLAGS += -Wno-DECLFILENAME VL_FLAGS += --x-initial unique VL_FLAGS += --x-assign unique @@ -47,9 +48,11 @@ VL_FLAGS += --x-assign unique # Debugigng ifdef DEBUG VL_FLAGS += --trace -DVCD_OUTPUT $(DBG_PRINT) - CFLAGS += -DVCD_OUTPUT $(DBG_PRINT) + CFLAGS += -DVCD_OUTPUT $(DBG_PRINT) + #VL_FLAGS += -DDBG_CORE_REQ_INFO + #CFLAGS += -DDBG_CORE_REQ_INFO else - CFLAGS += -DNDEBUG + CFLAGS += -DNDEBUG VL_FLAGS += -DNDEBUG endif diff --git a/driver/tests/basic/Makefile b/driver/tests/basic/Makefile index 8199c0dc..19277f92 100644 --- a/driver/tests/basic/Makefile +++ b/driver/tests/basic/Makefile @@ -44,16 +44,16 @@ $(PROJECT): $(SRCS) $(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -L../../stub -lvortex -o $@ run-fpga: $(PROJECT) - 
LD_LIBRARY_PATH=../../opae:$(LD_LIBRARY_PATH) ./$(PROJECT) + LD_LIBRARY_PATH=../../opae:$(LD_LIBRARY_PATH) ./$(PROJECT) -n 32 run-ase: $(PROJECT) - ASE_LOG=0 LD_LIBRARY_PATH=../../opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) + ASE_LOG=0 LD_LIBRARY_PATH=../../opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) -n 16 run-rtlsim: $(PROJECT) - LD_LIBRARY_PATH=../../rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) + LD_LIBRARY_PATH=../../rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) -n 16 run-simx: $(PROJECT) - LD_LIBRARY_PATH=../../simx:$(LD_LIBRARY_PATH) ./$(PROJECT) + LD_LIBRARY_PATH=../../simx:$(LD_LIBRARY_PATH) ./$(PROJECT) -n 16 .depend: $(SRCS) $(CXX) $(CXXFLAGS) -MM $^ > .depend; diff --git a/driver/tests/basic/basic.cpp b/driver/tests/basic/basic.cpp index 118edcc2..55386122 100755 --- a/driver/tests/basic/basic.cpp +++ b/driver/tests/basic/basic.cpp @@ -1,10 +1,9 @@ #include #include +#include #include #include "common.h" -int test = -1; - #define RT_CHECK(_expr) \ do { \ int _ret = _expr; \ @@ -15,79 +14,84 @@ int test = -1; exit(-1); \ } while (false) +const char* kernel_file = "kernel.bin"; +int test = -1; +uint32_t count = 0; + +vx_device_h device = nullptr; +vx_buffer_h buffer = nullptr; + +static void show_usage() { + std::cout << "Vortex Driver Test." << std::endl; + std::cout << "Usage: [-t testno][-k: kernel][-n words][-h: help]" << std::endl; +} + static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "t:h?")) != -1) { + while ((c = getopt(argc, argv, "n:t:k:h?")) != -1) { switch (c) { - case 't': { + case 'n': + count = atoi(optarg); + break; + case 't': test = atoi(optarg); - } break; + break; + case 'k': + kernel_file = optarg; + break; case 'h': case '?': { - std::cout << "Test." 
<< std::endl; - std::cout << "Usage: [-t testno][-h: help]" << std::endl; + show_usage(); exit(0); } break; default: + show_usage(); exit(-1); } } } +void cleanup() { + if (buffer) { + vx_buf_release(buffer); + } + if (device) { + vx_dev_close(device); + } +} + uint64_t shuffle(int i, uint64_t value) { return (value << i) | (value & ((1 << i)-1));; } -vx_device_h device = nullptr; -vx_buffer_h sbuf = nullptr; -vx_buffer_h dbuf = nullptr; - -int total_blocks = NUM_BLOCKS; - -void cleanup() { - if (sbuf) { - vx_buf_release(sbuf); - } - if (dbuf) { - vx_buf_release(dbuf); - } - if (device) { - vx_dev_close(device); - } -} - -int run_memcopy_test(vx_buffer_h sbuf, - vx_buffer_h dbuf, - uint32_t address, - uint64_t value, - int num_blocks) { +int run_memcopy_test(uint32_t dev_addr, uint64_t value, int num_blocks) { int errors = 0; - // write sbuf data + // update source buffer for (int i = 0; i < (64 * num_blocks) / 8; ++i) { - ((uint64_t*)vx_host_ptr(sbuf))[i] = shuffle(i, value); - } - - // clear dbuf data - for (int i = 0; i < (64 * num_blocks) / 8; ++i) { - ((uint64_t*)vx_host_ptr(dbuf))[i] = 0; + ((uint64_t*)vx_host_ptr(buffer))[i] = shuffle(i, value); } // write buffer to local memory std::cout << "write buffer to local memory" << std::endl; - RT_CHECK(vx_copy_to_dev(sbuf, address, 64 * num_blocks, 0)); + RT_CHECK(vx_copy_to_dev(buffer, dev_addr, 64 * num_blocks, 0)); + + // clear destination buffer + for (int i = 0; i < (64 * num_blocks) / 8; ++i) { + ((uint64_t*)vx_host_ptr(buffer))[i] = 0; + } // read buffer from local memory std::cout << "read buffer from local memory" << std::endl; - RT_CHECK(vx_copy_from_dev(dbuf, address, 64 * num_blocks, 0)); + RT_CHECK(vx_copy_from_dev(buffer, dev_addr, 64 * num_blocks, 0)); // verify result std::cout << "verify result" << std::endl; for (int i = 0; i < (64 * num_blocks) / 8; ++i) { - auto curr = ((uint64_t*)vx_host_ptr(dbuf))[i]; + auto curr = ((uint64_t*)vx_host_ptr(buffer))[i]; auto ref = shuffle(i, value); if (curr != 
ref) { - std::cout << "error at 0x" << std::hex << (address + 8 * i) + std::cout << "error at 0x" << std::hex << (dev_addr + 8 * i) << ": actual 0x" << curr << ", expected 0x" << ref << std::endl; ++errors; } @@ -102,35 +106,19 @@ int run_memcopy_test(vx_buffer_h sbuf, return 0; } -int run_kernel_test(vx_device_h device, - vx_buffer_h sbuf, - vx_buffer_h dbuf, - const char* program, - int num_blocks) { - int errors = 0; - - uint64_t seed = 0x0badf00d40ff40ff; +int run_kernel_test(const kernel_arg_t& kernel_arg, + uint32_t buf_size, + uint32_t num_points) { + int errors = 0; - int src_dev_addr = DEV_MEM_SRC_ADDR; - int dest_dev_addr = DEV_MEM_DST_ADDR; - - // write sbuf data - for (int i = 0; i < (64 * num_blocks) / 8; ++i) { - ((uint64_t*)vx_host_ptr(sbuf))[i] = shuffle(i, seed); - } - - // clear dbuf data - for (int i = 0; i < (64 * num_blocks) / 8; ++i) { - ((uint64_t*)vx_host_ptr(dbuf))[i] = 0; + // update source buffer + for (uint32_t i = 0; i < num_points; ++i) { + ((int32_t*)vx_host_ptr(buffer))[i] = i; } // write buffer to local memory std::cout << "write buffer to local memory" << std::endl; - RT_CHECK(vx_copy_to_dev(sbuf, src_dev_addr, 64 * num_blocks, 0)); - - // upload program - std::cout << "upload program" << std::endl; - RT_CHECK(vx_upload_kernel_file(device, program)); + RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.src_ptr, buf_size, 0)); // start device std::cout << "start device" << std::endl; @@ -142,19 +130,24 @@ int run_kernel_test(vx_device_h device, // flush the caches std::cout << "flush the caches" << std::endl; - RT_CHECK(vx_flush_caches(device, dest_dev_addr, 64 * num_blocks)); + RT_CHECK(vx_flush_caches(device, kernel_arg.dst_ptr, buf_size)); + + // clear destination buffer + for (uint32_t i = 0; i < num_points; ++i) { + ((int32_t*)vx_host_ptr(buffer))[i] = 0; + } // read buffer from local memory std::cout << "read buffer from local memory" << std::endl; - RT_CHECK(vx_copy_from_dev(dbuf, dest_dev_addr, 64 * num_blocks, 0)); + 
RT_CHECK(vx_copy_from_dev(buffer, kernel_arg.dst_ptr, buf_size, 0)); // verify result std::cout << "verify result" << std::endl; - for (int i = 0; i < (64 * num_blocks) / 8; ++i) { - auto curr = ((uint64_t*)vx_host_ptr(dbuf))[i]; - auto ref = shuffle(i, seed); + for (uint32_t i = 0; i < num_points; ++i) { + int32_t curr = ((int32_t*)vx_host_ptr(buffer))[i]; + int32_t ref = i; if (curr != ref) { - std::cout << "error at 0x" << std::hex << (dest_dev_addr + 8 * i) + std::cout << "error at value " << i << ": actual 0x" << curr << ", expected 0x" << ref << std::endl; ++errors; } @@ -170,33 +163,66 @@ int run_kernel_test(vx_device_h device, } int main(int argc, char *argv[]) { + size_t value; + kernel_arg_t kernel_arg; + // parse command arguments parse_args(argc, argv); - std::cout << "total blocks: " << total_blocks << std::endl; + if (count == 0) { + count = 1; + } + + uint32_t max_cores = vx_dev_caps(VX_CAPS_MAX_CORES); + uint32_t num_points = max_cores * count; + uint32_t num_blocks = (num_points * sizeof(uint32_t) + 63) / 64; + uint32_t buf_size = num_blocks * 64; + + std::cout << "number of points: " << num_points << std::endl; + std::cout << "buffer size: " << buf_size << " bytes" << std::endl; // open device connection std::cout << "open device connection" << std::endl; RT_CHECK(vx_dev_open(&device)); - // create source buffer - std::cout << "create source buffer" << std::endl; - RT_CHECK(vx_alloc_shared_mem(device, total_blocks * 64, &sbuf)); - - // create destination buffer - std::cout << "create destination buffer" << std::endl; - RT_CHECK(vx_alloc_shared_mem(device, total_blocks * 64, &dbuf)); + // allocate device memory + RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value)); + kernel_arg.src_ptr = value; + RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value)); + kernel_arg.dst_ptr = value; + + kernel_arg.count = count; + + std::cout << "dev_src=" << std::hex << kernel_arg.src_ptr << std::endl; + std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << 
std::endl; + + // allocate shared memory + std::cout << "allocate shared memory" << std::endl; + uint32_t alloc_size = std::max(buf_size, sizeof(kernel_arg_t)); + RT_CHECK(vx_alloc_shared_mem(device, alloc_size, &buffer)); // run tests if (0 == test || -1 == test) { std::cout << "run memcopy test" << std::endl; - RT_CHECK(run_memcopy_test(sbuf, dbuf, DEV_MEM_SRC_ADDR, 0x0badf00d00ff00ff, 1)); - RT_CHECK(run_memcopy_test(sbuf, dbuf, DEV_MEM_SRC_ADDR, 0x0badf00d40ff40ff, total_blocks)); + RT_CHECK(run_memcopy_test(kernel_arg.src_ptr, 0x0badf00d00ff00ff, 1)); + RT_CHECK(run_memcopy_test(kernel_arg.src_ptr, 0x0badf00d40ff40ff, num_blocks)); } if (1 == test || -1 == test) { + // upload program + std::cout << "upload program" << std::endl; + RT_CHECK(vx_upload_kernel_file(device, kernel_file)); + + // upload kernel argument + std::cout << "upload kernel argument" << std::endl; + { + auto buf_ptr = (void*)vx_host_ptr(buffer); + memcpy(buf_ptr, &kernel_arg, sizeof(kernel_arg_t)); + RT_CHECK(vx_copy_to_dev(buffer, KERNEL_ARG_DEV_MEM_ADDR, sizeof(kernel_arg_t), 0)); + } + std::cout << "run kernel test" << std::endl; - RT_CHECK(run_kernel_test(device, sbuf, dbuf, "kernel.bin", total_blocks)); + RT_CHECK(run_kernel_test(kernel_arg, buf_size, num_points)); } // cleanup diff --git a/driver/tests/basic/common.h b/driver/tests/basic/common.h index 0ac862d3..bedbface 100644 --- a/driver/tests/basic/common.h +++ b/driver/tests/basic/common.h @@ -1,8 +1,12 @@ #ifndef _COMMON_H_ #define _COMMON_H_ -#define DEV_MEM_SRC_ADDR 0x10000040 -#define DEV_MEM_DST_ADDR 0x20000080 -#define NUM_BLOCKS 16 +#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000 + +struct kernel_arg_t { + uint32_t count; + uint32_t src_ptr; + uint32_t dst_ptr; +}; #endif \ No newline at end of file diff --git a/driver/tests/basic/kernel.bin b/driver/tests/basic/kernel.bin index 3845199a..87fd4c93 100644 Binary files a/driver/tests/basic/kernel.bin and b/driver/tests/basic/kernel.bin differ diff --git 
a/driver/tests/basic/kernel.c b/driver/tests/basic/kernel.c index 96401c43..372b1ee9 100644 --- a/driver/tests/basic/kernel.c +++ b/driver/tests/basic/kernel.c @@ -4,17 +4,14 @@ #include "common.h" void main() { - int64_t* x = (int64_t*)DEV_MEM_SRC_ADDR; - int64_t* y = (int64_t*)DEV_MEM_DST_ADDR; - int num_words = (NUM_BLOCKS * 64) / 8; - - int core_id = vx_core_id(); - int num_cores = vx_num_cores(); - int num_words_per_core = num_words / num_cores; - - int offset = core_id * num_words_per_core; + struct kernel_arg_t* arg = (struct kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR; + uint32_t count = arg->count; + int32_t* src_ptr = (int32_t*)arg->src_ptr; + int32_t* dst_ptr = (int32_t*)arg->dst_ptr; - for (int i = 0; i < num_words_per_core; ++i) { - y[offset + i] = x[offset + i]; + uint32_t offset = vx_core_id() * count; + + for (uint32_t i = 0; i < count; ++i) { + dst_ptr[offset + i] = src_ptr[offset + i]; } } \ No newline at end of file diff --git a/driver/tests/demo/Makefile b/driver/tests/demo/Makefile index e100c2a3..96ee2dee 100644 --- a/driver/tests/demo/Makefile +++ b/driver/tests/demo/Makefile @@ -41,16 +41,16 @@ $(PROJECT): $(SRCS) $(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -L../../stub -lvortex -o $@ run-fpga: $(PROJECT) - LD_LIBRARY_PATH=../../opae:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16 + LD_LIBRARY_PATH=../../opae:$(LD_LIBRARY_PATH) ./$(PROJECT) -n 16 run-ase: $(PROJECT) - ASE_LOG=0 LD_LIBRARY_PATH=../../opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16 + ASE_LOG=0 LD_LIBRARY_PATH=../../opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) -n 16 run-rtlsim: $(PROJECT) - LD_LIBRARY_PATH=../../rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16 + LD_LIBRARY_PATH=../../rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) -n 16 run-simx: $(PROJECT) - LD_LIBRARY_PATH=../../simx:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16 + LD_LIBRARY_PATH=../../simx:$(LD_LIBRARY_PATH) ./$(PROJECT) -n 16 .depend: $(SRCS) $(CXX) $(CXXFLAGS) -MM $^ > .depend; diff --git 
a/driver/tests/demo/common.h b/driver/tests/demo/common.h index 51969fed..e2cee391 100644 --- a/driver/tests/demo/common.h +++ b/driver/tests/demo/common.h @@ -4,7 +4,7 @@ #define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000 struct kernel_arg_t { - uint32_t stride; + uint32_t count; uint32_t src0_ptr; uint32_t src1_ptr; uint32_t dst_ptr; diff --git a/driver/tests/demo/demo.cpp b/driver/tests/demo/demo.cpp index 290d888e..234fbd5a 100644 --- a/driver/tests/demo/demo.cpp +++ b/driver/tests/demo/demo.cpp @@ -14,23 +14,26 @@ exit(-1); \ } while (false) -const char* program_file = "kernel.bin"; -uint32_t data_stride = 0; +const char* kernel_file = "kernel.bin"; +uint32_t count = 0; + +vx_device_h device = nullptr; +vx_buffer_h buffer = nullptr; static void show_usage() { std::cout << "Vortex Driver Test." << std::endl; - std::cout << "Usage: [-f: program] [-n stride] [-h: help]" << std::endl; + std::cout << "Usage: [-k: kernel] [-n words] [-h: help]" << std::endl; } static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:f:h?")) != -1) { + while ((c = getopt(argc, argv, "n:k:h?")) != -1) { switch (c) { case 'n': - data_stride = atoi(optarg); + count = atoi(optarg); break; - case 'f': - program_file = optarg; + case 'k': + kernel_file = optarg; break; case 'h': case '?': { @@ -42,16 +45,8 @@ static void parse_args(int argc, char **argv) { exit(-1); } } - - if (nullptr == program_file) { - show_usage(); - exit(-1); - } } -vx_device_h device = nullptr; -vx_buffer_h buffer = nullptr; - void cleanup() { if (buffer) { vx_buf_release(buffer); @@ -61,9 +56,7 @@ void cleanup() { } } -int run_test(vx_device_h device, - vx_buffer_h buffer, - const kernel_arg_t& kernel_arg, +int run_test(const kernel_arg_t& kernel_arg, uint32_t buf_size, uint32_t num_points) { // start device @@ -86,13 +79,13 @@ int run_test(vx_device_h device, std::cout << "verify result" << std::endl; { int errors = 0; - auto buf_ptr = (int*)vx_host_ptr(buffer); + auto buf_ptr = 
(int32_t*)vx_host_ptr(buffer); for (uint32_t i = 0; i < num_points; ++i) { int ref = i + i; int cur = buf_ptr[i]; if (cur != ref) { std::cout << "error at value " << i - << ": actual 0x" << cur << ", expected 0x" << ref << std::endl; + << ": actual 0x" << cur << ", expected 0x" << ref << std::endl; ++errors; } } @@ -113,21 +106,18 @@ int main(int argc, char *argv[]) { // parse command arguments parse_args(argc, argv); + if (count == 0) { + count = 1; + } + uint32_t max_cores = vx_dev_caps(VX_CAPS_MAX_CORES); uint32_t max_warps = vx_dev_caps(VX_CAPS_MAX_WARPS); uint32_t max_threads = vx_dev_caps(VX_CAPS_MAX_THREADS); - if (data_stride == 0) { - data_stride = 1; - } + uint32_t num_points = count * max_cores * max_warps * max_threads; + uint32_t buf_size = num_points * sizeof(uint32_t); - kernel_arg.stride = data_stride; - - uint32_t num_points = max_cores * max_warps * max_threads; - uint32_t buf_size = num_points * data_stride * sizeof(uint32_t); - - std::cout << "number of workitems: " << num_points << std::endl; - std::cout << "workitem size: " << data_stride * sizeof(uint32_t) << " bytes" << std::endl; + std::cout << "number of points: " << num_points << std::endl; std::cout << "buffer size: " << buf_size << " bytes" << std::endl; // open device connection @@ -136,55 +126,29 @@ int main(int argc, char *argv[]) { // upload program std::cout << "upload program" << std::endl; - RT_CHECK(vx_upload_kernel_file(device, program_file)); + RT_CHECK(vx_upload_kernel_file(device, kernel_file)); // allocate device memory std::cout << "allocate device memory" << std::endl; RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value)); kernel_arg.src0_ptr = value; - RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value)); kernel_arg.src1_ptr = value; - RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value)); kernel_arg.dst_ptr = value; + kernel_arg.count = count; + std::cout << "dev_src0=" << std::hex << kernel_arg.src0_ptr << std::endl; std::cout << "dev_src1=" << std::hex << 
kernel_arg.src1_ptr << std::endl; std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::endl; - + // allocate shared memory std::cout << "allocate shared memory" << std::endl; uint32_t alloc_size = std::max(buf_size, sizeof(kernel_arg_t)); RT_CHECK(vx_alloc_shared_mem(device, alloc_size, &buffer)); - - // populate source buffer0 values - std::cout << "populate source buffer0 values" << std::endl; - { - auto buf_ptr = (int*)vx_host_ptr(buffer); - for (uint32_t i = 0; i < num_points; ++i) { - buf_ptr[i] = i-1; - } - } - - // upload source buffer0 - std::cout << "upload source buffer0" << std::endl; - RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.src0_ptr, buf_size, 0)); - - // populate source buffer1 values - std::cout << "populate source buffer1 values" << std::endl; - { - auto buf_ptr = (int*)vx_host_ptr(buffer); - for (uint32_t i = 0; i < num_points; ++i) { - buf_ptr[i] = i+1; - } - } - - // upload source buffer1 - std::cout << "upload source buffer1" << std::endl; - RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.src1_ptr, buf_size, 0)); - + // upload kernel argument std::cout << "upload kernel argument" << std::endl; { @@ -193,9 +157,41 @@ int main(int argc, char *argv[]) { RT_CHECK(vx_copy_to_dev(buffer, KERNEL_ARG_DEV_MEM_ADDR, sizeof(kernel_arg_t), 0)); } + // upload source buffer0 + { + auto buf_ptr = (int32_t*)vx_host_ptr(buffer); + for (uint32_t i = 0; i < num_points; ++i) { + buf_ptr[i] = i-1; + } + } + std::cout << "upload source buffer0" << std::endl; + RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.src0_ptr, buf_size, 0)); + + // upload source buffer1 + { + auto buf_ptr = (int32_t*)vx_host_ptr(buffer); + for (uint32_t i = 0; i < num_points; ++i) { + buf_ptr[i] = i+1; + } + } + std::cout << "upload source buffer1" << std::endl; + RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.src1_ptr, buf_size, 0)); + + // clear destination buffer + { + auto buf_ptr = (int32_t*)vx_host_ptr(buffer); + for (uint32_t i = 0; i < num_points; ++i) { + buf_ptr[i] = 0; + } + } + 
std::cout << "clear destination buffer" << std::endl; + RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.dst_ptr, buf_size, 0)); + + + // run tests std::cout << "run tests" << std::endl; - RT_CHECK(run_test(device, buffer, kernel_arg, buf_size, num_points)); + RT_CHECK(run_test(kernel_arg, buf_size, num_points)); // cleanup std::cout << "cleanup" << std::endl; diff --git a/driver/tests/demo/kernel.bin b/driver/tests/demo/kernel.bin old mode 100755 new mode 100644 index 9fd70b2f..9585f4da Binary files a/driver/tests/demo/kernel.bin and b/driver/tests/demo/kernel.bin differ diff --git a/driver/tests/demo/kernel.c b/driver/tests/demo/kernel.c index 50b7455e..cd8c245d 100644 --- a/driver/tests/demo/kernel.c +++ b/driver/tests/demo/kernel.c @@ -6,13 +6,14 @@ void kernel_body(void* arg) { struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg); - int* src0_ptr = (int*)_arg->src0_ptr; - int* src1_ptr = (int*)_arg->src1_ptr; - int* dst_ptr = (int*)_arg->dst_ptr; + uint32_t count = _arg->count; + int32_t* src0_ptr = (int32_t*)_arg->src0_ptr; + int32_t* src1_ptr = (int32_t*)_arg->src1_ptr; + int32_t* dst_ptr = (int32_t*)_arg->dst_ptr; + + uint32_t offset = vx_thread_gid() * count; - unsigned offset = vx_thread_gid() * _arg->stride; - - for (unsigned i = 0; i < _arg->stride; ++i) { + for (uint32_t i = 0; i < count; ++i) { dst_ptr[offset+i] = src0_ptr[offset+i] + src1_ptr[offset+i]; } } diff --git a/hw/opae/README b/hw/opae/README index 1db15e33..6eb9f76c 100644 --- a/hw/opae/README +++ b/hw/opae/README @@ -68,6 +68,11 @@ vcd file vortex.vcd vcd add -r /*/Vortex/hw/rtl/* run -all +# compress FPGA output files +tar -zcvf output_files_1c.tar.gz `find ./build_fpga_1c -type f \( -iname \*.rpt -o -iname \*.txt -o -iname \*summary -o -iname \*.log \)` +tar -zcvf output_files_1c_rel.tar.gz `find ./build_fpga_1c_rel -type f \( -iname \*.rpt -o -iname \*.txt -o -iname \*summary -o -iname \*.log \)` +tar -zcvf output_files_2c_rel.tar.gz `find ./build_fpga_2c_rel -type f \( -iname \*.rpt -o 
-iname \*.txt -o -iname \*summary -o -iname \*.log \)` + # compress VCD trace tar -zcvf vortex.vcd.tar.gz ./build_ase_1c/work/vortex.vcd diff --git a/hw/opae/sources.txt b/hw/opae/sources.txt index 8af084f5..226f89f2 100644 --- a/hw/opae/sources.txt +++ b/hw/opae/sources.txt @@ -2,8 +2,10 @@ vortex_afu.json QI:vortex_afu.qsf -+define+NDEBUG -#+define+SCOPE ++define+SCOPE + ++define+NUM_WARPS=2 ++define+NUM_THREADS=2 #+define+DBG_PRINT_CORE_ICACHE #+define+DBG_PRINT_CORE_DCACHE @@ -77,6 +79,7 @@ QI:vortex_afu.qsf ../rtl/Vortex_Socket.v ../rtl/Vortex_Cluster.v ../rtl/Vortex.v +../rtl/VX_mem_unit.v ../rtl/VX_pipeline.v ../rtl/VX_front_end.v ../rtl/VX_back_end.v @@ -94,12 +97,11 @@ QI:vortex_afu.qsf ../rtl/VX_gpr.v ../rtl/VX_gpr_ram.v ../rtl/VX_gpr_stage.v -../rtl/VX_mem_ctrl.v ../rtl/VX_alu_unit.v ../rtl/VX_lsu_unit.v +../rtl/VX_lsu_addr_gen.v ../rtl/VX_decode.v ../rtl/VX_inst_multiplex.v -../rtl/VX_lsu_addr_gen.v ../rtl/VX_dcache_arb.v ../rtl/VX_mem_arb.v diff --git a/hw/opae/vortex_afu.qsf b/hw/opae/vortex_afu.qsf index cf05f28c..75c1bda1 100644 --- a/hw/opae/vortex_afu.qsf +++ b/hw/opae/vortex_afu.qsf @@ -1,4 +1,7 @@ # Analysis & Synthesis Assignments set_global_assignment -name VERILOG_INPUT_VERSION SYSTEMVERILOG_2009 -set_global_assignment -name ADD_PASS_THROUGH_LOGIC_TO_INFERRED_RAMS ON \ No newline at end of file +set_global_assignment -name ADD_PASS_THROUGH_LOGIC_TO_INFERRED_RAMS ON +set_global_assignment -name VERILOG_MACRO QUARTUS +set_global_assignment -name VERILOG_MACRO SYNTHESIS +set_global_assignment -name VERILOG_MACRO NDEBUG \ No newline at end of file diff --git a/hw/opae/vortex_afu.sv b/hw/opae/vortex_afu.sv index c9e68b05..7b43a733 100644 --- a/hw/opae/vortex_afu.sv +++ b/hw/opae/vortex_afu.sv @@ -13,8 +13,6 @@ import local_mem_cfg_pkg::*; `include "VX_define.vh" -`define VX_TO_DRAM_ADDR(x) x[`VX_DRAM_ADDR_WIDTH-1:(`VX_DRAM_ADDR_WIDTH-DRAM_ADDR_WIDTH)] - module vortex_afu #( parameter NUM_LOCAL_MEM_BANKS = 2 ) ( @@ -139,10 +137,12 @@ t_ccip_clAddr 
csr_io_addr; logic[DRAM_ADDR_WIDTH-1:0] csr_mem_addr; logic[DRAM_ADDR_WIDTH-1:0] csr_data_size; +`ifdef SCOPE logic [63:0] csr_scope_cmd; logic [63:0] csr_scope_data; logic csr_scope_read; logic csr_scope_write; +`endif // MMIO controller //////////////////////////////////////////////////////////// @@ -154,9 +154,11 @@ assign mmio_hdr = t_ccip_c0_ReqMmioHdr'(cp2af_sRxPort.c0.hdr); t_if_ccip_c2_Tx mmio_tx; assign af2cp_sTxPort.c2 = mmio_tx; +`ifdef SCOPE assign csr_scope_cmd = 64'(cp2af_sRxPort.c0.data); assign csr_scope_write = cp2af_sRxPort.c0.mmioWrValid && (MMIO_CSR_SCOPE_CMD == mmio_hdr.address); assign csr_scope_read = cp2af_sRxPort.c0.mmioRdValid && (MMIO_CSR_SCOPE_DATA == mmio_hdr.address); +`endif always_ff @(posedge clk) begin @@ -202,11 +204,13 @@ begin $display("%t: CSR_CMD: %0d", $time, $bits(csr_cmd)'(cp2af_sRxPort.c0.data)); `endif end + `ifdef SCOPE MMIO_CSR_SCOPE_CMD: begin `ifdef DBG_PRINT_OPAE $display("%t: CSR_SCOPE_CMD: %0h", $time, 64'(cp2af_sRxPort.c0.data)); `endif end + `endif default: begin // user-defined CSRs //if (mmio_hdr.addres >= MMIO_CSR_USER) begin @@ -237,18 +241,20 @@ begin 16'h0008: mmio_tx.data <= 64'h0; // reserved MMIO_CSR_STATUS: begin `ifdef DBG_PRINT_OPAE - if (state != mmio_tx.data) begin + if (state != state_t'(mmio_tx.data)) begin $display("%t: STATUS: state=%0d", $time, state); end `endif mmio_tx.data <= 64'(state); end + `ifdef SCOPE MMIO_CSR_SCOPE_DATA: begin mmio_tx.data <= csr_scope_data; `ifdef DBG_PRINT_OPAE $display("%t: SCOPE: data=%0h", $time, csr_scope_data); `endif end + `endif default: mmio_tx.data <= 64'h0; endcase mmio_tx.mmioRdValid <= 1; // post response @@ -406,7 +412,7 @@ begin case (state) CMD_TYPE_READ: avs_address = cci_dram_rd_req_addr; CMD_TYPE_WRITE: avs_address = cci_dram_wr_req_addr + ((DRAM_ADDR_WIDTH)'(t_cci_rdq_tag'(cci_rdq_dout))); - default: avs_address = `VX_TO_DRAM_ADDR(vx_dram_req_addr); + default: avs_address = 
vx_dram_req_addr[`VX_DRAM_ADDR_WIDTH-1:`VX_DRAM_ADDR_WIDTH-DRAM_ADDR_WIDTH]; endcase case (state) @@ -821,7 +827,7 @@ end `SCOPE_ASSIGN(scope_snp_rsp_tag, vx_snp_rsp_tag); `SCOPE_ASSIGN(scope_snp_rsp_ready, vx_snp_rsp_ready); -`STATIC_ASSERT($bits({`SCOPE_SIGNALS_DATA_LIST `SCOPE_SIGNALS_UPD_LIST}) == 641, "oops!") +`STATIC_ASSERT($bits({`SCOPE_SIGNALS_DATA_LIST `SCOPE_SIGNALS_UPD_LIST}) == 626, "oops!") wire scope_changed = (scope_icache_req_valid && scope_icache_req_ready) || (scope_icache_rsp_valid && scope_icache_rsp_ready) @@ -855,15 +861,17 @@ VX_scope #( `endif -// Vortex binding ///////////////////////////////////////////////////////////// +// Vortex ///////////////////////////////////////////////////////////////////// assign cmd_run_done = !vx_busy; Vortex_Socket #() vx_socket ( - `SCOPE_SIGNALS_ICACHE_ATTACH - `SCOPE_SIGNALS_DCACHE_ATTACH - `SCOPE_SIGNALS_CORE_ATTACH - `SCOPE_SIGNALS_BE_ATTACH + `SCOPE_SIGNALS_ISTAGE_BIND + `SCOPE_SIGNALS_LSU_BIND + `SCOPE_SIGNALS_CORE_BIND + `SCOPE_SIGNALS_ICACHE_BIND + `SCOPE_SIGNALS_PIPELINE_BIND + `SCOPE_SIGNALS_BE_BIND .clk (clk), .reset (vx_reset), diff --git a/hw/rtl/VX_back_end.v b/hw/rtl/VX_back_end.v index 240112e2..b188e3a8 100644 --- a/hw/rtl/VX_back_end.v +++ b/hw/rtl/VX_back_end.v @@ -3,7 +3,7 @@ module VX_back_end #( parameter CORE_ID = 0 ) ( - `SCOPE_SIGNALS_DCACHE_IO + `SCOPE_SIGNALS_LSU_IO `SCOPE_SIGNALS_BE_IO input wire clk, @@ -71,7 +71,7 @@ module VX_back_end #( VX_lsu_unit #( .CORE_ID(CORE_ID) ) lsu_unit ( - `SCOPE_SIGNALS_DCACHE_ATTACH + `SCOPE_SIGNALS_LSU_BIND .clk (clk), .reset (reset), diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 7e34d146..0985840d 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -2,6 +2,7 @@ `define VX_DEFINE `include "VX_config.vh" +`include "VX_scope.vh" // `define QUEUE_FORCE_MLAB 1 // `define SYN 1 @@ -139,7 +140,7 @@ /////////////////////////////////////////////////////////////////////////////// -`ifndef NDEBUG // pc, wb, rd, warp_num +`ifdef 
DBG_CORE_REQ_INFO // pc, wb, rd, warp_num `define DEBUG_CORE_REQ_MDATA_WIDTH (32 + 2 + 5 + `NW_BITS) `else `define DEBUG_CORE_REQ_MDATA_WIDTH 0 @@ -286,316 +287,5 @@ `define DRAM_TO_BYTE_ADDR(x) {x, (32-$bits(x))'(0)} -/////////////////////////////////////////////////////////////////////////////// - -`ifdef SCOPE - `define SCOPE_SIGNALS_DATA_LIST \ - scope_icache_req_warp_num, \ - scope_icache_req_addr, \ - scope_icache_req_tag, \ - scope_icache_rsp_data, \ - scope_icache_rsp_tag, \ - scope_dcache_req_warp_num, \ - scope_dcache_req_curr_PC, \ - scope_dcache_req_addr, \ - scope_dcache_req_rw, \ - scope_dcache_req_byteen, \ - scope_dcache_req_data, \ - scope_dcache_req_tag, \ - scope_dcache_rsp_data, \ - scope_dcache_rsp_tag, \ - scope_dram_req_addr, \ - scope_dram_req_rw, \ - scope_dram_req_byteen, \ - scope_dram_req_data, \ - scope_dram_req_tag, \ - scope_dram_rsp_data, \ - scope_dram_rsp_tag, \ - scope_snp_req_addr, \ - scope_snp_req_invalidate, \ - scope_snp_req_tag, \ - scope_snp_rsp_tag, \ - scope_decode_warp_num, \ - scope_decode_curr_PC, \ - scope_decode_is_jal, \ - scope_decode_rs1, \ - scope_decode_rs2, \ - scope_execute_warp_num, \ - scope_execute_rd, \ - scope_execute_a, \ - scope_execute_b, \ - scope_writeback_warp_num, \ - scope_writeback_wb, \ - scope_writeback_rd, \ - scope_writeback_data, - - - `define SCOPE_SIGNALS_UPD_LIST \ - scope_icache_req_valid, \ - scope_icache_req_ready, \ - scope_icache_rsp_valid, \ - scope_icache_rsp_ready, \ - scope_dcache_req_valid, \ - scope_dcache_req_ready, \ - scope_dcache_rsp_valid, \ - scope_dcache_rsp_ready, \ - scope_dram_req_valid, \ - scope_dram_req_ready, \ - scope_dram_rsp_valid, \ - scope_dram_rsp_ready, \ - scope_snp_req_valid, \ - scope_snp_req_ready, \ - scope_snp_rsp_valid, \ - scope_snp_rsp_ready, \ - scope_decode_valid, \ - scope_execute_valid, \ - scope_writeback_valid, \ - scope_schedule_delay, \ - scope_memory_delay, \ - scope_exec_delay, \ - scope_gpr_stage_delay, \ - scope_busy - - `define 
SCOPE_SIGNALS_DECL \ - wire scope_icache_req_valid; \ - wire [1:0] scope_icache_req_warp_num; \ - wire [31:0] scope_icache_req_addr; \ - wire [`ICORE_TAG_WIDTH-1:0] scope_icache_req_tag; \ - wire scope_icache_req_ready; \ - wire scope_icache_rsp_valid; \ - wire [31:0] scope_icache_rsp_data; \ - wire [`ICORE_TAG_WIDTH-1:0] scope_icache_rsp_tag; \ - wire scope_icache_rsp_ready; \ - wire [`DNUM_REQUESTS-1:0] scope_dcache_req_valid; \ - wire [1:0] scope_dcache_req_warp_num; \ - wire [31:0] scope_dcache_req_curr_PC; \ - wire [31:0] scope_dcache_req_addr; \ - wire scope_dcache_req_rw; \ - wire [3:0] scope_dcache_req_byteen; \ - wire [31:0] scope_dcache_req_data; \ - wire [`DCORE_TAG_WIDTH-1:0] scope_dcache_req_tag; \ - wire scope_dcache_req_ready; \ - wire [`DNUM_REQUESTS-1:0] scope_dcache_rsp_valid; \ - wire [31:0] scope_dcache_rsp_data; \ - wire [`DCORE_TAG_WIDTH-1:0] scope_dcache_rsp_tag; \ - wire scope_dcache_rsp_ready; \ - wire scope_dram_req_valid; \ - wire [31:0] scope_dram_req_addr; \ - wire scope_dram_req_rw; \ - wire [15:0] scope_dram_req_byteen; \ - wire [31:0] scope_dram_req_data; \ - wire [`VX_DRAM_TAG_WIDTH-1:0] scope_dram_req_tag; \ - wire scope_dram_req_ready; \ - wire scope_dram_rsp_valid; \ - wire [31:0] scope_dram_rsp_data; \ - wire [`VX_DRAM_TAG_WIDTH-1:0] scope_dram_rsp_tag; \ - wire scope_dram_rsp_ready; \ - wire scope_snp_req_valid; \ - wire [31:0] scope_snp_req_addr; \ - wire scope_snp_req_invalidate; \ - wire [`VX_SNP_TAG_WIDTH-1:0] scope_snp_req_tag; \ - wire scope_snp_req_ready; \ - wire scope_snp_rsp_valid; \ - wire [`VX_SNP_TAG_WIDTH-1:0] scope_snp_rsp_tag; \ - wire scope_busy; \ - wire scope_snp_rsp_ready; \ - wire scope_schedule_delay; \ - wire scope_memory_delay; \ - wire scope_exec_delay; \ - wire scope_gpr_stage_delay; \ - wire [3:0] scope_decode_valid; \ - wire [1:0] scope_decode_warp_num; \ - wire [31:0] scope_decode_curr_PC; \ - wire scope_decode_is_jal; \ - wire [4:0] scope_decode_rs1; \ - wire [4:0] scope_decode_rs2; \ - wire [3:0] 
scope_execute_valid; \ - wire [1:0] scope_execute_warp_num; \ - wire [4:0] scope_execute_rd; \ - wire [31:0] scope_execute_a; \ - wire [31:0] scope_execute_b; \ - wire [3:0] scope_writeback_valid; \ - wire [1:0] scope_writeback_warp_num; \ - wire [1:0] scope_writeback_wb; \ - wire [4:0] scope_writeback_rd; \ - wire [31:0] scope_writeback_data; - - `define SCOPE_SIGNALS_ICACHE_IO \ - /* verilator lint_off UNDRIVEN */ \ - output wire scope_icache_req_valid, \ - output wire [1:0] scope_icache_req_warp_num, \ - output wire [31:0] scope_icache_req_addr, \ - output wire [`ICORE_TAG_WIDTH-1:0] scope_icache_req_tag, \ - output wire scope_icache_req_ready, \ - output wire scope_icache_rsp_valid, \ - output wire [31:0] scope_icache_rsp_data, \ - output wire [`ICORE_TAG_WIDTH-1:0] scope_icache_rsp_tag, \ - output wire scope_icache_rsp_ready, \ - /* verilator lint_on UNDRIVEN */ - - `define SCOPE_SIGNALS_DCACHE_IO \ - /* verilator lint_off UNDRIVEN */ \ - output wire [`DNUM_REQUESTS-1:0] scope_dcache_req_valid, \ - output wire [1:0] scope_dcache_req_warp_num, \ - output wire [31:0] scope_dcache_req_curr_PC, \ - output wire [31:0] scope_dcache_req_addr, \ - output wire scope_dcache_req_rw, \ - output wire [3:0] scope_dcache_req_byteen, \ - output wire [31:0] scope_dcache_req_data, \ - output wire [`DCORE_TAG_WIDTH-1:0] scope_dcache_req_tag, \ - output wire scope_dcache_req_ready, \ - output wire [`DNUM_REQUESTS-1:0] scope_dcache_rsp_valid, \ - output wire [31:0] scope_dcache_rsp_data, \ - output wire [`DCORE_TAG_WIDTH-1:0] scope_dcache_rsp_tag, \ - output wire scope_dcache_rsp_ready, \ - /* verilator lint_on UNDRIVEN */ - - `define SCOPE_SIGNALS_DRAM_IO \ - /* verilator lint_off UNDRIVEN */ \ - output wire scope_dram_req_valid, \ - output wire [31:0] scope_dram_req_addr, \ - output wire scope_dram_req_rw, \ - output wire [15:0] scope_dram_req_byteen, \ - output wire [31:0] scope_dram_req_data, \ - output wire [`VX_DRAM_TAG_WIDTH-1:0] scope_dram_req_tag, \ - output wire 
scope_dram_req_ready, \ - output wire scope_dram_rsp_valid, \ - output wire [31:0] scope_dram_rsp_data, \ - output wire [`VX_DRAM_TAG_WIDTH-1:0] scope_dram_rsp_tag, \ - output wire scope_dram_rsp_ready, \ - /* verilator lint_on UNDRIVEN */ - - `define SCOPE_SIGNALS_SNP_IO \ - /* verilator lint_off UNDRIVEN */ \ - output wire scope_snp_req_valid, \ - output wire [31:0] scope_snp_req_addr, \ - output wire scope_snp_req_invalidate, \ - output wire [`VX_SNP_TAG_WIDTH-1:0] scope_snp_req_tag, \ - output wire scope_snp_req_ready, \ - output wire scope_snp_rsp_valid, \ - output wire [`VX_SNP_TAG_WIDTH-1:0] scope_snp_rsp_tag, \ - output wire scope_snp_rsp_ready, \ - /* verilator lint_on UNDRIVEN */ - - `define SCOPE_SIGNALS_CORE_IO \ - /* verilator lint_off UNDRIVEN */ \ - output wire scope_busy, \ - output wire scope_schedule_delay, \ - output wire scope_memory_delay, \ - output wire scope_exec_delay, \ - output wire scope_gpr_stage_delay, \ - /* verilator lint_on UNDRIVEN */ - - `define SCOPE_SIGNALS_BE_IO \ - /* verilator lint_off UNDRIVEN */ \ - output wire [3:0] scope_decode_valid, \ - output wire [1:0] scope_decode_warp_num, \ - output wire [31:0] scope_decode_curr_PC, \ - output wire scope_decode_is_jal, \ - output wire [4:0] scope_decode_rs1, \ - output wire [4:0] scope_decode_rs2, \ - output wire [3:0] scope_execute_valid, \ - output wire [1:0] scope_execute_warp_num, \ - output wire [4:0] scope_execute_rd, \ - output wire [31:0] scope_execute_a, \ - output wire [31:0] scope_execute_b, \ - output wire [3:0] scope_writeback_valid, \ - output wire [1:0] scope_writeback_warp_num, \ - output wire [1:0] scope_writeback_wb, \ - output wire [4:0] scope_writeback_rd, \ - output wire [31:0] scope_writeback_data, - /* verilator lint_on UNDRIVEN */ - - `define SCOPE_SIGNALS_ICACHE_ATTACH \ - .scope_icache_req_valid (scope_icache_req_valid), \ - .scope_icache_req_warp_num (scope_icache_req_warp_num), \ - .scope_icache_req_addr (scope_icache_req_addr), \ - .scope_icache_req_tag 
(scope_icache_req_tag), \ - .scope_icache_req_ready (scope_icache_req_ready), \ - .scope_icache_rsp_valid (scope_icache_rsp_valid), \ - .scope_icache_rsp_data (scope_icache_rsp_data), \ - .scope_icache_rsp_tag (scope_icache_rsp_tag), \ - .scope_icache_rsp_ready (scope_icache_rsp_ready), - - `define SCOPE_SIGNALS_DCACHE_ATTACH \ - .scope_dcache_req_valid (scope_dcache_req_valid), \ - .scope_dcache_req_warp_num (scope_dcache_req_warp_num), \ - .scope_dcache_req_curr_PC (scope_dcache_req_curr_PC), \ - .scope_dcache_req_addr (scope_dcache_req_addr), \ - .scope_dcache_req_rw (scope_dcache_req_rw), \ - .scope_dcache_req_byteen(scope_dcache_req_byteen), \ - .scope_dcache_req_data (scope_dcache_req_data), \ - .scope_dcache_req_tag (scope_dcache_req_tag), \ - .scope_dcache_req_ready (scope_dcache_req_ready), \ - .scope_dcache_rsp_valid (scope_dcache_rsp_valid), \ - .scope_dcache_rsp_data (scope_dcache_rsp_data), \ - .scope_dcache_rsp_tag (scope_dcache_rsp_tag), \ - .scope_dcache_rsp_ready (scope_dcache_rsp_ready), - - `define SCOPE_SIGNALS_DRAM_ATTACH \ - .scope_dram_req_valid (scope_dram_req_valid), \ - .scope_dram_req_addr (scope_dram_req_addr), \ - .scope_dram_req_rw (scope_dram_req_rw), \ - .scope_dram_req_byteen (scope_dram_req_byteen), \ - .scope_dram_req_data (scope_dram_req_data), \ - .scope_dram_req_tag (scope_dram_req_tag), \ - .scope_dram_req_ready (scope_dram_req_ready), \ - .scope_dram_rsp_valid (scope_dram_rsp_valid), \ - .scope_dram_rsp_data (scope_dram_rsp_data), \ - .scope_dram_rsp_tag (scope_dram_rsp_tag), \ - .scope_dram_rsp_ready (scope_dram_rsp_ready), - - `define SCOPE_SIGNALS_SNP_ATTACH \ - .scope_snp_req_valid (scope_snp_req_valid), \ - .scope_snp_req_addr (scope_snp_req_addr), \ - .scope_snp_req_invalidate(scope_snp_req_invalidate), \ - .scope_snp_req_tag (scope_snp_req_tag), \ - .scope_snp_req_ready (scope_snp_req_ready), \ - .scope_snp_rsp_valid (scope_snp_rsp_valid), \ - .scope_snp_rsp_tag (scope_snp_rsp_tag), \ - .scope_snp_rsp_ready 
(scope_snp_rsp_ready), - - `define SCOPE_SIGNALS_CORE_ATTACH \ - .scope_busy (scope_busy), \ - .scope_schedule_delay (scope_schedule_delay), \ - .scope_memory_delay (scope_memory_delay), \ - .scope_exec_delay (scope_exec_delay), \ - .scope_gpr_stage_delay (scope_gpr_stage_delay), - - `define SCOPE_SIGNALS_BE_ATTACH \ - .scope_decode_valid (scope_decode_valid), \ - .scope_decode_warp_num (scope_decode_warp_num), \ - .scope_decode_curr_PC (scope_decode_curr_PC), \ - .scope_decode_is_jal (scope_decode_is_jal), \ - .scope_decode_rs1 (scope_decode_rs1), \ - .scope_decode_rs2 (scope_decode_rs2), \ - .scope_execute_valid (scope_execute_valid), \ - .scope_execute_warp_num (scope_execute_warp_num), \ - .scope_execute_rd (scope_execute_rd), \ - .scope_execute_a (scope_execute_a), \ - .scope_execute_b (scope_execute_b), \ - .scope_writeback_valid (scope_writeback_valid), \ - .scope_writeback_warp_num (scope_writeback_warp_num), \ - .scope_writeback_wb (scope_writeback_wb), \ - .scope_writeback_rd (scope_writeback_rd), \ - .scope_writeback_data (scope_writeback_data), - - `define SCOPE_ASSIGN(d,s) assign d = s -`else - `define SCOPE_SIGNALS_ICACHE_IO - `define SCOPE_SIGNALS_DCACHE_IO - `define SCOPE_SIGNALS_DRAM_IO - `define SCOPE_SIGNALS_CORE_IO - `define SCOPE_SIGNALS_BE_IO - - `define SCOPE_SIGNALS_ICACHE_ATTACH - `define SCOPE_SIGNALS_DCACHE_ATTACH - `define SCOPE_SIGNALS_DRAM_ATTACH - `define SCOPE_SIGNALS_CORE_ATTACH - `define SCOPE_SIGNALS_BE_ATTACH - - `define SCOPE_ASSIGN(d,s) -`endif - // VX_DEFINE `endif diff --git a/hw/rtl/VX_front_end.v b/hw/rtl/VX_front_end.v index a24f5e89..a6e7922b 100644 --- a/hw/rtl/VX_front_end.v +++ b/hw/rtl/VX_front_end.v @@ -3,7 +3,7 @@ module VX_front_end #( parameter CORE_ID = 0 ) ( - `SCOPE_SIGNALS_ICACHE_IO + `SCOPE_SIGNALS_ISTAGE_IO input wire clk, input wire reset, @@ -65,7 +65,7 @@ module VX_front_end #( VX_icache_stage #( .CORE_ID(CORE_ID) ) icache_stage ( - `SCOPE_SIGNALS_ICACHE_ATTACH + `SCOPE_SIGNALS_ISTAGE_BIND .clk (clk), 
.reset (reset), diff --git a/hw/rtl/VX_icache_stage.v b/hw/rtl/VX_icache_stage.v index a51bc206..1464dcab 100644 --- a/hw/rtl/VX_icache_stage.v +++ b/hw/rtl/VX_icache_stage.v @@ -3,7 +3,7 @@ module VX_icache_stage #( parameter CORE_ID = 0 ) ( - `SCOPE_SIGNALS_ICACHE_IO + `SCOPE_SIGNALS_ISTAGE_IO input wire clk, input wire reset, @@ -68,7 +68,7 @@ module VX_icache_stage #( // Can't accept new request assign icache_stage_delay = mrq_full || ~icache_req_if.core_req_ready; -`ifndef NDEBUG +`ifdef DBG_CORE_REQ_INFO assign icache_req_if.core_req_tag = {fe_inst_meta_fi.inst_pc, 2'b1, 5'b0, fe_inst_meta_fi.warp_num, mrq_write_addr}; `else assign icache_req_if.core_req_tag = mrq_write_addr; @@ -95,7 +95,7 @@ module VX_icache_stage #( `SCOPE_ASSIGN(scope_icache_rsp_ready, icache_rsp_if.core_rsp_ready); `ifdef DBG_PRINT_CORE_ICACHE - always_ff @(posedge clk) begin + always @(posedge clk) begin if (icache_req_if.core_req_valid && icache_req_if.core_req_ready) begin $display("%t: I%01d$ req: tag=%0h, pc=%0h, warp=%0d", $time, CORE_ID, mrq_write_addr, fe_inst_meta_fi.inst_pc, fe_inst_meta_fi.warp_num); end diff --git a/hw/rtl/VX_lsu_unit.v b/hw/rtl/VX_lsu_unit.v index a0df418d..ed9359f2 100644 --- a/hw/rtl/VX_lsu_unit.v +++ b/hw/rtl/VX_lsu_unit.v @@ -3,7 +3,7 @@ module VX_lsu_unit #( parameter CORE_ID = 0 ) ( - `SCOPE_SIGNALS_DCACHE_IO + `SCOPE_SIGNALS_LSU_IO input wire clk, input wire reset, @@ -130,10 +130,10 @@ module VX_lsu_unit #( assign dcache_req_if.core_req_addr = mem_req_addr; assign dcache_req_if.core_req_data = mem_req_data; -`ifndef NDEBUG - assign dcache_req_if.core_req_tag = {use_pc, use_wb, use_rd, use_warp_num, mrq_write_addr}; +`ifdef DBG_CORE_REQ_INFO + assign dcache_req_if.core_req_tag = {use_pc, use_wb, use_rd, use_warp_num, mrq_write_addr}; `else - assign dcache_req_if.core_req_tag = mrq_write_addr; + assign dcache_req_if.core_req_tag = mrq_write_addr; `endif // Can't accept new request @@ -179,7 +179,7 @@ module VX_lsu_unit #( 
`SCOPE_ASSIGN(scope_dcache_rsp_ready, dcache_rsp_if.core_rsp_ready); `ifdef DBG_PRINT_CORE_DCACHE - always_ff @(posedge clk) begin + always @(posedge clk) begin if ((| dcache_req_if.core_req_valid) && dcache_req_if.core_req_ready) begin $display("%t: D%01d$ req: valid=%b, addr=%0h, tag=%0h, r=%0d, w=%0d, pc=%0h, rd=%0d, warp=%0d, byteen=%0h, data=%0h", $time, CORE_ID, use_valid, use_address, mrq_write_addr, use_mem_read, use_mem_write, use_pc, use_rd, use_warp_num, mem_req_byteen, mem_req_data); diff --git a/hw/rtl/VX_mem_ctrl.v b/hw/rtl/VX_mem_unit.v similarity index 96% rename from hw/rtl/VX_mem_ctrl.v rename to hw/rtl/VX_mem_unit.v index c8e27436..c5eafbed 100644 --- a/hw/rtl/VX_mem_ctrl.v +++ b/hw/rtl/VX_mem_unit.v @@ -1,8 +1,10 @@ `include "VX_define.vh" -module VX_mem_ctrl # ( +module VX_mem_unit # ( parameter CORE_ID = 0 ) ( + `SCOPE_SIGNALS_ICACHE_IO + input wire clk, input wire reset, @@ -74,7 +76,7 @@ module VX_mem_ctrl # ( .CORE_TAG_WIDTH (`DCORE_TAG_WIDTH), .CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS), .DRAM_TAG_WIDTH (`SDRAM_TAG_WIDTH) - ) gpu_smem ( + ) smem ( .clk (clk), .reset (reset), @@ -157,7 +159,7 @@ module VX_mem_ctrl # ( .CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS), .DRAM_TAG_WIDTH (`DDRAM_TAG_WIDTH), .SNP_REQ_TAG_WIDTH (`DSNP_TAG_WIDTH) - ) gpu_dcache ( + ) dcache ( .clk (clk), .reset (reset), @@ -239,7 +241,9 @@ module VX_mem_ctrl # ( .CORE_TAG_WIDTH (`DCORE_TAG_WIDTH), .CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS), .DRAM_TAG_WIDTH (`IDRAM_TAG_WIDTH) - ) gpu_icache ( + ) icache ( + `SCOPE_SIGNALS_ICACHE_BIND + .clk (clk), .reset (reset), diff --git a/hw/rtl/VX_pipeline.v b/hw/rtl/VX_pipeline.v index f5c937eb..d3712ae0 100644 --- a/hw/rtl/VX_pipeline.v +++ b/hw/rtl/VX_pipeline.v @@ -3,9 +3,9 @@ module VX_pipeline #( parameter CORE_ID = 0 ) ( - `SCOPE_SIGNALS_ICACHE_IO - `SCOPE_SIGNALS_DCACHE_IO - `SCOPE_SIGNALS_CORE_IO + `SCOPE_SIGNALS_ISTAGE_IO + `SCOPE_SIGNALS_LSU_IO + `SCOPE_SIGNALS_PIPELINE_IO `SCOPE_SIGNALS_BE_IO // Clock @@ -100,7 +100,7 @@ module 
VX_pipeline #( VX_front_end #( .CORE_ID(CORE_ID) ) front_end ( - `SCOPE_SIGNALS_ICACHE_ATTACH + `SCOPE_SIGNALS_ISTAGE_BIND .clk (clk), .reset (reset), @@ -129,8 +129,8 @@ module VX_pipeline #( VX_back_end #( .CORE_ID(CORE_ID) ) back_end ( - `SCOPE_SIGNALS_DCACHE_ATTACH - `SCOPE_SIGNALS_BE_ATTACH + `SCOPE_SIGNALS_LSU_BIND + `SCOPE_SIGNALS_BE_BIND .clk (clk), .reset (reset), @@ -181,7 +181,7 @@ module VX_pipeline #( `SCOPE_ASSIGN(scope_gpr_stage_delay, gpr_stage_delay); `ifdef DBG_PRINT_WB - always_ff @(posedge clk) begin + always @(posedge clk) begin if ((| writeback_if.valid) && (writeback_if.wb != 0)) begin $display("%t: Writeback: wid=%0d, rd=%0d, data=%0h", $time, writeback_if.warp_num, writeback_if.rd, writeback_if.data); end diff --git a/hw/rtl/VX_scheduler.v b/hw/rtl/VX_scheduler.v index e54a527a..9744533e 100644 --- a/hw/rtl/VX_scheduler.v +++ b/hw/rtl/VX_scheduler.v @@ -13,11 +13,10 @@ module VX_scheduler ( output wire schedule_delay, output wire is_empty ); - reg [31:0][`NUM_THREADS-1:0] rename_table[`NUM_WARPS-1:0]; - reg [31:0] count_valid; + localparam CTVW = `CLOG2(`NUM_WARPS * 32 + 1); - wire acquire_rd = (| bckE_req_if.valid) && (bckE_req_if.wb != 0) && (bckE_req_if.rd != 0); - wire release_rd = (| writeback_if.valid) && (writeback_if.wb != 0) && (writeback_if.rd != 0); + reg [31:0][`NUM_THREADS-1:0] rename_table[`NUM_WARPS-1:0]; + reg [CTVW-1:0] count_valid; wire is_store = (bckE_req_if.mem_write != `BYTE_EN_NO); wire is_load = (bckE_req_if.mem_read != `BYTE_EN_NO); @@ -51,7 +50,14 @@ module VX_scheduler ( integer i, w; + wire acquire_rd = (| bckE_req_if.valid) && (bckE_req_if.wb != 0) && (bckE_req_if.rd != 0) && !schedule_delay; + wire release_rd = (| writeback_if.valid) && (writeback_if.wb != 0) && (writeback_if.rd != 0); + wire [`NUM_THREADS-1:0] valid_wb_new_mask = rename_table[writeback_if.warp_num][writeback_if.rd] & ~writeback_if.valid; + /* count_valid_next is the combinational next value of count_valid: +1 when a destination register is acquired without a completing release, -1 on a completing release alone, otherwise unchanged. It must be a net so the expression below is a continuous assignment; a reg declaration initializer is evaluated only once, at time 0. */ + wire [CTVW-1:0] count_valid_next = (acquire_rd && ~(release_rd && (0 == valid_wb_new_mask))) ? 
(count_valid + 1) : + (~acquire_rd && (release_rd && (0 == valid_wb_new_mask))) ? (count_valid - 1) : + count_valid; always @(posedge clk) begin if (reset) begin @@ -62,19 +68,14 @@ module VX_scheduler ( end count_valid <= 0; end else begin - if (acquire_rd && !schedule_delay) begin + if (acquire_rd) begin rename_table[bckE_req_if.warp_num][bckE_req_if.rd] <= bckE_req_if.valid; - count_valid <= count_valid + 1; end if (release_rd) begin assert(rename_table[writeback_if.warp_num][writeback_if.rd] != 0); - rename_table[writeback_if.warp_num][writeback_if.rd] <= valid_wb_new_mask; - if (0 == valid_wb_new_mask) begin - assert(count_valid != 0); - count_valid <= count_valid - 1; - end - end - + rename_table[writeback_if.warp_num][writeback_if.rd] <= valid_wb_new_mask; + end + count_valid <= count_valid_next; end end diff --git a/hw/rtl/VX_scope.vh b/hw/rtl/VX_scope.vh new file mode 100644 index 00000000..b2bcf632 --- /dev/null +++ b/hw/rtl/VX_scope.vh @@ -0,0 +1,283 @@ +`ifndef VX_SCOPE +`define VX_SCOPE + +`ifdef SCOPE + +`define SCOPE_SIGNALS_DATA_LIST \ + scope_icache_req_warp_num, \ + scope_icache_req_addr, \ + scope_icache_req_tag, \ + scope_icache_rsp_data, \ + scope_icache_rsp_tag, \ + scope_dcache_req_warp_num, \ + scope_dcache_req_curr_PC, \ + scope_dcache_req_addr, \ + scope_dcache_req_rw, \ + scope_dcache_req_byteen, \ + scope_dcache_req_data, \ + scope_dcache_req_tag, \ + scope_dcache_rsp_data, \ + scope_dcache_rsp_tag, \ + scope_dram_req_addr, \ + scope_dram_req_rw, \ + scope_dram_req_byteen, \ + scope_dram_req_data, \ + scope_dram_req_tag, \ + scope_dram_rsp_data, \ + scope_dram_rsp_tag, \ + scope_snp_req_addr, \ + scope_snp_req_invalidate, \ + scope_snp_req_tag, \ + scope_snp_rsp_tag, \ + scope_decode_warp_num, \ + scope_decode_curr_PC, \ + scope_decode_is_jal, \ + scope_decode_rs1, \ + scope_decode_rs2, \ + scope_execute_warp_num, \ + scope_execute_rd, \ + scope_execute_a, \ + scope_execute_b, \ + scope_writeback_warp_num, \ + scope_writeback_wb, \ + 
scope_writeback_rd, \ + scope_writeback_data, + + + `define SCOPE_SIGNALS_UPD_LIST \ + scope_icache_req_valid, \ + scope_icache_req_ready, \ + scope_icache_rsp_valid, \ + scope_icache_rsp_ready, \ + scope_dcache_req_valid, \ + scope_dcache_req_ready, \ + scope_dcache_rsp_valid, \ + scope_dcache_rsp_ready, \ + scope_dram_req_valid, \ + scope_dram_req_ready, \ + scope_dram_rsp_valid, \ + scope_dram_rsp_ready, \ + scope_snp_req_valid, \ + scope_snp_req_ready, \ + scope_snp_rsp_valid, \ + scope_snp_rsp_ready, \ + scope_decode_valid, \ + scope_execute_valid, \ + scope_writeback_valid, \ + scope_schedule_delay, \ + scope_memory_delay, \ + scope_exec_delay, \ + scope_gpr_stage_delay, \ + scope_busy, \ + scope_idram_req_valid, \ + scope_idram_req_ready, \ + scope_idram_rsp_valid, \ + scope_idram_rsp_ready + + `define SCOPE_SIGNALS_DECL \ + wire scope_icache_req_valid; \ + wire [`NW_BITS-1:0] scope_icache_req_warp_num; \ + wire [31:0] scope_icache_req_addr; \ + wire [`ICORE_TAG_WIDTH-1:0] scope_icache_req_tag; \ + wire scope_icache_req_ready; \ + wire scope_icache_rsp_valid; \ + wire [31:0] scope_icache_rsp_data; \ + wire [`ICORE_TAG_WIDTH-1:0] scope_icache_rsp_tag; \ + wire scope_icache_rsp_ready; \ + wire [`NUM_THREADS-1:0] scope_dcache_req_valid; \ + wire [`NW_BITS-1:0] scope_dcache_req_warp_num; \ + wire [31:0] scope_dcache_req_curr_PC; \ + wire [31:0] scope_dcache_req_addr; \ + wire scope_dcache_req_rw; \ + wire [3:0] scope_dcache_req_byteen; \ + wire [31:0] scope_dcache_req_data; \ + wire [`DCORE_TAG_WIDTH-1:0] scope_dcache_req_tag; \ + wire scope_dcache_req_ready; \ + wire [`NUM_THREADS-1:0] scope_dcache_rsp_valid; \ + wire [31:0] scope_dcache_rsp_data; \ + wire [`DCORE_TAG_WIDTH-1:0] scope_dcache_rsp_tag; \ + wire scope_dcache_rsp_ready; \ + wire scope_dram_req_valid; \ + wire [31:0] scope_dram_req_addr; \ + wire scope_dram_req_rw; \ + wire [15:0] scope_dram_req_byteen; \ + wire [31:0] scope_dram_req_data; \ + wire [`VX_DRAM_TAG_WIDTH-1:0] scope_dram_req_tag; \ + 
wire scope_dram_req_ready; \ + wire scope_dram_rsp_valid; \ + wire [31:0] scope_dram_rsp_data; \ + wire [`VX_DRAM_TAG_WIDTH-1:0] scope_dram_rsp_tag; \ + wire scope_dram_rsp_ready; \ + wire scope_snp_req_valid; \ + wire [31:0] scope_snp_req_addr; \ + wire scope_snp_req_invalidate; \ + wire [`VX_SNP_TAG_WIDTH-1:0] scope_snp_req_tag; \ + wire scope_snp_req_ready; \ + wire scope_snp_rsp_valid; \ + wire [`VX_SNP_TAG_WIDTH-1:0] scope_snp_rsp_tag; \ + wire scope_busy; \ + wire scope_snp_rsp_ready; \ + wire scope_schedule_delay; \ + wire scope_memory_delay; \ + wire scope_exec_delay; \ + wire scope_gpr_stage_delay; \ + wire [`NUM_THREADS-1:0] scope_decode_valid; \ + wire [`NW_BITS-1:0] scope_decode_warp_num; \ + wire [31:0] scope_decode_curr_PC; \ + wire scope_decode_is_jal; \ + wire [4:0] scope_decode_rs1; \ + wire [4:0] scope_decode_rs2; \ + wire [`NUM_THREADS-1:0] scope_execute_valid; \ + wire [`NW_BITS-1:0] scope_execute_warp_num; \ + wire [4:0] scope_execute_rd; \ + wire [31:0] scope_execute_a; \ + wire [31:0] scope_execute_b; \ + wire [`NUM_THREADS-1:0] scope_writeback_valid; \ + wire [`NW_BITS-1:0] scope_writeback_warp_num; \ + wire [1:0] scope_writeback_wb; \ + wire [4:0] scope_writeback_rd; \ + wire [31:0] scope_writeback_data; \ + wire scope_idram_req_valid; \ + wire scope_idram_req_ready; \ + wire scope_idram_rsp_valid; \ + wire scope_idram_rsp_ready; + + `define SCOPE_SIGNALS_ISTAGE_IO \ + output wire scope_icache_req_valid, \ + output wire [`NW_BITS-1:0] scope_icache_req_warp_num, \ + output wire [31:0] scope_icache_req_addr, \ + output wire [`ICORE_TAG_WIDTH-1:0] scope_icache_req_tag, \ + output wire scope_icache_req_ready, \ + output wire scope_icache_rsp_valid, \ + output wire [31:0] scope_icache_rsp_data, \ + output wire [`ICORE_TAG_WIDTH-1:0] scope_icache_rsp_tag, \ + output wire scope_icache_rsp_ready, + + `define SCOPE_SIGNALS_LSU_IO \ + output wire [`NUM_THREADS-1:0] scope_dcache_req_valid, \ + output wire [`NW_BITS-1:0] scope_dcache_req_warp_num, \ + 
output wire [31:0] scope_dcache_req_curr_PC, \ + output wire [31:0] scope_dcache_req_addr, \ + output wire scope_dcache_req_rw, \ + output wire [3:0] scope_dcache_req_byteen, \ + output wire [31:0] scope_dcache_req_data, \ + output wire [`DCORE_TAG_WIDTH-1:0] scope_dcache_req_tag, \ + output wire scope_dcache_req_ready, \ + output wire [`NUM_THREADS-1:0] scope_dcache_rsp_valid, \ + output wire [31:0] scope_dcache_rsp_data, \ + output wire [`DCORE_TAG_WIDTH-1:0] scope_dcache_rsp_tag, \ + output wire scope_dcache_rsp_ready, + + `define SCOPE_SIGNALS_CORE_IO \ + + `define SCOPE_SIGNALS_ICACHE_IO \ + output wire scope_idram_req_valid, \ + output wire scope_idram_req_ready, \ + output wire scope_idram_rsp_valid, \ + output wire scope_idram_rsp_ready, + + `define SCOPE_SIGNALS_PIPELINE_IO \ + output wire scope_busy, \ + output wire scope_schedule_delay, \ + output wire scope_memory_delay, \ + output wire scope_exec_delay, \ + output wire scope_gpr_stage_delay, + + `define SCOPE_SIGNALS_BE_IO \ + output wire [`NUM_THREADS-1:0] scope_decode_valid, \ + output wire [`NW_BITS-1:0] scope_decode_warp_num, \ + output wire [31:0] scope_decode_curr_PC, \ + output wire scope_decode_is_jal, \ + output wire [4:0] scope_decode_rs1, \ + output wire [4:0] scope_decode_rs2, \ + output wire [`NUM_THREADS-1:0] scope_execute_valid, \ + output wire [`NW_BITS-1:0] scope_execute_warp_num, \ + output wire [4:0] scope_execute_rd, \ + output wire [31:0] scope_execute_a, \ + output wire [31:0] scope_execute_b, \ + output wire [`NUM_THREADS-1:0] scope_writeback_valid, \ + output wire [`NW_BITS-1:0] scope_writeback_warp_num, \ + output wire [1:0] scope_writeback_wb, \ + output wire [4:0] scope_writeback_rd, \ + output wire [31:0] scope_writeback_data, + + `define SCOPE_SIGNALS_ISTAGE_BIND \ + .scope_icache_req_valid (scope_icache_req_valid), \ + .scope_icache_req_warp_num (scope_icache_req_warp_num), \ + .scope_icache_req_addr (scope_icache_req_addr), \ + .scope_icache_req_tag 
(scope_icache_req_tag), \ + .scope_icache_req_ready (scope_icache_req_ready), \ + .scope_icache_rsp_valid (scope_icache_rsp_valid), \ + .scope_icache_rsp_data (scope_icache_rsp_data), \ + .scope_icache_rsp_tag (scope_icache_rsp_tag), \ + .scope_icache_rsp_ready (scope_icache_rsp_ready), + + `define SCOPE_SIGNALS_LSU_BIND \ + .scope_dcache_req_valid (scope_dcache_req_valid), \ + .scope_dcache_req_warp_num (scope_dcache_req_warp_num), \ + .scope_dcache_req_curr_PC (scope_dcache_req_curr_PC), \ + .scope_dcache_req_addr (scope_dcache_req_addr), \ + .scope_dcache_req_rw (scope_dcache_req_rw), \ + .scope_dcache_req_byteen(scope_dcache_req_byteen), \ + .scope_dcache_req_data (scope_dcache_req_data), \ + .scope_dcache_req_tag (scope_dcache_req_tag), \ + .scope_dcache_req_ready (scope_dcache_req_ready), \ + .scope_dcache_rsp_valid (scope_dcache_rsp_valid), \ + .scope_dcache_rsp_data (scope_dcache_rsp_data), \ + .scope_dcache_rsp_tag (scope_dcache_rsp_tag), \ + .scope_dcache_rsp_ready (scope_dcache_rsp_ready), + + `define SCOPE_SIGNALS_CORE_BIND \ + + `define SCOPE_SIGNALS_ICACHE_BIND \ + .scope_idram_req_valid (scope_idram_req_valid), \ + .scope_idram_req_ready (scope_idram_req_ready), \ + .scope_idram_rsp_valid (scope_idram_rsp_valid), \ + .scope_idram_rsp_ready (scope_idram_rsp_ready), + + `define SCOPE_SIGNALS_PIPELINE_BIND \ + .scope_busy (scope_busy), \ + .scope_schedule_delay (scope_schedule_delay), \ + .scope_memory_delay (scope_memory_delay), \ + .scope_exec_delay (scope_exec_delay), \ + .scope_gpr_stage_delay (scope_gpr_stage_delay), + + `define SCOPE_SIGNALS_BE_BIND \ + .scope_decode_valid (scope_decode_valid), \ + .scope_decode_warp_num (scope_decode_warp_num), \ + .scope_decode_curr_PC (scope_decode_curr_PC), \ + .scope_decode_is_jal (scope_decode_is_jal), \ + .scope_decode_rs1 (scope_decode_rs1), \ + .scope_decode_rs2 (scope_decode_rs2), \ + .scope_execute_valid (scope_execute_valid), \ + .scope_execute_warp_num (scope_execute_warp_num), \ + .scope_execute_rd 
(scope_execute_rd), \ + .scope_execute_a (scope_execute_a), \ + .scope_execute_b (scope_execute_b), \ + .scope_writeback_valid (scope_writeback_valid), \ + .scope_writeback_warp_num (scope_writeback_warp_num), \ + .scope_writeback_wb (scope_writeback_wb), \ + .scope_writeback_rd (scope_writeback_rd), \ + .scope_writeback_data (scope_writeback_data), + + `define SCOPE_ASSIGN(d,s) assign d = s +`else + `define SCOPE_SIGNALS_ISTAGE_IO + `define SCOPE_SIGNALS_LSU_IO + `define SCOPE_SIGNALS_CORE_IO + `define SCOPE_SIGNALS_ICACHE_IO + `define SCOPE_SIGNALS_PIPELINE_IO + `define SCOPE_SIGNALS_BE_IO + + `define SCOPE_SIGNALS_ISTAGE_BIND + `define SCOPE_SIGNALS_LSU_BIND + `define SCOPE_SIGNALS_CORE_BIND + `define SCOPE_SIGNALS_ICACHE_BIND + `define SCOPE_SIGNALS_PIPELINE_BIND + `define SCOPE_SIGNALS_BE_BIND + + `define SCOPE_ASSIGN(d,s) +`endif + +// VX_SCOPE +`endif \ No newline at end of file diff --git a/hw/rtl/Vortex.v b/hw/rtl/Vortex.v index a1864c7e..0f8ac02e 100644 --- a/hw/rtl/Vortex.v +++ b/hw/rtl/Vortex.v @@ -3,9 +3,11 @@ module Vortex #( parameter CORE_ID = 0 ) ( - `SCOPE_SIGNALS_ICACHE_IO - `SCOPE_SIGNALS_DCACHE_IO + `SCOPE_SIGNALS_ISTAGE_IO + `SCOPE_SIGNALS_LSU_IO `SCOPE_SIGNALS_CORE_IO + `SCOPE_SIGNALS_ICACHE_IO + `SCOPE_SIGNALS_PIPELINE_IO `SCOPE_SIGNALS_BE_IO // Clock @@ -169,10 +171,10 @@ module Vortex #( VX_pipeline #( .CORE_ID(CORE_ID) ) pipeline ( - `SCOPE_SIGNALS_ICACHE_ATTACH - `SCOPE_SIGNALS_DCACHE_ATTACH - `SCOPE_SIGNALS_CORE_ATTACH - `SCOPE_SIGNALS_BE_ATTACH + `SCOPE_SIGNALS_ISTAGE_BIND + `SCOPE_SIGNALS_LSU_BIND + `SCOPE_SIGNALS_PIPELINE_BIND + `SCOPE_SIGNALS_BE_BIND .clk(clk), .reset(reset), @@ -232,9 +234,11 @@ module Vortex #( assign snp_rsp_tag = dcache_snp_rsp_if.snp_rsp_tag; assign dcache_snp_rsp_if.snp_rsp_ready = snp_rsp_ready; - VX_mem_ctrl #( + VX_mem_unit #( .CORE_ID(CORE_ID) - ) mem_ctrl ( + ) mem_unit ( + `SCOPE_SIGNALS_ICACHE_BIND + .clk (clk), .reset (reset), @@ -269,7 +273,7 @@ module Vortex #( .core_io_rsp_if (arb_io_rsp_if), 
.core_rsp_if (core_dcache_rsp_if) ); - + endmodule // Vortex diff --git a/hw/rtl/Vortex_Cluster.v b/hw/rtl/Vortex_Cluster.v index 97a2f8c7..f6e322c3 100644 --- a/hw/rtl/Vortex_Cluster.v +++ b/hw/rtl/Vortex_Cluster.v @@ -3,9 +3,11 @@ module Vortex_Cluster #( parameter CLUSTER_ID = 0 ) ( - `SCOPE_SIGNALS_ICACHE_IO - `SCOPE_SIGNALS_DCACHE_IO + `SCOPE_SIGNALS_ISTAGE_IO + `SCOPE_SIGNALS_LSU_IO `SCOPE_SIGNALS_CORE_IO + `SCOPE_SIGNALS_ICACHE_IO + `SCOPE_SIGNALS_PIPELINE_IO `SCOPE_SIGNALS_BE_IO // Clock @@ -115,10 +117,12 @@ module Vortex_Cluster #( Vortex #( .CORE_ID(i + (CLUSTER_ID * `NUM_CORES)) ) vortex_core ( - `SCOPE_SIGNALS_ICACHE_ATTACH - `SCOPE_SIGNALS_DCACHE_ATTACH - `SCOPE_SIGNALS_CORE_ATTACH - `SCOPE_SIGNALS_BE_ATTACH + `SCOPE_SIGNALS_ISTAGE_BIND + `SCOPE_SIGNALS_LSU_BIND + `SCOPE_SIGNALS_CORE_BIND + `SCOPE_SIGNALS_ICACHE_BIND + `SCOPE_SIGNALS_PIPELINE_BIND + `SCOPE_SIGNALS_BE_BIND .clk (clk), .reset (reset), diff --git a/hw/rtl/Vortex_Socket.v b/hw/rtl/Vortex_Socket.v index 3729b1f5..01e4a96c 100644 --- a/hw/rtl/Vortex_Socket.v +++ b/hw/rtl/Vortex_Socket.v @@ -1,9 +1,11 @@ `include "VX_define.vh" module Vortex_Socket ( - `SCOPE_SIGNALS_ICACHE_IO - `SCOPE_SIGNALS_DCACHE_IO + `SCOPE_SIGNALS_ISTAGE_IO + `SCOPE_SIGNALS_LSU_IO `SCOPE_SIGNALS_CORE_IO + `SCOPE_SIGNALS_ICACHE_IO + `SCOPE_SIGNALS_PIPELINE_IO `SCOPE_SIGNALS_BE_IO // Clock @@ -61,10 +63,12 @@ module Vortex_Socket ( Vortex_Cluster #( .CLUSTER_ID(`L3CACHE_ID) ) Vortex_Cluster ( - `SCOPE_SIGNALS_ICACHE_ATTACH - `SCOPE_SIGNALS_DCACHE_ATTACH - `SCOPE_SIGNALS_CORE_ATTACH - `SCOPE_SIGNALS_BE_ATTACH + `SCOPE_SIGNALS_ISTAGE_BIND + `SCOPE_SIGNALS_LSU_BIND + `SCOPE_SIGNALS_CORE_BIND + `SCOPE_SIGNALS_ICACHE_BIND + `SCOPE_SIGNALS_PIPELINE_BIND + `SCOPE_SIGNALS_BE_BIND .clk (clk), .reset (reset), @@ -155,10 +159,12 @@ module Vortex_Socket ( Vortex_Cluster #( .CLUSTER_ID(i) ) Vortex_Cluster ( - `SCOPE_SIGNALS_ICACHE_ATTACH - `SCOPE_SIGNALS_DCACHE_ATTACH - `SCOPE_SIGNALS_CORE_ATTACH - `SCOPE_SIGNALS_BE_ATTACH + 
`SCOPE_SIGNALS_ISTAGE_BIND + `SCOPE_SIGNALS_LSU_BIND + `SCOPE_SIGNALS_CORE_BIND + `SCOPE_SIGNALS_ICACHE_BIND + `SCOPE_SIGNALS_PIPELINE_BIND + `SCOPE_SIGNALS_BE_BIND .clk (clk), .reset (reset), @@ -387,7 +393,7 @@ module Vortex_Socket ( end `ifdef DBG_PRINT_DRAM - always_ff @(posedge clk) begin + always @(posedge clk) begin if (dram_req_valid && dram_req_ready) begin $display("%t: DRAM req: rw=%b addr=%0h, tag=%0h, byteen=%0h data=%0h", $time, dram_req_rw, `DRAM_TO_BYTE_ADDR(dram_req_addr), dram_req_tag, dram_req_byteen, dram_req_data); end diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index 2c5c175e..fa8b1206 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -101,7 +101,7 @@ module VX_bank #( input wire snp_rsp_ready ); -`DEBUG_BLOCK( +`ifdef DBG_CORE_REQ_INFO wire[31:0] debug_use_pc_st0; wire[1:0] debug_wb_st0; wire[4:0] debug_rd_st0; @@ -128,7 +128,7 @@ module VX_bank #( wire[WORD_SIZE-1:0] debug_byteen_st2; wire[`REQS_BITS-1:0] debug_tid_st2; wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st2; -) +`endif wire snrq_pop; wire snrq_empty; @@ -300,7 +300,6 @@ module VX_bank #( wire qual_is_snp_st0; wire qual_snp_invalidate_st0; - wire valid_st1 [STAGE_1_CYCLES-1:0]; wire [`LINE_ADDR_WIDTH-1:0] addr_st1 [STAGE_1_CYCLES-1:0]; wire [`UP(`WORD_SELECT_WIDTH)-1:0] wsel_st1 [STAGE_1_CYCLES-1:0]; @@ -313,17 +312,17 @@ module VX_bank #( assign qual_is_fill_st0 = dfpq_pop_unqual; - assign qual_valid_st0 = dfpq_pop || mrvq_pop || reqq_pop || snrq_pop; + assign qual_valid_st0 = dfpq_pop || mrvq_pop || reqq_pop || snrq_pop; - assign qual_addr_st0 = dfpq_pop_unqual ? dfpq_addr_st0 : - mrvq_pop_unqual ? mrvq_addr_st0 : - reqq_pop_unqual ? reqq_req_addr_st0[`LINE_SELECT_ADDR_RNG] : - snrq_pop_unqual ? snrq_addr_st0 : - 0; + assign qual_addr_st0 = dfpq_pop_unqual ? dfpq_addr_st0 : + mrvq_pop_unqual ? mrvq_addr_st0 : + reqq_pop_unqual ? reqq_req_addr_st0[`LINE_SELECT_ADDR_RNG] : + snrq_pop_unqual ? 
snrq_addr_st0 : + 0; if (`WORD_SELECT_WIDTH != 0) begin - assign qual_wsel_st0 = reqq_pop_unqual ? reqq_req_addr_st0[`WORD_SELECT_WIDTH-1:0] : - mrvq_pop_unqual ? mrvq_wsel_st0 : - 0; + assign qual_wsel_st0 = reqq_pop_unqual ? reqq_req_addr_st0[`WORD_SELECT_WIDTH-1:0] : + mrvq_pop_unqual ? mrvq_wsel_st0 : + 0; end else begin `UNUSED_VAR(mrvq_wsel_st0) assign qual_wsel_st0 = 0; @@ -355,11 +354,11 @@ module VX_bank #( assign qual_from_mrvq_st0 = mrvq_pop_unqual; -`DEBUG_BLOCK( +`ifdef DBG_CORE_REQ_INFO if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin assign {debug_use_pc_st0, debug_wb_st0, debug_rd_st0, debug_warp_num_st0, debug_tagid_st0, debug_rw_st0, debug_byteen_st0, debug_tid_st0} = qual_inst_meta_st0; end -) +`endif VX_generic_register #( .N(1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `REQ_INST_META_WIDTH + 1 + `BANK_LINE_WIDTH) @@ -408,18 +407,23 @@ module VX_bank #( wire from_mrvq_st1e; wire mrvq_recover_ready_state_st1e; - assign from_mrvq_st1e = from_mrvq_st1[STAGE_1_CYCLES-1]; + assign from_mrvq_st1e = from_mrvq_st1[STAGE_1_CYCLES-1]; assign valid_st1e = valid_st1 [STAGE_1_CYCLES-1]; assign is_snp_st1e = is_snp_st1 [STAGE_1_CYCLES-1]; assign snp_invalidate_st1e = snp_invalidate_st1 [STAGE_1_CYCLES-1]; assign {tag_st1e, mem_rw_st1e, mem_byteen_st1e, tid_st1e} = inst_meta_st1[STAGE_1_CYCLES-1]; - assign st2_pending_hazard_st1e = (miss_add_because_miss) && ((addr_st2 == addr_st1[STAGE_1_CYCLES-1]) && !is_fill_st2); + assign st2_pending_hazard_st1e = (miss_add_because_miss) + && ((addr_st2 == addr_st1[STAGE_1_CYCLES-1]) && !is_fill_st2); - assign force_request_miss_st1e = (valid_st1e && !from_mrvq_st1e && (mrvq_pending_hazard_st1e || st2_pending_hazard_st1e)) || (valid_st1e && from_mrvq_st1e && recover_mrvq_state_st2); + assign force_request_miss_st1e = (valid_st1e && !from_mrvq_st1e && (mrvq_pending_hazard_st1e || st2_pending_hazard_st1e)) + || (valid_st1e && from_mrvq_st1e && recover_mrvq_state_st2); - assign 
mrvq_recover_ready_state_st1e = valid_st1e && from_mrvq_st1e && recover_mrvq_state_st2 && (addr_st2 == addr_st1[STAGE_1_CYCLES-1]); + assign mrvq_recover_ready_state_st1e = valid_st1e + && from_mrvq_st1e + && recover_mrvq_state_st2 + && (addr_st2 == addr_st1[STAGE_1_CYCLES-1]); VX_tag_data_access #( .CACHE_SIZE (CACHE_SIZE), @@ -466,11 +470,12 @@ module VX_bank #( .mrvq_init_ready_state_st1e(mrvq_init_ready_state_st1e) ); -`DEBUG_BLOCK( +`ifdef DBG_CORE_REQ_INFO if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin assign {debug_use_pc_st1e, debug_wb_st1e, debug_rd_st1e, debug_warp_num_st1e, debug_tagid_st1e, debug_rw_st1e, debug_byteen_st1e, debug_tid_st1e} = inst_meta_st1[STAGE_1_CYCLES-1]; end -) +`endif + wire qual_valid_st1e_2 = valid_st1e && !is_fill_st1[STAGE_1_CYCLES-1]; wire from_mrvq_st1e_st2 = from_mrvq_st1e; @@ -506,11 +511,11 @@ module VX_bank #( .out ({mrvq_recover_ready_state_st2 , from_mrvq_st2 , mrvq_init_ready_state_unqual_st2, snp_to_mrvq_st2 , is_snp_st2 , snp_invalidate_st2, fill_saw_dirty_st2 , is_fill_st2 , valid_st2 , addr_st2 , wsel_st2, writeword_st2 , readword_st2 , readdata_st2 , readtag_st2 , miss_st2 , dirty_st2 , dirtyb_st2, inst_meta_st2 }) ); -`DEBUG_BLOCK( +`ifdef DBG_CORE_REQ_INFO if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin assign {debug_use_pc_st2, debug_wb_st2, debug_rd_st2, debug_warp_num_st2, debug_tagid_st2, debug_rw_st2, debug_byteen_st2, debug_tid_st2} = inst_meta_st2; end -) +`endif // Enqueue to miss reserv if it's a valid miss assign miss_add_because_miss = valid_st2 && !is_snp_st2 && miss_st2; @@ -539,7 +544,9 @@ module VX_bank #( assign mrvq_init_ready_state_hazard_st0_st1 = miss_add_unqual && qual_is_fill_st0 && (miss_add_addr == qual_addr_st0 ); assign mrvq_init_ready_state_hazard_st1e_st1 = miss_add_unqual && is_fill_st1[STAGE_1_CYCLES-1] && (miss_add_addr == addr_st1[STAGE_1_CYCLES-1]); - assign mrvq_init_ready_state_st2 = mrvq_init_ready_state_unqual_st2 || mrvq_init_ready_state_hazard_st0_st1 || 
mrvq_init_ready_state_hazard_st1e_st1; + assign mrvq_init_ready_state_st2 = mrvq_init_ready_state_unqual_st2 + || mrvq_init_ready_state_hazard_st0_st1 + || mrvq_init_ready_state_hazard_st1e_st1; VX_cache_miss_resrv #( .BANK_ID (BANK_ID), @@ -592,10 +599,8 @@ module VX_bank #( // Enqueue core response - wire cwbq_push; - wire cwbq_pop; - wire cwbq_empty; - wire cwbq_full; + wire cwbq_push, cwbq_pop; + wire cwbq_empty, cwbq_full; wire cwbq_push_unqual = valid_st2 && !miss_st2 && !is_fill_st2 && !is_snp_st2; assign cwbq_push_stall = cwbq_push_unqual && cwbq_full; @@ -634,42 +639,22 @@ module VX_bank #( // Enqueue DRAM fill request -// `IGNORE_WARNINGS_BEGIN -// wire invalidate_fill; -// `IGNORE_WARNINGS_END -// wire possible_fill = valid_st2 && miss_st2 && dram_fill_req_ready && ~is_snp_st2; -// wire [`LINE_ADDR_WIDTH-1:0] fill_invalidator_addr = addr_st2; + wire dram_fill_req_unqual = miss_add_unqual + && (!mrvq_init_ready_state_st2 + || (from_mrvq_st2 && !mrvq_recover_ready_state_st2)); -// VX_fill_invalidator #( -// .BANK_LINE_SIZE (BANK_LINE_SIZE), -// .NUM_BANKS (NUM_BANKS), -// .FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE) -// ) fill_invalidator ( -// .clk (clk), -// .reset (reset), -// .possible_fill (possible_fill), -// .success_fill (is_fill_st2), -// .fill_addr (fill_invalidator_addr), -// .invalidate_fill (invalidate_fill) -// ); - - wire dram_fill_req_unqual = miss_add_unqual && (!mrvq_init_ready_state_st2 || (from_mrvq_st2 && !mrvq_recover_ready_state_st2)); - - assign dram_fill_req_valid = dram_fill_req_unqual - && dram_fill_req_ready - && !( dwbq_push_stall - || mrvq_push_stall - || cwbq_push_stall); + assign dram_fill_req_valid = dram_fill_req_unqual + && !(dwbq_push_stall + || mrvq_push_stall + || cwbq_push_stall); assign dram_fill_req_addr = addr_st2; assign dram_fill_req_stall = dram_fill_req_unqual && ~dram_fill_req_ready; // Enqueue DRAM writeback request - wire dwbq_push; - wire dwbq_pop; - wire dwbq_empty; - wire dwbq_full; + wire dwbq_push, 
dwbq_pop; + wire dwbq_empty, dwbq_full; wire dwbq_is_dwb_in, dwbq_is_snp_in; wire dwbq_is_dwb_out, dwbq_is_snp_out; @@ -724,9 +709,9 @@ module VX_bank #( assign dram_wb_req_valid = ~dwbq_empty && dwbq_is_dwb_out && (~dwbq_is_snp_out || dwbq_dual_valid_sel == 0); assign snp_rsp_valid = ~dwbq_empty && dwbq_is_snp_out && (~dwbq_is_dwb_out || dwbq_dual_valid_sel == 1); - assign dwbq_pop = (dwbq_is_dwb_out && ~dwbq_is_snp_out && dram_wb_req_fire) - || (dwbq_is_snp_out && ~dwbq_is_dwb_out && snp_rsp_fire) - || (dwbq_is_dwb_out && dwbq_is_snp_out && snp_rsp_fire); + assign dwbq_pop = (dwbq_is_dwb_out && ~dwbq_is_snp_out && dram_wb_req_fire) + || (dwbq_is_snp_out && ~dwbq_is_dwb_out && snp_rsp_fire) + || (dwbq_is_dwb_out && dwbq_is_snp_out && snp_rsp_fire); // bank pipeline stall assign stall_bank_pipe = cwbq_push_stall @@ -735,53 +720,27 @@ module VX_bank #( || dram_fill_req_stall; `ifdef DBG_PRINT_CACHE_BANK - if (NUM_BANKS == 1) begin - always_ff @(posedge clk) begin - if (core_req_valid && core_req_ready) begin - $display("%t: bank%0d-%0d core req: addr=%0h, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR0(core_req_addr), core_req_tag); - end - if (core_rsp_valid && core_rsp_ready) begin - $display("%t: bank%0d-%0d core rsp: tag=%0h, data=%0h", $time, CACHE_ID, BANK_ID, core_rsp_tag, core_rsp_data); - end - if (dram_fill_req_valid && dram_fill_req_ready) begin - $display("%t: bank%0d-%0d dram_fill req: addr=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR0(dram_fill_req_addr)); - end - if (dram_wb_req_valid && dram_wb_req_ready) begin - $display("%t: bank%0d-%0d dram_wb req: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR0(dram_wb_req_addr), dram_wb_req_data); - end - if (dram_fill_rsp_valid && dram_fill_rsp_ready) begin - $display("%t: bank%0d-%0d dram_fill rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR0(dram_fill_rsp_addr), dram_fill_rsp_data); - end - if (snp_req_valid && snp_req_ready) begin - $display("%t: 
bank%0d-%0d snp req: addr=%0h, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR0(snp_req_addr), snp_req_tag); - end - if (snp_rsp_valid && snp_rsp_ready) begin - $display("%t: bank%0d-%0d snp rsp: tag=%0h", $time, CACHE_ID, BANK_ID, snp_rsp_tag); - end + always @(posedge clk) begin + if ((|core_req_valid) && core_req_ready) begin + $display("%t: bank%0d-%0d core req: addr=%0h, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(core_req_addr, BANK_ID), core_req_tag); end - end else begin - always_ff @(posedge clk) begin - if ((|core_req_valid) && core_req_ready) begin - $display("%t: bank%0d-%0d core req: addr=%0h, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(core_req_addr, BANK_ID), core_req_tag); - end - if (core_rsp_valid && core_rsp_ready) begin - $display("%t: bank%0d-%0d core rsp: tag=%0h, data=%0h", $time, CACHE_ID, BANK_ID, core_rsp_tag, core_rsp_data); - end - if (dram_fill_req_valid && dram_fill_req_ready) begin - $display("%t: bank%0d-%0d dram_fill req: addr=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_fill_req_addr, BANK_ID)); - end - if (dram_wb_req_valid && dram_wb_req_ready) begin - $display("%t: bank%0d-%0d dram_wb req: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_wb_req_addr, BANK_ID), dram_wb_req_data); - end - if (dram_fill_rsp_valid && dram_fill_rsp_ready) begin - $display("%t: bank%0d-%0d dram_fill rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_fill_rsp_addr, BANK_ID), dram_fill_rsp_data); - end - if (snp_req_valid && snp_req_ready) begin - $display("%t: bank%0d-%0d snp req: addr=%0h, invalidate=%0d, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(snp_req_addr, BANK_ID), snp_req_invalidate, snp_req_tag); - end - if (snp_rsp_valid && snp_rsp_ready) begin - $display("%t: bank%0d-%0d snp rsp: tag=%0h", $time, CACHE_ID, BANK_ID, snp_rsp_tag); - end + if (core_rsp_valid && core_rsp_ready) begin + $display("%t: bank%0d-%0d core rsp: tag=%0h, data=%0h", $time, 
CACHE_ID, BANK_ID, core_rsp_tag, core_rsp_data); + end + if (dram_fill_req_valid && dram_fill_req_ready) begin + $display("%t: bank%0d-%0d dram_fill req: addr=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_fill_req_addr, BANK_ID)); + end + if (dram_wb_req_valid && dram_wb_req_ready) begin + $display("%t: bank%0d-%0d dram_wb req: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_wb_req_addr, BANK_ID), dram_wb_req_data); + end + if (dram_fill_rsp_valid && dram_fill_rsp_ready) begin + $display("%t: bank%0d-%0d dram_fill rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_fill_rsp_addr, BANK_ID), dram_fill_rsp_data); + end + if (snp_req_valid && snp_req_ready) begin + $display("%t: bank%0d-%0d snp req: addr=%0h, invalidate=%0d, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(snp_req_addr, BANK_ID), snp_req_invalidate, snp_req_tag); + end + if (snp_rsp_valid && snp_rsp_ready) begin + $display("%t: bank%0d-%0d snp rsp: tag=%0h", $time, CACHE_ID, BANK_ID, snp_rsp_tag); end end `endif diff --git a/hw/rtl/cache/VX_cache.v b/hw/rtl/cache/VX_cache.v index c52bf86a..d8f4db65 100644 --- a/hw/rtl/cache/VX_cache.v +++ b/hw/rtl/cache/VX_cache.v @@ -65,6 +65,8 @@ module VX_cache #( // Snooping forward tag width parameter SNP_FWD_TAG_WIDTH = 1 ) ( + `SCOPE_SIGNALS_ICACHE_IO + input wire clk, input wire reset, @@ -125,7 +127,7 @@ module VX_cache #( output wire [NUM_SNP_REQUESTS-1:0] snp_fwdin_ready ); -`DEBUG_BLOCK( +`ifdef DBG_CORE_REQ_INFO wire[31:0] debug_core_req_use_pc; wire[1:0] debug_core_req_wb; wire[4:0] debug_core_req_rd; @@ -135,7 +137,8 @@ module VX_cache #( if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin assign {debug_core_req_use_pc, debug_core_req_wb, debug_core_req_rd, debug_core_req_warp_num, debug_core_req_idx} = core_req_tag[0]; end -) +`endif + wire [NUM_BANKS-1:0][NUM_REQUESTS-1:0] per_bank_valid; wire [NUM_BANKS-1:0] per_bank_core_req_ready; @@ -476,7 +479,13 @@ module VX_cache #( 
.per_bank_snp_rsp_ready (per_bank_snp_rsp_ready), .snp_rsp_valid (snp_rsp_valid), .snp_rsp_tag (snp_rsp_tag), - .snp_rsp_ready (snp_rsp_ready) - ); + .snp_rsp_ready (snp_rsp_ready) + ); + + `SCOPE_ASSIGN(scope_idram_req_valid, per_bank_dram_fill_req_valid[0]); + `SCOPE_ASSIGN(scope_idram_req_ready, dram_fill_req_ready); + `SCOPE_ASSIGN(scope_idram_rsp_valid, per_bank_core_rsp_valid[0]); + `SCOPE_ASSIGN(scope_idram_rsp_ready, per_bank_core_rsp_ready[0]); + endmodule \ No newline at end of file diff --git a/hw/rtl/cache/VX_cache_config.vh b/hw/rtl/cache/VX_cache_config.vh index f4462fd8..799674de 100644 --- a/hw/rtl/cache/VX_cache_config.vh +++ b/hw/rtl/cache/VX_cache_config.vh @@ -72,8 +72,6 @@ `define LINE_TO_DRAM_ADDR(x, i) {x, `BANK_SELECT_BITS'(i)} -`define LINE_TO_BYTE_ADDR(x, i) {x, `BANK_SELECT_BITS'(i), `BASE_ADDR_BITS'(0)} - -`define LINE_TO_BYTE_ADDR0(x) {x, `BASE_ADDR_BITS'(0)} +`define LINE_TO_BYTE_ADDR(x, i) {x, (((`BANK_SELECT_BITS + `BASE_ADDR_BITS)'(i)) << `BASE_ADDR_BITS)} `endif diff --git a/hw/rtl/cache/VX_cache_miss_resrv.v b/hw/rtl/cache/VX_cache_miss_resrv.v index 188747c3..050fbce0 100644 --- a/hw/rtl/cache/VX_cache_miss_resrv.v +++ b/hw/rtl/cache/VX_cache_miss_resrv.v @@ -151,38 +151,21 @@ module VX_cache_miss_resrv #( end end -`ifdef DBG_PRINT_CACHE_MSRQ +`ifdef DBG_PRINT_CACHE_MSRQ integer j; - if (NUM_BANKS == 1) begin - always_ff @(posedge clk) begin - if (mrvq_push || mrvq_pop || increment_head || recover_state) begin - $write("%t: bank%0d-%0d msrq: push=%b pop=%b incr=%d recv=%d", $time, CACHE_ID, BANK_ID, mrvq_push, mrvq_pop, increment_head, recover_state); - for (j = 0; j < MRVQ_SIZE; j++) begin - if (valid_table[j]) begin - $write(" "); - if (schedule_ptr == $bits(schedule_ptr)'(j)) $write("*"); - if (~ready_table[j]) $write("!"); - $write("addr%0d=%0h", j, {addr_table[j], `BASE_ADDR_BITS'(0)}); - end - end - $write("\n"); - end - end - end else begin - always_ff @(posedge clk) begin - if (mrvq_push || mrvq_pop || increment_head || 
recover_state) begin - $write("%t: bank%0d-%0d msrq: push=%b pop=%b incr=%d recv=%d", $time, CACHE_ID, BANK_ID, mrvq_push, mrvq_pop, increment_head, recover_state); - for (j = 0; j < MRVQ_SIZE; j++) begin - if (valid_table[j]) begin - $write(" "); - if (schedule_ptr == $bits(schedule_ptr)'(j)) $write("*"); - if (~ready_table[j]) $write("!"); - $write("addr%0d=%0h", j, `LINE_TO_BYTE_ADDR(addr_table[j], BANK_ID)); - end - end - $write("\n"); - end - end + always @(posedge clk) begin + if (mrvq_push || mrvq_pop || increment_head || recover_state) begin + $write("%t: bank%0d-%0d msrq: push=%b pop=%b incr=%d recv=%d", $time, CACHE_ID, BANK_ID, mrvq_push, mrvq_pop, increment_head, recover_state); + for (j = 0; j < MRVQ_SIZE; j++) begin + if (valid_table[j]) begin + $write(" "); + if (schedule_ptr == $bits(schedule_ptr)'(j)) $write("*"); + if (~ready_table[j]) $write("!"); + $write("addr%0d=%0h", j, `LINE_TO_BYTE_ADDR(addr_table[j], BANK_ID)); + end + end + $write("\n"); + end end `endif diff --git a/hw/rtl/cache/VX_snp_forwarder.v b/hw/rtl/cache/VX_snp_forwarder.v index c1e559b8..12b54cab 100644 --- a/hw/rtl/cache/VX_snp_forwarder.v +++ b/hw/rtl/cache/VX_snp_forwarder.v @@ -116,7 +116,7 @@ module VX_snp_forwarder #( end `ifdef DBG_PRINT_CACHE_SNP - always_ff @(posedge clk) begin + always @(posedge clk) begin if (snp_req_valid && snp_req_ready) begin $display("%t: cache%0d snp req: addr=%0h, invalidate=%0d, tag=%0h", $time, CACHE_ID, `DRAM_TO_BYTE_ADDR(snp_req_addr), snp_req_invalidate, snp_req_tag); end diff --git a/hw/rtl/libs/VX_divide.v b/hw/rtl/libs/VX_divide.v index 2ce1e4c4..948c8d80 100644 --- a/hw/rtl/libs/VX_divide.v +++ b/hw/rtl/libs/VX_divide.v @@ -17,112 +17,105 @@ module VX_divide #( output reg [WIDTHD-1:0] remainder ); -// synthesis read_comments_as_HDL on -// localparam IMPL = "quartus"; -// synthesis read_comments_as_HDL off - -// altera translate_off - localparam IMPL="fallback"; -// altera translate_on - generate + if (NREP != DREP) begin 
different_nrep_drep_not_yet_supported non_existing_module(); end - if (IMPL == "quartus") begin + `ifdef QUARTUS - localparam lpm_speed=SPEED == "HIGHEST" ? 9:5; + localparam lpm_speed=SPEED == "HIGHEST" ? 9 : 5; - lpm_divide #( - .LPM_WIDTHN(WIDTHN), - .LPM_WIDTHD(WIDTHD), - .LPM_NREPRESENTATION(NREP), - .LPM_DREPRESENTATION(DREP), - .LPM_PIPELINE(PIPELINE), - .LPM_REMAINDERPOSITIVE("FALSE"), // emulate verilog % operator - .MAXIMIZE_SPEED(lpm_speed) - ) quartus_divider ( - .clock(clock), - .aclr(aclr), - .clken(clken), - .numer(numer), - .denom(denom), - .quotient(quotient), - .remain(remainder) - ); - end - else begin + lpm_divide #( + .LPM_WIDTHN(WIDTHN), + .LPM_WIDTHD(WIDTHD), + .LPM_NREPRESENTATION(NREP), + .LPM_DREPRESENTATION(DREP), + .LPM_PIPELINE(PIPELINE), + .LPM_REMAINDERPOSITIVE("FALSE"), // emulate verilog % operator + .MAXIMIZE_SPEED(lpm_speed) + ) quartus_divider ( + .clock(clock), + .aclr(aclr), + .clken(clken), + .numer(numer), + .denom(denom), + .quotient(quotient), + .remain(remainder) + ); - wire [WIDTHN-1:0] numer_pipe_end; - wire [WIDTHD-1:0] denom_pipe_end; + `else - if (PIPELINE == 0) begin - assign numer_pipe_end = numer; - assign denom_pipe_end = denom; - end else begin - reg [WIDTHN-1:0] numer_pipe [0:PIPELINE-1]; - reg [WIDTHD-1:0] denom_pipe [0:PIPELINE-1]; + wire [WIDTHN-1:0] numer_pipe_end; + wire [WIDTHD-1:0] denom_pipe_end; - genvar i; - for (i = 0; i < PIPELINE-1; i++) begin : pipe_stages - always @(posedge clock or posedge aclr) begin - if (aclr) begin - numer_pipe[i+1] <= 0; - denom_pipe[i+1] <= 0; - end - else if (clken) begin - numer_pipe[i+1] <= numer_pipe[i]; - denom_pipe[i+1] <= denom_pipe[i]; - end - end - end + if (PIPELINE == 0) begin + assign numer_pipe_end = numer; + assign denom_pipe_end = denom; + end else begin + reg [WIDTHN-1:0] numer_pipe [0:PIPELINE-1]; + reg [WIDTHD-1:0] denom_pipe [0:PIPELINE-1]; + genvar i; + for (i = 0; i < PIPELINE-1; i++) begin : pipe_stages always @(posedge clock or posedge aclr) begin if 
(aclr) begin - numer_pipe[0] <= 0; - denom_pipe[0] <= 0; + numer_pipe[i+1] <= 0; + denom_pipe[i+1] <= 0; end else if (clken) begin - numer_pipe[0] <= numer; - denom_pipe[0] <= denom; + numer_pipe[i+1] <= numer_pipe[i]; + denom_pipe[i+1] <= denom_pipe[i]; end end - - assign numer_pipe_end = numer_pipe[PIPELINE-1]; - assign denom_pipe_end = denom_pipe[PIPELINE-1]; end - /* * * * * * * * * * * * * * * * * * * * * * */ - /* Do the actual fallback computation here */ - /* * * * * * * * * * * * * * * * * * * * * * */ - - if (NREP == "SIGNED") begin - - always @(*) begin - if (denom_pipe_end == 0) begin - quotient = 32'hffffffff; - remainder = numer_pipe_end; - end - else if (denom_pipe_end == 32'hffffffff && numer_pipe_end == 32'h80000000) begin - // this edge case kills verilator in some cases by causing a division - // overflow exception. INT_MIN / -1 (on x86) - quotient = 0; - remainder = 0; - end - else begin - quotient = $signed($signed(numer_pipe_end) / $signed(denom_pipe_end)); - remainder = $signed($signed(numer_pipe_end) % $signed(denom_pipe_end)); - end + always @(posedge clock or posedge aclr) begin + if (aclr) begin + numer_pipe[0] <= 0; + denom_pipe[0] <= 0; + end + else if (clken) begin + numer_pipe[0] <= numer; + denom_pipe[0] <= denom; end - - end - else begin - assign quotient = (denom_pipe_end == 0) ? 32'hffffffff : numer_pipe_end/denom_pipe_end; - assign remainder = (denom_pipe_end == 0) ? 
numer_pipe_end : numer_pipe_end%denom_pipe_end; end + assign numer_pipe_end = numer_pipe[PIPELINE-1]; + assign denom_pipe_end = denom_pipe[PIPELINE-1]; end + + /* * * * * * * * * * * * * * * * * * * * * * */ + /* Do the actual fallback computation here */ + /* * * * * * * * * * * * * * * * * * * * * * */ + + if (NREP == "SIGNED") begin + always @(*) begin + if (denom_pipe_end == 0) begin + quotient = 32'hffffffff; + remainder = numer_pipe_end; + end + else if (denom_pipe_end == 32'hffffffff + && numer_pipe_end == 32'h80000000) begin + // this edge case kills verilator in some cases by causing a division + // overflow exception. INT_MIN / -1 (on x86) + quotient = 0; + remainder = 0; + end + else begin + quotient = $signed($signed(numer_pipe_end) / $signed(denom_pipe_end)); + remainder = $signed($signed(numer_pipe_end) % $signed(denom_pipe_end)); + end + end + end + else begin + assign quotient = (denom_pipe_end == 0) ? 32'hffffffff : numer_pipe_end/denom_pipe_end; + assign remainder = (denom_pipe_end == 0) ? 
numer_pipe_end : numer_pipe_end%denom_pipe_end; + end + + `endif + endgenerate endmodule : VX_divide diff --git a/hw/rtl/libs/VX_generic_priority_encoder.v b/hw/rtl/libs/VX_generic_priority_encoder.v index 343e9802..3cb06373 100644 --- a/hw/rtl/libs/VX_generic_priority_encoder.v +++ b/hw/rtl/libs/VX_generic_priority_encoder.v @@ -2,21 +2,17 @@ module VX_generic_priority_encoder #( parameter N = 1 -) ( - input wire[N-1:0] valids, - //output reg[$clog2(N)-1:0] index, - output reg[(`LOG2UP(N))-1:0] index, - //output reg[`LOG2UP(N):0] index, // eh - output reg found - ); - +) ( + input wire[N-1:0] valids, + output reg[(`LOG2UP(N))-1:0] index, + output reg found +); integer i; always @(*) begin index = 0; found = 0; for (i = N-1; i >= 0; i = i - 1) begin if (valids[i]) begin - //index = i[$clog2(N)-1:0]; index = i[(`LOG2UP(N))-1:0]; found = 1; end diff --git a/hw/rtl/libs/VX_generic_queue.v b/hw/rtl/libs/VX_generic_queue.v index c92e7297..b354f85d 100644 --- a/hw/rtl/libs/VX_generic_queue.v +++ b/hw/rtl/libs/VX_generic_queue.v @@ -132,7 +132,7 @@ module VX_generic_queue #( rd_ptr_r <= rd_ptr_next_r; if (SIZE > 2) begin - rd_ptr_next_r <= rd_ptr_r + 2; + rd_ptr_next_r <= rd_ptr_r + $bits(rd_ptr_r)'(2); end else begin // (SIZE == 2); rd_ptr_next_r <= ~rd_ptr_next_r; end diff --git a/hw/rtl/libs/VX_mult.v b/hw/rtl/libs/VX_mult.v index 088cad0d..3b7aaf3b 100644 --- a/hw/rtl/libs/VX_mult.v +++ b/hw/rtl/libs/VX_mult.v @@ -19,110 +19,102 @@ module VX_mult #( output reg [WIDTHP-1:0] result ); -// synthesis read_comments_as_HDL on -// localparam IMPL = "quartus"; -// synthesis read_comments_as_HDL off - -// altera translate_off - localparam IMPL="fallback"; -// altera translate_on - generate - if (IMPL == "quartus") begin + `ifdef QUARTUS - localparam lpm_speed = (SPEED == "HIGHEST") ? 
10 : 5; - - if (FORCE_LE == "YES") begin - lpm_mult #( - .LPM_WIDTHA(WIDTHA), - .LPM_WIDTHB(WIDTHB), - .LPM_WIDTHP(WIDTHP), - .LPM_REPRESENTATION(REP), - .LPM_PIPELINE(PIPELINE), - .DSP_BLOCK_BALANCING("LOGIC ELEMENTS"), - .MAXIMIZE_SPEED(lpm_speed) - ) quartus_mult ( - .clock(clock), - .aclr(aclr), - .clken(clken), - .dataa(dataa), - .datab(datab), - .result(result) - ); - end - else begin - lpm_mult#( - .LPM_WIDTHA(WIDTHA), - .LPM_WIDTHB(WIDTHB), - .LPM_WIDTHP(WIDTHP), - .LPM_REPRESENTATION(REP), - .LPM_PIPELINE(PIPELINE), - .MAXIMIZE_SPEED(lpm_speed) - ) quartus_mult( - .clock(clock), - .aclr(aclr), - .clken(clken), - .dataa(dataa), - .datab(datab), - .result(result) - ); - end + localparam lpm_speed = (SPEED == "HIGHEST") ? 10 : 5; + if (FORCE_LE == "YES") begin + lpm_mult #( + .LPM_WIDTHA(WIDTHA), + .LPM_WIDTHB(WIDTHB), + .LPM_WIDTHP(WIDTHP), + .LPM_REPRESENTATION(REP), + .LPM_PIPELINE(PIPELINE), + .DSP_BLOCK_BALANCING("LOGIC ELEMENTS"), + .MAXIMIZE_SPEED(lpm_speed) + ) quartus_mult ( + .clock(clock), + .aclr(aclr), + .clken(clken), + .dataa(dataa), + .datab(datab), + .result(result) + ); end else begin + lpm_mult#( + .LPM_WIDTHA(WIDTHA), + .LPM_WIDTHB(WIDTHB), + .LPM_WIDTHP(WIDTHP), + .LPM_REPRESENTATION(REP), + .LPM_PIPELINE(PIPELINE), + .MAXIMIZE_SPEED(lpm_speed) + ) quartus_mult( + .clock(clock), + .aclr(aclr), + .clken(clken), + .dataa(dataa), + .datab(datab), + .result(result) + ); + end - wire [WIDTHA-1:0] dataa_pipe_end; - wire [WIDTHB-1:0] datab_pipe_end; + `else + + wire [WIDTHA-1:0] dataa_pipe_end; + wire [WIDTHB-1:0] datab_pipe_end; - if (PIPELINE == 0) begin - assign dataa_pipe_end = dataa; - assign datab_pipe_end = datab; - end else begin - reg [WIDTHA-1:0] dataa_pipe [0:PIPELINE-1]; - reg [WIDTHB-1:0] datab_pipe [0:PIPELINE-1]; - - genvar i; - for (i = 0; i < PIPELINE-1; i++) begin : pipe_stages - always @(posedge clock or posedge aclr) begin - if (aclr) begin - dataa_pipe[i+1] <= 0; - datab_pipe[i+1] <= 0; - end - else if (clken) begin - 
dataa_pipe[i+1] <= dataa_pipe[i]; - datab_pipe[i+1] <= datab_pipe[i]; - end - end - end + if (PIPELINE == 0) begin + assign dataa_pipe_end = dataa; + assign datab_pipe_end = datab; + end else begin + reg [WIDTHA-1:0] dataa_pipe [0:PIPELINE-1]; + reg [WIDTHB-1:0] datab_pipe [0:PIPELINE-1]; + genvar i; + for (i = 0; i < PIPELINE-1; i++) begin : pipe_stages always @(posedge clock or posedge aclr) begin if (aclr) begin - dataa_pipe[0] <= 0; - datab_pipe[0] <= 0; + dataa_pipe[i+1] <= 0; + datab_pipe[i+1] <= 0; end else if (clken) begin - dataa_pipe[0] <= dataa; - datab_pipe[0] <= datab; + dataa_pipe[i+1] <= dataa_pipe[i]; + datab_pipe[i+1] <= datab_pipe[i]; end end - - assign dataa_pipe_end = dataa_pipe[PIPELINE-1]; - assign datab_pipe_end = datab_pipe[PIPELINE-1]; end - /* * * * * * * * * * * * * * * * * * * * * * */ - /* Do the actual fallback computation here */ - /* * * * * * * * * * * * * * * * * * * * * * */ - - if (REP == "SIGNED") begin - assign result = $signed($signed(dataa_pipe_end)*$signed(datab_pipe_end)); - end - else begin - assign result = dataa_pipe_end*datab_pipe_end; + always @(posedge clock or posedge aclr) begin + if (aclr) begin + dataa_pipe[0] <= 0; + datab_pipe[0] <= 0; + end + else if (clken) begin + dataa_pipe[0] <= dataa; + datab_pipe[0] <= datab; + end end + assign dataa_pipe_end = dataa_pipe[PIPELINE-1]; + assign datab_pipe_end = datab_pipe[PIPELINE-1]; end + + /* * * * * * * * * * * * * * * * * * * * * * */ + /* Do the actual fallback computation here */ + /* * * * * * * * * * * * * * * * * * * * * * */ + + if (REP == "SIGNED") begin + assign result = $signed($signed(dataa_pipe_end)*$signed(datab_pipe_end)); + end + else begin + assign result = dataa_pipe_end * datab_pipe_end; + end + + `endif + endgenerate endmodule: VX_mult diff --git a/hw/rtl/libs/VX_scope.v b/hw/rtl/libs/VX_scope.v index 2020ea87..a9bfbbcb 100644 --- a/hw/rtl/libs/VX_scope.v +++ b/hw/rtl/libs/VX_scope.v @@ -18,26 +18,22 @@ module VX_scope #( input wire bus_write, input 
wire bus_read ); - localparam DELTA_ENABLE = (UPDW != 0); - localparam MAX_DELTA = (2 ** DELTAW) - 1; + localparam DELTA_ENABLE = (UPDW != 0); + localparam MAX_DELTA = (2 ** DELTAW) - 1; - typedef enum logic[2:0] { - CMD_GET_VALID, - CMD_GET_DATA, - CMD_GET_WIDTH, - CMD_GET_COUNT, - CMD_SET_DELAY, - CMD_SET_STOP, - CMD_RESERVED1, - CMD_RESERVED2 - } cmd_t; + localparam CMD_GET_VALID = 3'd0; + localparam CMD_GET_DATA = 3'd1; + localparam CMD_GET_WIDTH = 3'd2; + localparam CMD_GET_COUNT = 3'd3; + localparam CMD_SET_DELAY = 3'd4; + localparam CMD_SET_STOP = 3'd5; + localparam CMD_RESERVED1 = 3'd6; + localparam CMD_RESERVED2 = 3'd7; - typedef enum logic[1:0] { - GET_VALID, - GET_DATA, - GET_WIDTH, - GET_COUNT - } cmd_get_t; + localparam GET_VALID = 2'd0; + localparam GET_DATA = 2'd1; + localparam GET_WIDTH = 2'd2; + localparam GET_COUNT = 2'd3; reg [DATAW-1:0] data_store [SIZE-1:0]; reg [DELTAW-1:0] delta_store [SIZE-1:0]; @@ -84,10 +80,10 @@ module VX_scope #( CMD_GET_VALID, CMD_GET_DATA, CMD_GET_WIDTH, - CMD_GET_COUNT: out_cmd <= $bits(out_cmd)'(cmd_type); - CMD_SET_DELAY: delay_val <= $bits(delay_val)'(cmd_data); - CMD_SET_STOP: waddr_end <= $bits(waddr)'(cmd_data); - default:; + CMD_GET_COUNT: out_cmd <= $bits(out_cmd)'(cmd_type); + CMD_SET_DELAY: delay_val <= $bits(delay_val)'(cmd_data); + CMD_SET_STOP: waddr_end <= $bits(waddr)'(cmd_data); + default:; endcase end @@ -183,7 +179,7 @@ module VX_scope #( end `ifdef DBG_PRINT_SCOPE - always_ff @(posedge clk) begin + always @(posedge clk) begin if (bus_read) begin $display("%t: scope-read: cmd=%0d, out=0x%0h, addr=%0d", $time, out_cmd, bus_out, raddr); end diff --git a/hw/simulate/Makefile b/hw/simulate/Makefile index e312ee68..29e0b7a2 100644 --- a/hw/simulate/Makefile +++ b/hw/simulate/Makefile @@ -28,6 +28,7 @@ VF += --x-initial unique VF += -exe $(SRCS) $(INCLUDE) DBG += -DVCD_OUTPUT $(DBG_PRINT) +DBG += -DDBG_CORE_REQ_INFO THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu_count() // 
2))') diff --git a/hw/simulate/simulator.cpp b/hw/simulate/simulator.cpp index 18fdb968..ecab551d 100644 --- a/hw/simulate/simulator.cpp +++ b/hw/simulate/simulator.cpp @@ -11,11 +11,12 @@ double sc_time_stamp() { Simulator::Simulator() { // force random values for unitialized signals - Verilated::randReset(1); + Verilated::randReset(2); ram_ = nullptr; vortex_ = new VVortex_Socket(); + dram_rsp_active_ = false; snp_req_active_ = false; #ifdef VCD_OUTPUT @@ -76,7 +77,7 @@ void Simulator::eval_dram_bus() { return; } - // handle DRAM response cycle + // schedule DRAM responses int dequeue_index = -1; for (int i = 0; i < dram_rsp_vec_.size(); i++) { if (dram_rsp_vec_[i].cycles_left > 0) { @@ -88,16 +89,23 @@ void Simulator::eval_dram_bus() { } } - // handle DRAM response message - if ((dequeue_index != -1) + // send DRAM response + if (dram_rsp_active_ + && vortex_->dram_rsp_valid && vortex_->dram_rsp_ready) { - vortex_->dram_rsp_valid = 1; - memcpy((uint8_t*)vortex_->dram_rsp_data, dram_rsp_vec_[dequeue_index].data, GLOBAL_BLOCK_SIZE); - vortex_->dram_rsp_tag = dram_rsp_vec_[dequeue_index].tag; - free(dram_rsp_vec_[dequeue_index].data); - dram_rsp_vec_.erase(dram_rsp_vec_.begin() + dequeue_index); - } else { - vortex_->dram_rsp_valid = 0; + dram_rsp_active_ = false; + } + if (!dram_rsp_active_) { + if (dequeue_index != -1) { + vortex_->dram_rsp_valid = 1; + memcpy((uint8_t*)vortex_->dram_rsp_data, dram_rsp_vec_[dequeue_index].data, GLOBAL_BLOCK_SIZE); + vortex_->dram_rsp_tag = dram_rsp_vec_[dequeue_index].tag; + free(dram_rsp_vec_[dequeue_index].data); + dram_rsp_vec_.erase(dram_rsp_vec_.begin() + dequeue_index); + dram_rsp_active_ = true; + } else { + vortex_->dram_rsp_valid = 0; + } } // handle DRAM stalls @@ -111,7 +119,7 @@ void Simulator::eval_dram_bus() { } #endif - // handle DRAM requests + // process DRAM requests if (!dram_stalled) { if (vortex_->dram_req_valid) { if (vortex_->dram_req_rw) { diff --git a/hw/simulate/simulator.h b/hw/simulate/simulator.h 
index c3467494..1a5a9b6c 100644 --- a/hw/simulate/simulator.h +++ b/hw/simulate/simulator.h @@ -55,8 +55,9 @@ private: void eval_snp_bus(); std::vector dram_rsp_vec_; + bool dram_rsp_active_; - uint32_t snp_req_active_; + bool snp_req_active_; uint32_t snp_req_size_; uint32_t pending_snp_reqs_; diff --git a/hw/syn/quartus/cache/Makefile b/hw/syn/quartus/cache/Makefile index a4c68207..f1f1f511 100755 --- a/hw/syn/quartus/cache/Makefile +++ b/hw/syn/quartus/cache/Makefile @@ -49,7 +49,7 @@ smart.log: $(PROJECT_FILES) # Project initialization $(PROJECT_FILES): - quartus_sh -t project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src $(SRC_FILE) -sdc project.sdc -inc "../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/cache" + quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src $(SRC_FILE) -sdc ../project.sdc -inc "../../../rtl;../../../rtl/libs;../../../rtl/cache" syn.chg: $(STAMP) syn.chg diff --git a/hw/syn/quartus/cache/project.sdc b/hw/syn/quartus/cache/project.sdc deleted file mode 100755 index 16582e56..00000000 --- a/hw/syn/quartus/cache/project.sdc +++ /dev/null @@ -1 +0,0 @@ -create_clock -name {clk} -period "250 MHz" -waveform { 0.0 1.0 } [get_ports {clk}] diff --git a/hw/syn/quartus/cache/project.tcl b/hw/syn/quartus/cache/project.tcl deleted file mode 100644 index 0b591385..00000000 --- a/hw/syn/quartus/cache/project.tcl +++ /dev/null @@ -1,67 +0,0 @@ -load_package flow -package require cmdline - -set options { - { "project.arg" "" "Project name" } - { "family.arg" "" "Device family name" } - { "device.arg" "" "Device name" } - { "top.arg" "" "Top level module" } - { "src.arg" "" "Verilog source file" } - { "inc.arg" "" "Include path (optional)" } - { "sdc.arg" "" "Timing Design Constraints file (optional)" } - { "set.arg" "" "Macro value (optional)" } -} - -set q_args_orig $quartus(args) - -array set opts [::cmdline::getoptions 
quartus(args) $options] - -# Verify required parameters -set requiredParameters {project family device top src} -foreach p $requiredParameters { - if {$opts($p) == ""} { - puts stderr "Missing required parameter: -$p" - exit 1 - } -} - -project_new $opts(project) -overwrite - -set_global_assignment -name FAMILY $opts(family) -set_global_assignment -name DEVICE $opts(device) -set_global_assignment -name TOP_LEVEL_ENTITY $opts(top) -set_global_assignment -name PROJECT_OUTPUT_DIRECTORY bin -set_global_assignment -name NUM_PARALLEL_PROCESSORS ALL -set_global_assignment -name VERILOG_INPUT_VERSION SYSTEMVERILOG_2009 - -set idx 0 -foreach arg $q_args_orig { - incr idx - if [string match "-src" $arg] { - set_global_assignment -name VERILOG_FILE [lindex $q_args_orig $idx] - } - if [string match "-inc" $arg] { - set_global_assignment -name SEARCH_PATH [lindex $q_args_orig $idx] - } - if [string match "-sdc" $arg] { - set_global_assignment -name SDC_FILE [lindex $q_args_orig $idx] - } - if [string match "-set" $arg] { - set_global_assignment -name VERILOG_MACRO [lindex $q_args_orig $idx] - } -} - -proc make_all_pins_virtual {} { - execute_module -tool map - set name_ids [get_names -filter * -node_type pin] - foreach_in_collection name_id $name_ids { - set pin_name [get_name_info -info full_path $name_id] - post_message "Making VIRTUAL_PIN assignment to $pin_name" - set_instance_assignment -to $pin_name -name VIRTUAL_PIN ON - } - export_assignments -} - -make_all_pins_virtual - -project_close \ No newline at end of file diff --git a/hw/syn/quartus/pipeline/Makefile b/hw/syn/quartus/pipeline/Makefile index b1ceee0c..1a25f44c 100644 --- a/hw/syn/quartus/pipeline/Makefile +++ b/hw/syn/quartus/pipeline/Makefile @@ -49,7 +49,7 @@ smart.log: $(PROJECT_FILES) # Project initialization $(PROJECT_FILES): - quartus_sh -t project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src $(SRC_FILE) -sdc project.sdc -inc 
"../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/pipe_regs;../../../rtl/cache" + quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src $(SRC_FILE) -sdc ../project.sdc -inc "../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/pipe_regs" syn.chg: $(STAMP) syn.chg diff --git a/hw/syn/quartus/pipeline/project.tcl b/hw/syn/quartus/pipeline/project.tcl deleted file mode 100644 index 0b591385..00000000 --- a/hw/syn/quartus/pipeline/project.tcl +++ /dev/null @@ -1,67 +0,0 @@ -load_package flow -package require cmdline - -set options { - { "project.arg" "" "Project name" } - { "family.arg" "" "Device family name" } - { "device.arg" "" "Device name" } - { "top.arg" "" "Top level module" } - { "src.arg" "" "Verilog source file" } - { "inc.arg" "" "Include path (optional)" } - { "sdc.arg" "" "Timing Design Constraints file (optional)" } - { "set.arg" "" "Macro value (optional)" } -} - -set q_args_orig $quartus(args) - -array set opts [::cmdline::getoptions quartus(args) $options] - -# Verify required parameters -set requiredParameters {project family device top src} -foreach p $requiredParameters { - if {$opts($p) == ""} { - puts stderr "Missing required parameter: -$p" - exit 1 - } -} - -project_new $opts(project) -overwrite - -set_global_assignment -name FAMILY $opts(family) -set_global_assignment -name DEVICE $opts(device) -set_global_assignment -name TOP_LEVEL_ENTITY $opts(top) -set_global_assignment -name PROJECT_OUTPUT_DIRECTORY bin -set_global_assignment -name NUM_PARALLEL_PROCESSORS ALL -set_global_assignment -name VERILOG_INPUT_VERSION SYSTEMVERILOG_2009 - -set idx 0 -foreach arg $q_args_orig { - incr idx - if [string match "-src" $arg] { - set_global_assignment -name VERILOG_FILE [lindex $q_args_orig $idx] - } - if [string match "-inc" $arg] { - set_global_assignment -name SEARCH_PATH [lindex $q_args_orig $idx] - } - if [string match "-sdc" $arg] { - set_global_assignment 
-name SDC_FILE [lindex $q_args_orig $idx] - } - if [string match "-set" $arg] { - set_global_assignment -name VERILOG_MACRO [lindex $q_args_orig $idx] - } -} - -proc make_all_pins_virtual {} { - execute_module -tool map - set name_ids [get_names -filter * -node_type pin] - foreach_in_collection name_id $name_ids { - set pin_name [get_name_info -info full_path $name_id] - post_message "Making VIRTUAL_PIN assignment to $pin_name" - set_instance_assignment -to $pin_name -name VIRTUAL_PIN ON - } - export_assignments -} - -make_all_pins_virtual - -project_close \ No newline at end of file diff --git a/hw/syn/quartus/pipeline/project.sdc b/hw/syn/quartus/project.sdc similarity index 100% rename from hw/syn/quartus/pipeline/project.sdc rename to hw/syn/quartus/project.sdc diff --git a/hw/syn/quartus/top/project.tcl b/hw/syn/quartus/project.tcl similarity index 93% rename from hw/syn/quartus/top/project.tcl rename to hw/syn/quartus/project.tcl index 0b591385..e3d1f2cc 100644 --- a/hw/syn/quartus/top/project.tcl +++ b/hw/syn/quartus/project.tcl @@ -33,6 +33,9 @@ set_global_assignment -name TOP_LEVEL_ENTITY $opts(top) set_global_assignment -name PROJECT_OUTPUT_DIRECTORY bin set_global_assignment -name NUM_PARALLEL_PROCESSORS ALL set_global_assignment -name VERILOG_INPUT_VERSION SYSTEMVERILOG_2009 +set_global_assignment -name VERILOG_MACRO QUARTUS +set_global_assignment -name VERILOG_MACRO SYNTHESIS +set_global_assignment -name VERILOG_MACRO NDEBUG set idx 0 foreach arg $q_args_orig { diff --git a/hw/syn/quartus/vortex/timing.tcl b/hw/syn/quartus/timing.tcl similarity index 100% rename from hw/syn/quartus/vortex/timing.tcl rename to hw/syn/quartus/timing.tcl diff --git a/hw/syn/quartus/top/Makefile b/hw/syn/quartus/top/Makefile index 4e760887..c665c80d 100644 --- a/hw/syn/quartus/top/Makefile +++ b/hw/syn/quartus/top/Makefile @@ -49,7 +49,7 @@ smart.log: $(PROJECT_FILES) # Project initialization $(PROJECT_FILES): - quartus_sh -t project.tcl -project $(PROJECT) -family 
$(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src $(SRC_FILE) -sdc project.sdc -inc "../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/pipe_regs;../../../rtl/cache;../../../opae;../../../opae/ccip" -macro "NOPAE" + quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src $(SRC_FILE) -set "NOPAE" -sdc ../project.sdc -inc "../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/pipe_regs;../../../rtl/cache;../../../opae;../../../opae/ccip" syn.chg: $(STAMP) syn.chg diff --git a/hw/syn/quartus/top/project.sdc b/hw/syn/quartus/top/project.sdc deleted file mode 100644 index 3c588f3b..00000000 --- a/hw/syn/quartus/top/project.sdc +++ /dev/null @@ -1,9 +0,0 @@ -set_time_format -unit ns -decimal_places 3 - -create_clock -name {clk} -period "250 MHz" -waveform { 0.0 1.0 } [get_ports {clk}] - -derive_pll_clocks -create_base_clocks -derive_clock_uncertainty - - - diff --git a/hw/syn/quartus/vortex/Makefile b/hw/syn/quartus/vortex/Makefile index 0b591385..618e3a32 100644 --- a/hw/syn/quartus/vortex/Makefile +++ b/hw/syn/quartus/vortex/Makefile @@ -1,67 +1,70 @@ -load_package flow -package require cmdline +PROJECT = VX_vortex +TOP_LEVEL_ENTITY = VX_vortex +SRC_FILE = VX_vortex.v +PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf -set options { - { "project.arg" "" "Project name" } - { "family.arg" "" "Device family name" } - { "device.arg" "" "Device name" } - { "top.arg" "" "Top level module" } - { "src.arg" "" "Verilog source file" } - { "inc.arg" "" "Include path (optional)" } - { "sdc.arg" "" "Timing Design Constraints file (optional)" } - { "set.arg" "" "Macro value (optional)" } -} +# Part, Family +FAMILY = "Arria 10" +DEVICE = 10AX115N3F40E2SG -set q_args_orig $quartus(args) +# Executable Configuration +SYN_ARGS = --parallel --read_settings_files=on +FIT_ARGS = --part=$(DEVICE) --read_settings_files=on +ASM_ARGS = +STA_ARGS = --do_report_timing -array set opts 
[::cmdline::getoptions quartus(args) $options] +# Build targets +all: $(PROJECT).sta.rpt -# Verify required parameters -set requiredParameters {project family device top src} -foreach p $requiredParameters { - if {$opts($p) == ""} { - puts stderr "Missing required parameter: -$p" - exit 1 - } -} +syn: $(PROJECT).syn.rpt -project_new $opts(project) -overwrite +fit: $(PROJECT).fit.rpt -set_global_assignment -name FAMILY $opts(family) -set_global_assignment -name DEVICE $opts(device) -set_global_assignment -name TOP_LEVEL_ENTITY $opts(top) -set_global_assignment -name PROJECT_OUTPUT_DIRECTORY bin -set_global_assignment -name NUM_PARALLEL_PROCESSORS ALL -set_global_assignment -name VERILOG_INPUT_VERSION SYSTEMVERILOG_2009 +asm: $(PROJECT).asm.rpt -set idx 0 -foreach arg $q_args_orig { - incr idx - if [string match "-src" $arg] { - set_global_assignment -name VERILOG_FILE [lindex $q_args_orig $idx] - } - if [string match "-inc" $arg] { - set_global_assignment -name SEARCH_PATH [lindex $q_args_orig $idx] - } - if [string match "-sdc" $arg] { - set_global_assignment -name SDC_FILE [lindex $q_args_orig $idx] - } - if [string match "-set" $arg] { - set_global_assignment -name VERILOG_MACRO [lindex $q_args_orig $idx] - } -} +sta: $(PROJECT).sta.rpt -proc make_all_pins_virtual {} { - execute_module -tool map - set name_ids [get_names -filter * -node_type pin] - foreach_in_collection name_id $name_ids { - set pin_name [get_name_info -info full_path $name_id] - post_message "Making VIRTUAL_PIN assignment to $pin_name" - set_instance_assignment -to $pin_name -name VIRTUAL_PIN ON - } - export_assignments -} +smart: smart.log -make_all_pins_virtual +# Target implementations +STAMP = echo done > -project_close \ No newline at end of file +$(PROJECT).syn.rpt: smart.log syn.chg $(SOURCE_FILES) + quartus_syn $(PROJECT) $(SYN_ARGS) + $(STAMP) fit.chg + +$(PROJECT).fit.rpt: smart.log fit.chg $(PROJECT).syn.rpt + quartus_fit $(PROJECT) $(FIT_ARGS) + $(STAMP) asm.chg + $(STAMP) sta.chg + 
+$(PROJECT).asm.rpt: smart.log asm.chg $(PROJECT).fit.rpt + quartus_asm $(PROJECT) $(ASM_ARGS) + +$(PROJECT).sta.rpt: smart.log sta.chg $(PROJECT).fit.rpt + quartus_sta $(PROJECT) $(STA_ARGS) + +smart.log: $(PROJECT_FILES) + quartus_sh --determine_smart_action $(PROJECT) > smart.log + +# Project initialization +$(PROJECT_FILES): + quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src $(SRC_FILE) -sdc ../project.sdc -inc "../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/pipe_regs;../../../rtl/cache" + +syn.chg: + $(STAMP) syn.chg + +fit.chg: + $(STAMP) fit.chg + +sta.chg: + $(STAMP) sta.chg + +asm.chg: + $(STAMP) asm.chg + +program: $(PROJECT).sof + quartus_pgm --no_banner --mode=jtag -o "$(PROJECT).sof" + +clean: + rm -rf bin *.rpt *.chg *.qsf *.qpf *.qws smart.log *.htm *.eqn *.pin *.sof *.pof qdb incremental_db tmp-clearbox diff --git a/hw/syn/quartus/vortex/project.sdc b/hw/syn/quartus/vortex/project.sdc deleted file mode 100644 index 3c588f3b..00000000 --- a/hw/syn/quartus/vortex/project.sdc +++ /dev/null @@ -1,9 +0,0 @@ -set_time_format -unit ns -decimal_places 3 - -create_clock -name {clk} -period "250 MHz" -waveform { 0.0 1.0 } [get_ports {clk}] - -derive_pll_clocks -create_base_clocks -derive_clock_uncertainty - - - diff --git a/hw/syn/quartus/vortex/project.tcl b/hw/syn/quartus/vortex/project.tcl deleted file mode 100644 index afe69d48..00000000 --- a/hw/syn/quartus/vortex/project.tcl +++ /dev/null @@ -1,41 +0,0 @@ -load_package flow -package require cmdline - -set options { \ - { "project.arg" "" "Project name" } \ - { "family.arg" "" "Device family name" } \ - { "device.arg" "" "Device name" } \ - { "top.arg" "" "Top level module" } \ - { "sdc.arg" "" "Timing Design Constraints file" } \ - { "src.arg" "" "Verilog source file" } \ - { "inc.arg" "." 
"Include path" } \ -} - -array set opts [::cmdline::getoptions quartus(args) $options] - -project_new $opts(project) -overwrite - -set_global_assignment -name FAMILY $opts(family) -set_global_assignment -name DEVICE $opts(device) -set_global_assignment -name TOP_LEVEL_ENTITY $opts(top) -set_global_assignment -name VERILOG_FILE $opts(src) -set_global_assignment -name SEARCH_PATH $opts(inc) -set_global_assignment -name SDC_FILE $opts(sdc) -set_global_assignment -name PROJECT_OUTPUT_DIRECTORY bin -set_global_assignment -name NUM_PARALLEL_PROCESSORS ALL -set_global_assignment -name VERILOG_INPUT_VERSION SYSTEMVERILOG_2009 - -proc make_all_pins_virtual {} { - execute_module -tool map - set name_ids [get_names -filter * -node_type pin] - foreach_in_collection name_id $name_ids { - set pin_name [get_name_info -info full_path $name_id] - post_message "Making VIRTUAL_PIN assignment to $pin_name" - set_instance_assignment -to $pin_name -name VIRTUAL_PIN ON - } - export_assignments -} - -make_all_pins_virtual - -project_close \ No newline at end of file