driver basic test and demo test refactoring

This commit is contained in:
Blaise Tine
2020-06-19 09:12:07 -07:00
parent e2e1b63e14
commit 68d9fc9a75
55 changed files with 1006 additions and 1205 deletions

View File

@@ -18,7 +18,10 @@ CXXFLAGS +=-fstack-protector
CXXFLAGS += -fPIC CXXFLAGS += -fPIC
# Enable scope analyzer # Enable scope analyzer
#CXXFLAGS += -DSCOPE CXXFLAGS += -DSCOPE
# config parameters
CXXFLAGS += -DNUM_WARPS=2 -DNUM_THREADS=2
LDFLAGS += -luuid LDFLAGS += -luuid

View File

@@ -25,31 +25,34 @@ struct scope_signal_t {
const char* name; const char* name;
}; };
// Compile-time integer base-2 logarithm, rounded down.
// Any n that is 1 or smaller (zero and negatives included) yields 0.
// Written as a single return expression so it remains usable as a
// constexpr function under the most restrictive language rules.
constexpr int ilog2(int n) {
    return (n <= 1) ? 0 : (ilog2(n / 2) + 1);
}
static constexpr int NW_BITS = ilog2(NUM_WARPS);
static const scope_signal_t scope_signals[] = { static const scope_signal_t scope_signals[] = {
{ 2, "icache_req_warp_num" }, { NW_BITS, "icache_req_warp_num" },
{ 32, "icache_req_addr" }, { 32, "icache_req_addr" },
{ 2, "icache_req_tag" }, { NW_BITS, "icache_req_tag" },
{ 32, "icache_rsp_data" }, { 32, "icache_rsp_data" },
{ 2, "icache_rsp_tag" }, { NW_BITS, "icache_rsp_tag" },
{ 2, "dcache_req_warp_num" }, { NW_BITS, "dcache_req_warp_num" },
{ 32, "dcache_req_curr_PC" }, { 32, "dcache_req_curr_PC" },
{ 32, "dcache_req_addr" }, { 32, "dcache_req_addr" },
{ 1, "dcache_req_rw" }, { 1, "dcache_req_rw" },
{ 4, "dcache_req_byteen" }, { 4, "dcache_req_byteen" },
{ 32, "dcache_req_data" }, { 32, "dcache_req_data" },
{ 2, "dcache_req_tag" }, { NW_BITS, "dcache_req_tag" },
{ 32, "dcache_rsp_data" }, { 32, "dcache_rsp_data" },
{ 2 , "dcache_rsp_tag" }, { NW_BITS, "dcache_rsp_tag" },
{ 32, "dram_req_addr" }, { 32, "dram_req_addr" },
{ 1, "dram_req_rw" }, { 1, "dram_req_rw" },
{ 16, "dram_req_byteen" }, { 16, "dram_req_byteen" },
{ 32, "dram_req_data" }, { 32, "dram_req_data" },
{ 29, "dram_req_tag" }, { 29, "dram_req_tag" },
{ 32, "dram_rsp_data" }, { 32, "dram_rsp_data" },
{ 29, "dram_rsp_tag" }, { 29, "dram_rsp_tag" },
@@ -58,30 +61,32 @@ static const scope_signal_t scope_signals[] = {
{ 16, "snp_req_tag" }, { 16, "snp_req_tag" },
{ 16, "snp_rsp_tag" }, { 16, "snp_rsp_tag" },
{ 2, "decode_warp_num" }, { NW_BITS, "decode_warp_num" },
{ 32, "decode_curr_PC" }, { 32, "decode_curr_PC" },
{ 1, "decode_is_jal" }, { 1, "decode_is_jal" },
{ 5, "decode_rs1" }, { 5, "decode_rs1" },
{ 5, "decode_rs2" }, { 5, "decode_rs2" },
{ 2, "execute_warp_num" }, { NW_BITS, "execute_warp_num" },
{ 5, "execute_rd" }, { 5, "execute_rd" },
{ 32, "execute_a" }, { 32, "execute_a" },
{ 32, "execute_b" }, { 32, "execute_b" },
{ 2, "writeback_warp_num" }, { NW_BITS, "writeback_warp_num" },
{ 2, "writeback_wb" }, { 2, "writeback_wb" },
{ 5, "writeback_rd" }, { 5, "writeback_rd" },
{ 32, "writeback_data" }, { 32, "writeback_data" },
///////////////////////////////////////////////////////////////////////////
{ 1, "icache_req_valid" }, { 1, "icache_req_valid" },
{ 1, "icache_req_ready" }, { 1, "icache_req_ready" },
{ 1, "icache_rsp_valid" }, { 1, "icache_rsp_valid" },
{ 1, "icache_rsp_ready" }, { 1, "icache_rsp_ready" },
{ 4, "dcache_req_valid" }, { NUM_THREADS, "dcache_req_valid" },
{ 1, "dcache_req_ready" }, { 1, "dcache_req_ready" },
{ 4, "dcache_rsp_valid" }, { NUM_THREADS, "dcache_rsp_valid" },
{ 1, "dcache_rsp_ready" }, { 1, "dcache_rsp_ready" },
{ 1, "dram_req_valid" }, { 1, "dram_req_valid" },
@@ -94,14 +99,19 @@ static const scope_signal_t scope_signals[] = {
{ 1, "snp_rsp_valid" }, { 1, "snp_rsp_valid" },
{ 1, "snp_rsp_ready" }, { 1, "snp_rsp_ready" },
{ 4, "decode_valid" }, { NUM_THREADS, "decode_valid" },
{ 4, "execute_valid" }, { NUM_THREADS, "execute_valid" },
{ 4, "writeback_valid" }, { NUM_THREADS, "writeback_valid" },
{ 1, "schedule_delay" }, { 1, "schedule_delay" },
{ 1, "memory_delay" }, { 1, "memory_delay" },
{ 1, "exec_delay" }, { 1, "exec_delay" },
{ 1, "gpr_stage_delay" }, { 1, "gpr_stage_delay" },
{ 1, "busy" }, { 1, "busy" },
{ 1, "idram_req_valid" },
{ 1, "idram_req_ready" },
{ 1, "idram_rsp_valid" },
{ 1, "idram_rsp_ready" },
}; };
static const int num_signals = sizeof(scope_signals) / sizeof(scope_signal_t); static const int num_signals = sizeof(scope_signals) / sizeof(scope_signal_t);
@@ -161,7 +171,10 @@ int vx_scope_stop(fpga_handle hfpga, uint64_t delay) {
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_CSR_SCOPE_CMD, 1)); CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_CSR_SCOPE_CMD, 1));
assert(fwidth == (int)frame_width); if (fwidth != (int)frame_width) {
std::cerr << "invalid frame_width: expecting " << std::dec << fwidth << "!" << std::endl;
std::abort();
}
std::vector<char> signal_data(frame_width+1); std::vector<char> signal_data(frame_width+1);
uint64_t frame_offset = 0; uint64_t frame_offset = 0;

View File

@@ -13,18 +13,19 @@ DBG_PRINT_FLAGS = -DDBG_PRINT_CORE_ICACHE \
-DDBG_PRINT_WB \ -DDBG_PRINT_WB \
-DDBG_PRINT_OPAE -DDBG_PRINT_OPAE
#DBG_PRINT=$(DBG_PRINT_FLAGS) DBG_PRINT=$(DBG_PRINT_FLAGS)
#MULTICORE += -DNUM_CLUSTERS=2 -DNUM_CORES=4 #CONFIGS += -DNUM_CLUSTERS=2 -DNUM_CORES=4
#MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=4 #CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=4
#MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=2 #CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=2
CONFIGS += -DNUM_WARPS=2 -DNUM_THREADS=2
#DEBUG=1 DEBUG=1
#AFU=1 AFU=1
CFLAGS += -fPIC CFLAGS += -fPIC
CFLAGS += -DUSE_RTLSIM $(MULTICORE) CFLAGS += -DUSE_RTLSIM $(CONFIGS)
LDFLAGS += -shared -pthread LDFLAGS += -shared -pthread
# LDFLAGS += -dynamiclib -pthread # LDFLAGS += -dynamiclib -pthread
@@ -35,7 +36,7 @@ SRCS = vortex.cpp ../common/vx_utils.cpp ../../hw/simulate/simulator.cpp
RTL_INCLUDE = -I../../hw/rtl -I../../hw/rtl/libs -I../../hw/rtl/interfaces -I../../hw/rtl/pipe_regs -I../../hw/rtl/cache RTL_INCLUDE = -I../../hw/rtl -I../../hw/rtl/libs -I../../hw/rtl/interfaces -I../../hw/rtl/pipe_regs -I../../hw/rtl/cache
VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic $(MULTICORE) VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic $(CONFIGS)
VL_FLAGS += -Wno-DECLFILENAME VL_FLAGS += -Wno-DECLFILENAME
VL_FLAGS += --x-initial unique VL_FLAGS += --x-initial unique
VL_FLAGS += --x-assign unique VL_FLAGS += --x-assign unique
@@ -47,9 +48,11 @@ VL_FLAGS += --x-assign unique
# Debugging # Debugging
ifdef DEBUG ifdef DEBUG
VL_FLAGS += --trace -DVCD_OUTPUT $(DBG_PRINT) VL_FLAGS += --trace -DVCD_OUTPUT $(DBG_PRINT)
CFLAGS += -DVCD_OUTPUT $(DBG_PRINT) CFLAGS += -DVCD_OUTPUT $(DBG_PRINT)
#VL_FLAGS += -DDBG_CORE_REQ_INFO
#CFLAGS += -DDBG_CORE_REQ_INFO
else else
CFLAGS += -DNDEBUG CFLAGS += -DNDEBUG
VL_FLAGS += -DNDEBUG VL_FLAGS += -DNDEBUG
endif endif

View File

@@ -44,16 +44,16 @@ $(PROJECT): $(SRCS)
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -L../../stub -lvortex -o $@ $(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -L../../stub -lvortex -o $@
run-fpga: $(PROJECT) run-fpga: $(PROJECT)
LD_LIBRARY_PATH=../../opae:$(LD_LIBRARY_PATH) ./$(PROJECT) LD_LIBRARY_PATH=../../opae:$(LD_LIBRARY_PATH) ./$(PROJECT) -n 32
run-ase: $(PROJECT) run-ase: $(PROJECT)
ASE_LOG=0 LD_LIBRARY_PATH=../../opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) ASE_LOG=0 LD_LIBRARY_PATH=../../opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) -n 16
run-rtlsim: $(PROJECT) run-rtlsim: $(PROJECT)
LD_LIBRARY_PATH=../../rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) LD_LIBRARY_PATH=../../rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) -n 16
run-simx: $(PROJECT) run-simx: $(PROJECT)
LD_LIBRARY_PATH=../../simx:$(LD_LIBRARY_PATH) ./$(PROJECT) LD_LIBRARY_PATH=../../simx:$(LD_LIBRARY_PATH) ./$(PROJECT) -n 16
.depend: $(SRCS) .depend: $(SRCS)
$(CXX) $(CXXFLAGS) -MM $^ > .depend; $(CXX) $(CXXFLAGS) -MM $^ > .depend;

View File

@@ -1,10 +1,9 @@
#include <iostream> #include <iostream>
#include <unistd.h> #include <unistd.h>
#include <string.h>
#include <vortex.h> #include <vortex.h>
#include "common.h" #include "common.h"
int test = -1;
#define RT_CHECK(_expr) \ #define RT_CHECK(_expr) \
do { \ do { \
int _ret = _expr; \ int _ret = _expr; \
@@ -15,79 +14,84 @@ int test = -1;
exit(-1); \ exit(-1); \
} while (false) } while (false)
const char* kernel_file = "kernel.bin";
int test = -1;
uint32_t count = 0;
vx_device_h device = nullptr;
vx_buffer_h buffer = nullptr;
// Prints the program banner followed by the command-line synopsis to stdout.
// Each line is terminated with std::endl, so the stream is flushed after each.
static void show_usage() {
    std::cout << "Vortex Driver Test." << std::endl
              << "Usage: [-t testno][-k: kernel][-n words][-h: help]" << std::endl;
}
static void parse_args(int argc, char **argv) { static void parse_args(int argc, char **argv) {
int c; int c;
while ((c = getopt(argc, argv, "t:h?")) != -1) { while ((c = getopt(argc, argv, "n:t:k:h?")) != -1) {
switch (c) { switch (c) {
case 't': { case 'n':
count = atoi(optarg);
break;
case 't':
test = atoi(optarg); test = atoi(optarg);
} break; break;
case 'k':
kernel_file = optarg;
break;
case 'h': case 'h':
case '?': { case '?': {
std::cout << "Test." << std::endl; show_usage();
std::cout << "Usage: [-t testno][-h: help]" << std::endl;
exit(0); exit(0);
} break; } break;
default: default:
show_usage();
exit(-1); exit(-1);
} }
} }
} }
// Releases the global test resources that were acquired, if any.
// The buffer is released before the device is closed; a null handle
// means the corresponding resource was never obtained and is skipped.
void cleanup() {
    if (buffer != nullptr) {
        vx_buf_release(buffer);
    }

    if (device != nullptr) {
        vx_dev_close(device);
    }
}
uint64_t shuffle(int i, uint64_t value) { uint64_t shuffle(int i, uint64_t value) {
return (value << i) | (value & ((1 << i)-1));; return (value << i) | (value & ((1 << i)-1));;
} }
vx_device_h device = nullptr; int run_memcopy_test(uint32_t dev_addr, uint64_t value, int num_blocks) {
vx_buffer_h sbuf = nullptr;
vx_buffer_h dbuf = nullptr;
int total_blocks = NUM_BLOCKS;
void cleanup() {
if (sbuf) {
vx_buf_release(sbuf);
}
if (dbuf) {
vx_buf_release(dbuf);
}
if (device) {
vx_dev_close(device);
}
}
int run_memcopy_test(vx_buffer_h sbuf,
vx_buffer_h dbuf,
uint32_t address,
uint64_t value,
int num_blocks) {
int errors = 0; int errors = 0;
// write sbuf data // update source buffer
for (int i = 0; i < (64 * num_blocks) / 8; ++i) { for (int i = 0; i < (64 * num_blocks) / 8; ++i) {
((uint64_t*)vx_host_ptr(sbuf))[i] = shuffle(i, value); ((uint64_t*)vx_host_ptr(buffer))[i] = shuffle(i, value);
}
// clear dbuf data
for (int i = 0; i < (64 * num_blocks) / 8; ++i) {
((uint64_t*)vx_host_ptr(dbuf))[i] = 0;
} }
// write buffer to local memory // write buffer to local memory
std::cout << "write buffer to local memory" << std::endl; std::cout << "write buffer to local memory" << std::endl;
RT_CHECK(vx_copy_to_dev(sbuf, address, 64 * num_blocks, 0)); RT_CHECK(vx_copy_to_dev(buffer, dev_addr, 64 * num_blocks, 0));
// clear destination buffer
for (int i = 0; i < (64 * num_blocks) / 8; ++i) {
((uint64_t*)vx_host_ptr(buffer))[i] = 0;
}
// read buffer from local memory // read buffer from local memory
std::cout << "read buffer from local memory" << std::endl; std::cout << "read buffer from local memory" << std::endl;
RT_CHECK(vx_copy_from_dev(dbuf, address, 64 * num_blocks, 0)); RT_CHECK(vx_copy_from_dev(buffer, dev_addr, 64 * num_blocks, 0));
// verify result // verify result
std::cout << "verify result" << std::endl; std::cout << "verify result" << std::endl;
for (int i = 0; i < (64 * num_blocks) / 8; ++i) { for (int i = 0; i < (64 * num_blocks) / 8; ++i) {
auto curr = ((uint64_t*)vx_host_ptr(dbuf))[i]; auto curr = ((uint64_t*)vx_host_ptr(buffer))[i];
auto ref = shuffle(i, value); auto ref = shuffle(i, value);
if (curr != ref) { if (curr != ref) {
std::cout << "error at 0x" << std::hex << (address + 8 * i) std::cout << "error at 0x" << std::hex << (dev_addr + 8 * i)
<< ": actual 0x" << curr << ", expected 0x" << ref << std::endl; << ": actual 0x" << curr << ", expected 0x" << ref << std::endl;
++errors; ++errors;
} }
@@ -102,35 +106,19 @@ int run_memcopy_test(vx_buffer_h sbuf,
return 0; return 0;
} }
int run_kernel_test(vx_device_h device, int run_kernel_test(const kernel_arg_t& kernel_arg,
vx_buffer_h sbuf, uint32_t buf_size,
vx_buffer_h dbuf, uint32_t num_points) {
const char* program,
int num_blocks) {
int errors = 0; int errors = 0;
uint64_t seed = 0x0badf00d40ff40ff; // update source buffer
for (uint32_t i = 0; i < num_points; ++i) {
int src_dev_addr = DEV_MEM_SRC_ADDR; ((int32_t*)vx_host_ptr(buffer))[i] = i;
int dest_dev_addr = DEV_MEM_DST_ADDR;
// write sbuf data
for (int i = 0; i < (64 * num_blocks) / 8; ++i) {
((uint64_t*)vx_host_ptr(sbuf))[i] = shuffle(i, seed);
}
// clear dbuf data
for (int i = 0; i < (64 * num_blocks) / 8; ++i) {
((uint64_t*)vx_host_ptr(dbuf))[i] = 0;
} }
// write buffer to local memory // write buffer to local memory
std::cout << "write buffer to local memory" << std::endl; std::cout << "write buffer to local memory" << std::endl;
RT_CHECK(vx_copy_to_dev(sbuf, src_dev_addr, 64 * num_blocks, 0)); RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.src_ptr, buf_size, 0));
// upload program
std::cout << "upload program" << std::endl;
RT_CHECK(vx_upload_kernel_file(device, program));
// start device // start device
std::cout << "start device" << std::endl; std::cout << "start device" << std::endl;
@@ -142,19 +130,24 @@ int run_kernel_test(vx_device_h device,
// flush the caches // flush the caches
std::cout << "flush the caches" << std::endl; std::cout << "flush the caches" << std::endl;
RT_CHECK(vx_flush_caches(device, dest_dev_addr, 64 * num_blocks)); RT_CHECK(vx_flush_caches(device, kernel_arg.dst_ptr, buf_size));
// clear destination buffer
for (uint32_t i = 0; i < num_points; ++i) {
((int32_t*)vx_host_ptr(buffer))[i] = 0;
}
// read buffer from local memory // read buffer from local memory
std::cout << "read buffer from local memory" << std::endl; std::cout << "read buffer from local memory" << std::endl;
RT_CHECK(vx_copy_from_dev(dbuf, dest_dev_addr, 64 * num_blocks, 0)); RT_CHECK(vx_copy_from_dev(buffer, kernel_arg.dst_ptr, buf_size, 0));
// verify result // verify result
std::cout << "verify result" << std::endl; std::cout << "verify result" << std::endl;
for (int i = 0; i < (64 * num_blocks) / 8; ++i) { for (uint32_t i = 0; i < num_points; ++i) {
auto curr = ((uint64_t*)vx_host_ptr(dbuf))[i]; int32_t curr = ((int32_t*)vx_host_ptr(buffer))[i];
auto ref = shuffle(i, seed); int32_t ref = i;
if (curr != ref) { if (curr != ref) {
std::cout << "error at 0x" << std::hex << (dest_dev_addr + 8 * i) std::cout << "error at value " << i
<< ": actual 0x" << curr << ", expected 0x" << ref << std::endl; << ": actual 0x" << curr << ", expected 0x" << ref << std::endl;
++errors; ++errors;
} }
@@ -170,33 +163,66 @@ int run_kernel_test(vx_device_h device,
} }
int main(int argc, char *argv[]) { int main(int argc, char *argv[]) {
size_t value;
kernel_arg_t kernel_arg;
// parse command arguments // parse command arguments
parse_args(argc, argv); parse_args(argc, argv);
std::cout << "total blocks: " << total_blocks << std::endl; if (count == 0) {
count = 1;
}
uint32_t max_cores = vx_dev_caps(VX_CAPS_MAX_CORES);
uint32_t num_points = max_cores * count;
uint32_t num_blocks = (num_points * sizeof(uint32_t) + 63) / 64;
uint32_t buf_size = num_blocks * 64;
std::cout << "number of points: " << num_points << std::endl;
std::cout << "buffer size: " << buf_size << " bytes" << std::endl;
// open device connection // open device connection
std::cout << "open device connection" << std::endl; std::cout << "open device connection" << std::endl;
RT_CHECK(vx_dev_open(&device)); RT_CHECK(vx_dev_open(&device));
// create source buffer // allocate device memory
std::cout << "create source buffer" << std::endl; RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
RT_CHECK(vx_alloc_shared_mem(device, total_blocks * 64, &sbuf)); kernel_arg.src_ptr = value;
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
kernel_arg.dst_ptr = value;
// create destination buffer kernel_arg.count = count;
std::cout << "create destination buffer" << std::endl;
RT_CHECK(vx_alloc_shared_mem(device, total_blocks * 64, &dbuf)); std::cout << "dev_src=" << std::hex << kernel_arg.src_ptr << std::endl;
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::endl;
// allocate shared memory
std::cout << "allocate shared memory" << std::endl;
uint32_t alloc_size = std::max<uint32_t>(buf_size, sizeof(kernel_arg_t));
RT_CHECK(vx_alloc_shared_mem(device, alloc_size, &buffer));
// run tests // run tests
if (0 == test || -1 == test) { if (0 == test || -1 == test) {
std::cout << "run memcopy test" << std::endl; std::cout << "run memcopy test" << std::endl;
RT_CHECK(run_memcopy_test(sbuf, dbuf, DEV_MEM_SRC_ADDR, 0x0badf00d00ff00ff, 1)); RT_CHECK(run_memcopy_test(kernel_arg.src_ptr, 0x0badf00d00ff00ff, 1));
RT_CHECK(run_memcopy_test(sbuf, dbuf, DEV_MEM_SRC_ADDR, 0x0badf00d40ff40ff, total_blocks)); RT_CHECK(run_memcopy_test(kernel_arg.src_ptr, 0x0badf00d40ff40ff, num_blocks));
} }
if (1 == test || -1 == test) { if (1 == test || -1 == test) {
// upload program
std::cout << "upload program" << std::endl;
RT_CHECK(vx_upload_kernel_file(device, kernel_file));
// upload kernel argument
std::cout << "upload kernel argument" << std::endl;
{
auto buf_ptr = (void*)vx_host_ptr(buffer);
memcpy(buf_ptr, &kernel_arg, sizeof(kernel_arg_t));
RT_CHECK(vx_copy_to_dev(buffer, KERNEL_ARG_DEV_MEM_ADDR, sizeof(kernel_arg_t), 0));
}
std::cout << "run kernel test" << std::endl; std::cout << "run kernel test" << std::endl;
RT_CHECK(run_kernel_test(device, sbuf, dbuf, "kernel.bin", total_blocks)); RT_CHECK(run_kernel_test(kernel_arg, buf_size, num_points));
} }
// cleanup // cleanup

View File

@@ -1,8 +1,12 @@
#ifndef _COMMON_H_ #ifndef _COMMON_H_
#define _COMMON_H_ #define _COMMON_H_
#define DEV_MEM_SRC_ADDR 0x10000040 #define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000
#define DEV_MEM_DST_ADDR 0x20000080
#define NUM_BLOCKS 16 struct kernel_arg_t {
uint32_t count;
uint32_t src_ptr;
uint32_t dst_ptr;
};
#endif #endif

Binary file not shown.

View File

@@ -4,17 +4,14 @@
#include "common.h" #include "common.h"
void main() { void main() {
int64_t* x = (int64_t*)DEV_MEM_SRC_ADDR; struct kernel_arg_t* arg = (struct kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
int64_t* y = (int64_t*)DEV_MEM_DST_ADDR; uint32_t count = arg->count;
int num_words = (NUM_BLOCKS * 64) / 8; int32_t* src_ptr = (int32_t*)arg->src_ptr;
int32_t* dst_ptr = (int32_t*)arg->dst_ptr;
int core_id = vx_core_id(); uint32_t offset = vx_core_id() * count;
int num_cores = vx_num_cores();
int num_words_per_core = num_words / num_cores;
int offset = core_id * num_words_per_core; for (uint32_t i = 0; i < count; ++i) {
dst_ptr[offset + i] = src_ptr[offset + i];
for (int i = 0; i < num_words_per_core; ++i) {
y[offset + i] = x[offset + i];
} }
} }

View File

@@ -41,16 +41,16 @@ $(PROJECT): $(SRCS)
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -L../../stub -lvortex -o $@ $(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -L../../stub -lvortex -o $@
run-fpga: $(PROJECT) run-fpga: $(PROJECT)
LD_LIBRARY_PATH=../../opae:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16 LD_LIBRARY_PATH=../../opae:$(LD_LIBRARY_PATH) ./$(PROJECT) -n 16
run-ase: $(PROJECT) run-ase: $(PROJECT)
ASE_LOG=0 LD_LIBRARY_PATH=../../opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16 ASE_LOG=0 LD_LIBRARY_PATH=../../opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) -n 16
run-rtlsim: $(PROJECT) run-rtlsim: $(PROJECT)
LD_LIBRARY_PATH=../../rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16 LD_LIBRARY_PATH=../../rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) -n 16
run-simx: $(PROJECT) run-simx: $(PROJECT)
LD_LIBRARY_PATH=../../simx:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16 LD_LIBRARY_PATH=../../simx:$(LD_LIBRARY_PATH) ./$(PROJECT) -n 16
.depend: $(SRCS) .depend: $(SRCS)
$(CXX) $(CXXFLAGS) -MM $^ > .depend; $(CXX) $(CXXFLAGS) -MM $^ > .depend;

View File

@@ -4,7 +4,7 @@
#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000 #define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000
struct kernel_arg_t { struct kernel_arg_t {
uint32_t stride; uint32_t count;
uint32_t src0_ptr; uint32_t src0_ptr;
uint32_t src1_ptr; uint32_t src1_ptr;
uint32_t dst_ptr; uint32_t dst_ptr;

View File

@@ -14,23 +14,26 @@
exit(-1); \ exit(-1); \
} while (false) } while (false)
const char* program_file = "kernel.bin"; const char* kernel_file = "kernel.bin";
uint32_t data_stride = 0; uint32_t count = 0;
vx_device_h device = nullptr;
vx_buffer_h buffer = nullptr;
static void show_usage() { static void show_usage() {
std::cout << "Vortex Driver Test." << std::endl; std::cout << "Vortex Driver Test." << std::endl;
std::cout << "Usage: [-f: program] [-n stride] [-h: help]" << std::endl; std::cout << "Usage: [-k: kernel] [-n words] [-h: help]" << std::endl;
} }
static void parse_args(int argc, char **argv) { static void parse_args(int argc, char **argv) {
int c; int c;
while ((c = getopt(argc, argv, "n:f:h?")) != -1) { while ((c = getopt(argc, argv, "n:k:h?")) != -1) {
switch (c) { switch (c) {
case 'n': case 'n':
data_stride = atoi(optarg); count = atoi(optarg);
break; break;
case 'f': case 'k':
program_file = optarg; kernel_file = optarg;
break; break;
case 'h': case 'h':
case '?': { case '?': {
@@ -42,16 +45,8 @@ static void parse_args(int argc, char **argv) {
exit(-1); exit(-1);
} }
} }
if (nullptr == program_file) {
show_usage();
exit(-1);
}
} }
vx_device_h device = nullptr;
vx_buffer_h buffer = nullptr;
void cleanup() { void cleanup() {
if (buffer) { if (buffer) {
vx_buf_release(buffer); vx_buf_release(buffer);
@@ -61,9 +56,7 @@ void cleanup() {
} }
} }
int run_test(vx_device_h device, int run_test(const kernel_arg_t& kernel_arg,
vx_buffer_h buffer,
const kernel_arg_t& kernel_arg,
uint32_t buf_size, uint32_t buf_size,
uint32_t num_points) { uint32_t num_points) {
// start device // start device
@@ -86,13 +79,13 @@ int run_test(vx_device_h device,
std::cout << "verify result" << std::endl; std::cout << "verify result" << std::endl;
{ {
int errors = 0; int errors = 0;
auto buf_ptr = (int*)vx_host_ptr(buffer); auto buf_ptr = (int32_t*)vx_host_ptr(buffer);
for (uint32_t i = 0; i < num_points; ++i) { for (uint32_t i = 0; i < num_points; ++i) {
int ref = i + i; int ref = i + i;
int cur = buf_ptr[i]; int cur = buf_ptr[i];
if (cur != ref) { if (cur != ref) {
std::cout << "error at value " << i std::cout << "error at value " << i
<< ": actual 0x" << cur << ", expected 0x" << ref << std::endl; << ": actual 0x" << cur << ", expected 0x" << ref << std::endl;
++errors; ++errors;
} }
} }
@@ -113,21 +106,18 @@ int main(int argc, char *argv[]) {
// parse command arguments // parse command arguments
parse_args(argc, argv); parse_args(argc, argv);
if (count == 0) {
count = 1;
}
uint32_t max_cores = vx_dev_caps(VX_CAPS_MAX_CORES); uint32_t max_cores = vx_dev_caps(VX_CAPS_MAX_CORES);
uint32_t max_warps = vx_dev_caps(VX_CAPS_MAX_WARPS); uint32_t max_warps = vx_dev_caps(VX_CAPS_MAX_WARPS);
uint32_t max_threads = vx_dev_caps(VX_CAPS_MAX_THREADS); uint32_t max_threads = vx_dev_caps(VX_CAPS_MAX_THREADS);
if (data_stride == 0) { uint32_t num_points = count * max_cores * max_warps * max_threads;
data_stride = 1; uint32_t buf_size = num_points * sizeof(uint32_t);
}
kernel_arg.stride = data_stride; std::cout << "number of points: " << num_points << std::endl;
uint32_t num_points = max_cores * max_warps * max_threads;
uint32_t buf_size = num_points * data_stride * sizeof(uint32_t);
std::cout << "number of workitems: " << num_points << std::endl;
std::cout << "workitem size: " << data_stride * sizeof(uint32_t) << " bytes" << std::endl;
std::cout << "buffer size: " << buf_size << " bytes" << std::endl; std::cout << "buffer size: " << buf_size << " bytes" << std::endl;
// open device connection // open device connection
@@ -136,20 +126,20 @@ int main(int argc, char *argv[]) {
// upload program // upload program
std::cout << "upload program" << std::endl; std::cout << "upload program" << std::endl;
RT_CHECK(vx_upload_kernel_file(device, program_file)); RT_CHECK(vx_upload_kernel_file(device, kernel_file));
// allocate device memory // allocate device memory
std::cout << "allocate device memory" << std::endl; std::cout << "allocate device memory" << std::endl;
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value)); RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
kernel_arg.src0_ptr = value; kernel_arg.src0_ptr = value;
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value)); RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
kernel_arg.src1_ptr = value; kernel_arg.src1_ptr = value;
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value)); RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
kernel_arg.dst_ptr = value; kernel_arg.dst_ptr = value;
kernel_arg.count = count;
std::cout << "dev_src0=" << std::hex << kernel_arg.src0_ptr << std::endl; std::cout << "dev_src0=" << std::hex << kernel_arg.src0_ptr << std::endl;
std::cout << "dev_src1=" << std::hex << kernel_arg.src1_ptr << std::endl; std::cout << "dev_src1=" << std::hex << kernel_arg.src1_ptr << std::endl;
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::endl; std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::endl;
@@ -159,32 +149,6 @@ int main(int argc, char *argv[]) {
uint32_t alloc_size = std::max<uint32_t>(buf_size, sizeof(kernel_arg_t)); uint32_t alloc_size = std::max<uint32_t>(buf_size, sizeof(kernel_arg_t));
RT_CHECK(vx_alloc_shared_mem(device, alloc_size, &buffer)); RT_CHECK(vx_alloc_shared_mem(device, alloc_size, &buffer));
// populate source buffer0 values
std::cout << "populate source buffer0 values" << std::endl;
{
auto buf_ptr = (int*)vx_host_ptr(buffer);
for (uint32_t i = 0; i < num_points; ++i) {
buf_ptr[i] = i-1;
}
}
// upload source buffer0
std::cout << "upload source buffer0" << std::endl;
RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.src0_ptr, buf_size, 0));
// populate source buffer1 values
std::cout << "populate source buffer1 values" << std::endl;
{
auto buf_ptr = (int*)vx_host_ptr(buffer);
for (uint32_t i = 0; i < num_points; ++i) {
buf_ptr[i] = i+1;
}
}
// upload source buffer1
std::cout << "upload source buffer1" << std::endl;
RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.src1_ptr, buf_size, 0));
// upload kernel argument // upload kernel argument
std::cout << "upload kernel argument" << std::endl; std::cout << "upload kernel argument" << std::endl;
{ {
@@ -193,9 +157,41 @@ int main(int argc, char *argv[]) {
RT_CHECK(vx_copy_to_dev(buffer, KERNEL_ARG_DEV_MEM_ADDR, sizeof(kernel_arg_t), 0)); RT_CHECK(vx_copy_to_dev(buffer, KERNEL_ARG_DEV_MEM_ADDR, sizeof(kernel_arg_t), 0));
} }
// upload source buffer0
{
auto buf_ptr = (int32_t*)vx_host_ptr(buffer);
for (uint32_t i = 0; i < num_points; ++i) {
buf_ptr[i] = i-1;
}
}
std::cout << "upload source buffer0" << std::endl;
RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.src0_ptr, buf_size, 0));
// upload source buffer1
{
auto buf_ptr = (int32_t*)vx_host_ptr(buffer);
for (uint32_t i = 0; i < num_points; ++i) {
buf_ptr[i] = i+1;
}
}
std::cout << "upload source buffer1" << std::endl;
RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.src1_ptr, buf_size, 0));
// clear destination buffer
{
auto buf_ptr = (int32_t*)vx_host_ptr(buffer);
for (uint32_t i = 0; i < num_points; ++i) {
buf_ptr[i] = 0;
}
}
std::cout << "clear destination buffer" << std::endl;
RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.dst_ptr, buf_size, 0));
// run tests // run tests
std::cout << "run tests" << std::endl; std::cout << "run tests" << std::endl;
RT_CHECK(run_test(device, buffer, kernel_arg, buf_size, num_points)); RT_CHECK(run_test(kernel_arg, buf_size, num_points));
// cleanup // cleanup
std::cout << "cleanup" << std::endl; std::cout << "cleanup" << std::endl;

BIN
driver/tests/demo/kernel.bin Executable file → Normal file

Binary file not shown.

View File

@@ -6,13 +6,14 @@
void kernel_body(void* arg) { void kernel_body(void* arg) {
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg); struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
int* src0_ptr = (int*)_arg->src0_ptr; uint32_t count = _arg->count;
int* src1_ptr = (int*)_arg->src1_ptr; int32_t* src0_ptr = (int32_t*)_arg->src0_ptr;
int* dst_ptr = (int*)_arg->dst_ptr; int32_t* src1_ptr = (int32_t*)_arg->src1_ptr;
int32_t* dst_ptr = (int32_t*)_arg->dst_ptr;
unsigned offset = vx_thread_gid() * _arg->stride; uint32_t offset = vx_thread_gid() * count;
for (unsigned i = 0; i < _arg->stride; ++i) { for (uint32_t i = 0; i < count; ++i) {
dst_ptr[offset+i] = src0_ptr[offset+i] + src1_ptr[offset+i]; dst_ptr[offset+i] = src0_ptr[offset+i] + src1_ptr[offset+i];
} }
} }

View File

@@ -68,6 +68,11 @@ vcd file vortex.vcd
vcd add -r /*/Vortex/hw/rtl/* vcd add -r /*/Vortex/hw/rtl/*
run -all run -all
# compress FPGA output files
tar -zcvf output_files_1c.tar.gz `find ./build_fpga_1c -type f \( -iname \*.rpt -o -iname \*.txt -o -iname \*summary -o -iname \*.log \)`
tar -zcvf output_files_1c_rel.tar.gz `find ./build_fpga_1c_rel -type f \( -iname \*.rpt -o -iname \*.txt -o -iname \*summary -o -iname \*.log \)`
tar -zcvf output_files_2c_rel.tar.gz `find ./build_fpga_2c_rel -type f \( -iname \*.rpt -o -iname \*.txt -o -iname \*summary -o -iname \*.log \)`
# compress VCD trace # compress VCD trace
tar -zcvf vortex.vcd.tar.gz ./build_ase_1c/work/vortex.vcd tar -zcvf vortex.vcd.tar.gz ./build_ase_1c/work/vortex.vcd

View File

@@ -2,8 +2,10 @@ vortex_afu.json
QI:vortex_afu.qsf QI:vortex_afu.qsf
+define+NDEBUG +define+SCOPE
#+define+SCOPE
+define+NUM_WARPS=2
+define+NUM_THREADS=2
#+define+DBG_PRINT_CORE_ICACHE #+define+DBG_PRINT_CORE_ICACHE
#+define+DBG_PRINT_CORE_DCACHE #+define+DBG_PRINT_CORE_DCACHE
@@ -77,6 +79,7 @@ QI:vortex_afu.qsf
../rtl/Vortex_Socket.v ../rtl/Vortex_Socket.v
../rtl/Vortex_Cluster.v ../rtl/Vortex_Cluster.v
../rtl/Vortex.v ../rtl/Vortex.v
../rtl/VX_mem_unit.v
../rtl/VX_pipeline.v ../rtl/VX_pipeline.v
../rtl/VX_front_end.v ../rtl/VX_front_end.v
../rtl/VX_back_end.v ../rtl/VX_back_end.v
@@ -94,12 +97,11 @@ QI:vortex_afu.qsf
../rtl/VX_gpr.v ../rtl/VX_gpr.v
../rtl/VX_gpr_ram.v ../rtl/VX_gpr_ram.v
../rtl/VX_gpr_stage.v ../rtl/VX_gpr_stage.v
../rtl/VX_mem_ctrl.v
../rtl/VX_alu_unit.v ../rtl/VX_alu_unit.v
../rtl/VX_lsu_unit.v ../rtl/VX_lsu_unit.v
../rtl/VX_lsu_addr_gen.v
../rtl/VX_decode.v ../rtl/VX_decode.v
../rtl/VX_inst_multiplex.v ../rtl/VX_inst_multiplex.v
../rtl/VX_lsu_addr_gen.v
../rtl/VX_dcache_arb.v ../rtl/VX_dcache_arb.v
../rtl/VX_mem_arb.v ../rtl/VX_mem_arb.v

View File

@@ -2,3 +2,6 @@
# Analysis & Synthesis Assignments # Analysis & Synthesis Assignments
set_global_assignment -name VERILOG_INPUT_VERSION SYSTEMVERILOG_2009 set_global_assignment -name VERILOG_INPUT_VERSION SYSTEMVERILOG_2009
set_global_assignment -name ADD_PASS_THROUGH_LOGIC_TO_INFERRED_RAMS ON set_global_assignment -name ADD_PASS_THROUGH_LOGIC_TO_INFERRED_RAMS ON
set_global_assignment -name VERILOG_MACRO QUARTUS
set_global_assignment -name VERILOG_MACRO SYNTHESIS
set_global_assignment -name VERILOG_MACRO NDEBUG

View File

@@ -13,8 +13,6 @@ import local_mem_cfg_pkg::*;
`include "VX_define.vh" `include "VX_define.vh"
`define VX_TO_DRAM_ADDR(x) x[`VX_DRAM_ADDR_WIDTH-1:(`VX_DRAM_ADDR_WIDTH-DRAM_ADDR_WIDTH)]
module vortex_afu #( module vortex_afu #(
parameter NUM_LOCAL_MEM_BANKS = 2 parameter NUM_LOCAL_MEM_BANKS = 2
) ( ) (
@@ -139,10 +137,12 @@ t_ccip_clAddr csr_io_addr;
logic[DRAM_ADDR_WIDTH-1:0] csr_mem_addr; logic[DRAM_ADDR_WIDTH-1:0] csr_mem_addr;
logic[DRAM_ADDR_WIDTH-1:0] csr_data_size; logic[DRAM_ADDR_WIDTH-1:0] csr_data_size;
`ifdef SCOPE
logic [63:0] csr_scope_cmd; logic [63:0] csr_scope_cmd;
logic [63:0] csr_scope_data; logic [63:0] csr_scope_data;
logic csr_scope_read; logic csr_scope_read;
logic csr_scope_write; logic csr_scope_write;
`endif
// MMIO controller //////////////////////////////////////////////////////////// // MMIO controller ////////////////////////////////////////////////////////////
@@ -154,9 +154,11 @@ assign mmio_hdr = t_ccip_c0_ReqMmioHdr'(cp2af_sRxPort.c0.hdr);
t_if_ccip_c2_Tx mmio_tx; t_if_ccip_c2_Tx mmio_tx;
assign af2cp_sTxPort.c2 = mmio_tx; assign af2cp_sTxPort.c2 = mmio_tx;
`ifdef SCOPE
assign csr_scope_cmd = 64'(cp2af_sRxPort.c0.data); assign csr_scope_cmd = 64'(cp2af_sRxPort.c0.data);
assign csr_scope_write = cp2af_sRxPort.c0.mmioWrValid && (MMIO_CSR_SCOPE_CMD == mmio_hdr.address); assign csr_scope_write = cp2af_sRxPort.c0.mmioWrValid && (MMIO_CSR_SCOPE_CMD == mmio_hdr.address);
assign csr_scope_read = cp2af_sRxPort.c0.mmioRdValid && (MMIO_CSR_SCOPE_DATA == mmio_hdr.address); assign csr_scope_read = cp2af_sRxPort.c0.mmioRdValid && (MMIO_CSR_SCOPE_DATA == mmio_hdr.address);
`endif
always_ff @(posedge clk) always_ff @(posedge clk)
begin begin
@@ -202,11 +204,13 @@ begin
$display("%t: CSR_CMD: %0d", $time, $bits(csr_cmd)'(cp2af_sRxPort.c0.data)); $display("%t: CSR_CMD: %0d", $time, $bits(csr_cmd)'(cp2af_sRxPort.c0.data));
`endif `endif
end end
`ifdef SCOPE
MMIO_CSR_SCOPE_CMD: begin MMIO_CSR_SCOPE_CMD: begin
`ifdef DBG_PRINT_OPAE `ifdef DBG_PRINT_OPAE
$display("%t: CSR_SCOPE_CMD: %0h", $time, 64'(cp2af_sRxPort.c0.data)); $display("%t: CSR_SCOPE_CMD: %0h", $time, 64'(cp2af_sRxPort.c0.data));
`endif `endif
end end
`endif
default: begin default: begin
// user-defined CSRs // user-defined CSRs
//if (mmio_hdr.addres >= MMIO_CSR_USER) begin //if (mmio_hdr.addres >= MMIO_CSR_USER) begin
@@ -237,18 +241,20 @@ begin
16'h0008: mmio_tx.data <= 64'h0; // reserved 16'h0008: mmio_tx.data <= 64'h0; // reserved
MMIO_CSR_STATUS: begin MMIO_CSR_STATUS: begin
`ifdef DBG_PRINT_OPAE `ifdef DBG_PRINT_OPAE
if (state != mmio_tx.data) begin if (state != state_t'(mmio_tx.data)) begin
$display("%t: STATUS: state=%0d", $time, state); $display("%t: STATUS: state=%0d", $time, state);
end end
`endif `endif
mmio_tx.data <= 64'(state); mmio_tx.data <= 64'(state);
end end
`ifdef SCOPE
MMIO_CSR_SCOPE_DATA: begin MMIO_CSR_SCOPE_DATA: begin
mmio_tx.data <= csr_scope_data; mmio_tx.data <= csr_scope_data;
`ifdef DBG_PRINT_OPAE `ifdef DBG_PRINT_OPAE
$display("%t: SCOPE: data=%0h", $time, csr_scope_data); $display("%t: SCOPE: data=%0h", $time, csr_scope_data);
`endif `endif
end end
`endif
default: mmio_tx.data <= 64'h0; default: mmio_tx.data <= 64'h0;
endcase endcase
mmio_tx.mmioRdValid <= 1; // post response mmio_tx.mmioRdValid <= 1; // post response
@@ -406,7 +412,7 @@ begin
case (state) case (state)
CMD_TYPE_READ: avs_address = cci_dram_rd_req_addr; CMD_TYPE_READ: avs_address = cci_dram_rd_req_addr;
CMD_TYPE_WRITE: avs_address = cci_dram_wr_req_addr + ((DRAM_ADDR_WIDTH)'(t_cci_rdq_tag'(cci_rdq_dout))); CMD_TYPE_WRITE: avs_address = cci_dram_wr_req_addr + ((DRAM_ADDR_WIDTH)'(t_cci_rdq_tag'(cci_rdq_dout)));
default: avs_address = `VX_TO_DRAM_ADDR(vx_dram_req_addr); default: avs_address = vx_dram_req_addr[`VX_DRAM_ADDR_WIDTH-1:`VX_DRAM_ADDR_WIDTH-DRAM_ADDR_WIDTH];
endcase endcase
case (state) case (state)
@@ -821,7 +827,7 @@ end
`SCOPE_ASSIGN(scope_snp_rsp_tag, vx_snp_rsp_tag); `SCOPE_ASSIGN(scope_snp_rsp_tag, vx_snp_rsp_tag);
`SCOPE_ASSIGN(scope_snp_rsp_ready, vx_snp_rsp_ready); `SCOPE_ASSIGN(scope_snp_rsp_ready, vx_snp_rsp_ready);
`STATIC_ASSERT($bits({`SCOPE_SIGNALS_DATA_LIST `SCOPE_SIGNALS_UPD_LIST}) == 641, "oops!") `STATIC_ASSERT($bits({`SCOPE_SIGNALS_DATA_LIST `SCOPE_SIGNALS_UPD_LIST}) == 626, "oops!")
wire scope_changed = (scope_icache_req_valid && scope_icache_req_ready) wire scope_changed = (scope_icache_req_valid && scope_icache_req_ready)
|| (scope_icache_rsp_valid && scope_icache_rsp_ready) || (scope_icache_rsp_valid && scope_icache_rsp_ready)
@@ -855,15 +861,17 @@ VX_scope #(
`endif `endif
// Vortex binding ///////////////////////////////////////////////////////////// // Vortex /////////////////////////////////////////////////////////////////////
assign cmd_run_done = !vx_busy; assign cmd_run_done = !vx_busy;
Vortex_Socket #() vx_socket ( Vortex_Socket #() vx_socket (
`SCOPE_SIGNALS_ICACHE_ATTACH `SCOPE_SIGNALS_ISTAGE_BIND
`SCOPE_SIGNALS_DCACHE_ATTACH `SCOPE_SIGNALS_LSU_BIND
`SCOPE_SIGNALS_CORE_ATTACH `SCOPE_SIGNALS_CORE_BIND
`SCOPE_SIGNALS_BE_ATTACH `SCOPE_SIGNALS_ICACHE_BIND
`SCOPE_SIGNALS_PIPELINE_BIND
`SCOPE_SIGNALS_BE_BIND
.clk (clk), .clk (clk),
.reset (vx_reset), .reset (vx_reset),

View File

@@ -3,7 +3,7 @@
module VX_back_end #( module VX_back_end #(
parameter CORE_ID = 0 parameter CORE_ID = 0
) ( ) (
`SCOPE_SIGNALS_DCACHE_IO `SCOPE_SIGNALS_LSU_IO
`SCOPE_SIGNALS_BE_IO `SCOPE_SIGNALS_BE_IO
input wire clk, input wire clk,
@@ -71,7 +71,7 @@ module VX_back_end #(
VX_lsu_unit #( VX_lsu_unit #(
.CORE_ID(CORE_ID) .CORE_ID(CORE_ID)
) lsu_unit ( ) lsu_unit (
`SCOPE_SIGNALS_DCACHE_ATTACH `SCOPE_SIGNALS_LSU_BIND
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),

View File

@@ -2,6 +2,7 @@
`define VX_DEFINE `define VX_DEFINE
`include "VX_config.vh" `include "VX_config.vh"
`include "VX_scope.vh"
// `define QUEUE_FORCE_MLAB 1 // `define QUEUE_FORCE_MLAB 1
// `define SYN 1 // `define SYN 1
@@ -139,7 +140,7 @@
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
`ifndef NDEBUG // pc, wb, rd, warp_num `ifdef DBG_CORE_REQ_INFO // pc, wb, rd, warp_num
`define DEBUG_CORE_REQ_MDATA_WIDTH (32 + 2 + 5 + `NW_BITS) `define DEBUG_CORE_REQ_MDATA_WIDTH (32 + 2 + 5 + `NW_BITS)
`else `else
`define DEBUG_CORE_REQ_MDATA_WIDTH 0 `define DEBUG_CORE_REQ_MDATA_WIDTH 0
@@ -286,316 +287,5 @@
`define DRAM_TO_BYTE_ADDR(x) {x, (32-$bits(x))'(0)} `define DRAM_TO_BYTE_ADDR(x) {x, (32-$bits(x))'(0)}
///////////////////////////////////////////////////////////////////////////////
`ifdef SCOPE
`define SCOPE_SIGNALS_DATA_LIST \
scope_icache_req_warp_num, \
scope_icache_req_addr, \
scope_icache_req_tag, \
scope_icache_rsp_data, \
scope_icache_rsp_tag, \
scope_dcache_req_warp_num, \
scope_dcache_req_curr_PC, \
scope_dcache_req_addr, \
scope_dcache_req_rw, \
scope_dcache_req_byteen, \
scope_dcache_req_data, \
scope_dcache_req_tag, \
scope_dcache_rsp_data, \
scope_dcache_rsp_tag, \
scope_dram_req_addr, \
scope_dram_req_rw, \
scope_dram_req_byteen, \
scope_dram_req_data, \
scope_dram_req_tag, \
scope_dram_rsp_data, \
scope_dram_rsp_tag, \
scope_snp_req_addr, \
scope_snp_req_invalidate, \
scope_snp_req_tag, \
scope_snp_rsp_tag, \
scope_decode_warp_num, \
scope_decode_curr_PC, \
scope_decode_is_jal, \
scope_decode_rs1, \
scope_decode_rs2, \
scope_execute_warp_num, \
scope_execute_rd, \
scope_execute_a, \
scope_execute_b, \
scope_writeback_warp_num, \
scope_writeback_wb, \
scope_writeback_rd, \
scope_writeback_data,
`define SCOPE_SIGNALS_UPD_LIST \
scope_icache_req_valid, \
scope_icache_req_ready, \
scope_icache_rsp_valid, \
scope_icache_rsp_ready, \
scope_dcache_req_valid, \
scope_dcache_req_ready, \
scope_dcache_rsp_valid, \
scope_dcache_rsp_ready, \
scope_dram_req_valid, \
scope_dram_req_ready, \
scope_dram_rsp_valid, \
scope_dram_rsp_ready, \
scope_snp_req_valid, \
scope_snp_req_ready, \
scope_snp_rsp_valid, \
scope_snp_rsp_ready, \
scope_decode_valid, \
scope_execute_valid, \
scope_writeback_valid, \
scope_schedule_delay, \
scope_memory_delay, \
scope_exec_delay, \
scope_gpr_stage_delay, \
scope_busy
`define SCOPE_SIGNALS_DECL \
wire scope_icache_req_valid; \
wire [1:0] scope_icache_req_warp_num; \
wire [31:0] scope_icache_req_addr; \
wire [`ICORE_TAG_WIDTH-1:0] scope_icache_req_tag; \
wire scope_icache_req_ready; \
wire scope_icache_rsp_valid; \
wire [31:0] scope_icache_rsp_data; \
wire [`ICORE_TAG_WIDTH-1:0] scope_icache_rsp_tag; \
wire scope_icache_rsp_ready; \
wire [`DNUM_REQUESTS-1:0] scope_dcache_req_valid; \
wire [1:0] scope_dcache_req_warp_num; \
wire [31:0] scope_dcache_req_curr_PC; \
wire [31:0] scope_dcache_req_addr; \
wire scope_dcache_req_rw; \
wire [3:0] scope_dcache_req_byteen; \
wire [31:0] scope_dcache_req_data; \
wire [`DCORE_TAG_WIDTH-1:0] scope_dcache_req_tag; \
wire scope_dcache_req_ready; \
wire [`DNUM_REQUESTS-1:0] scope_dcache_rsp_valid; \
wire [31:0] scope_dcache_rsp_data; \
wire [`DCORE_TAG_WIDTH-1:0] scope_dcache_rsp_tag; \
wire scope_dcache_rsp_ready; \
wire scope_dram_req_valid; \
wire [31:0] scope_dram_req_addr; \
wire scope_dram_req_rw; \
wire [15:0] scope_dram_req_byteen; \
wire [31:0] scope_dram_req_data; \
wire [`VX_DRAM_TAG_WIDTH-1:0] scope_dram_req_tag; \
wire scope_dram_req_ready; \
wire scope_dram_rsp_valid; \
wire [31:0] scope_dram_rsp_data; \
wire [`VX_DRAM_TAG_WIDTH-1:0] scope_dram_rsp_tag; \
wire scope_dram_rsp_ready; \
wire scope_snp_req_valid; \
wire [31:0] scope_snp_req_addr; \
wire scope_snp_req_invalidate; \
wire [`VX_SNP_TAG_WIDTH-1:0] scope_snp_req_tag; \
wire scope_snp_req_ready; \
wire scope_snp_rsp_valid; \
wire [`VX_SNP_TAG_WIDTH-1:0] scope_snp_rsp_tag; \
wire scope_busy; \
wire scope_snp_rsp_ready; \
wire scope_schedule_delay; \
wire scope_memory_delay; \
wire scope_exec_delay; \
wire scope_gpr_stage_delay; \
wire [3:0] scope_decode_valid; \
wire [1:0] scope_decode_warp_num; \
wire [31:0] scope_decode_curr_PC; \
wire scope_decode_is_jal; \
wire [4:0] scope_decode_rs1; \
wire [4:0] scope_decode_rs2; \
wire [3:0] scope_execute_valid; \
wire [1:0] scope_execute_warp_num; \
wire [4:0] scope_execute_rd; \
wire [31:0] scope_execute_a; \
wire [31:0] scope_execute_b; \
wire [3:0] scope_writeback_valid; \
wire [1:0] scope_writeback_warp_num; \
wire [1:0] scope_writeback_wb; \
wire [4:0] scope_writeback_rd; \
wire [31:0] scope_writeback_data;
`define SCOPE_SIGNALS_ICACHE_IO \
/* verilator lint_off UNDRIVEN */ \
output wire scope_icache_req_valid, \
output wire [1:0] scope_icache_req_warp_num, \
output wire [31:0] scope_icache_req_addr, \
output wire [`ICORE_TAG_WIDTH-1:0] scope_icache_req_tag, \
output wire scope_icache_req_ready, \
output wire scope_icache_rsp_valid, \
output wire [31:0] scope_icache_rsp_data, \
output wire [`ICORE_TAG_WIDTH-1:0] scope_icache_rsp_tag, \
output wire scope_icache_rsp_ready, \
/* verilator lint_on UNDRIVEN */
`define SCOPE_SIGNALS_DCACHE_IO \
/* verilator lint_off UNDRIVEN */ \
output wire [`DNUM_REQUESTS-1:0] scope_dcache_req_valid, \
output wire [1:0] scope_dcache_req_warp_num, \
output wire [31:0] scope_dcache_req_curr_PC, \
output wire [31:0] scope_dcache_req_addr, \
output wire scope_dcache_req_rw, \
output wire [3:0] scope_dcache_req_byteen, \
output wire [31:0] scope_dcache_req_data, \
output wire [`DCORE_TAG_WIDTH-1:0] scope_dcache_req_tag, \
output wire scope_dcache_req_ready, \
output wire [`DNUM_REQUESTS-1:0] scope_dcache_rsp_valid, \
output wire [31:0] scope_dcache_rsp_data, \
output wire [`DCORE_TAG_WIDTH-1:0] scope_dcache_rsp_tag, \
output wire scope_dcache_rsp_ready, \
/* verilator lint_on UNDRIVEN */
`define SCOPE_SIGNALS_DRAM_IO \
/* verilator lint_off UNDRIVEN */ \
output wire scope_dram_req_valid, \
output wire [31:0] scope_dram_req_addr, \
output wire scope_dram_req_rw, \
output wire [15:0] scope_dram_req_byteen, \
output wire [31:0] scope_dram_req_data, \
output wire [`VX_DRAM_TAG_WIDTH-1:0] scope_dram_req_tag, \
output wire scope_dram_req_ready, \
output wire scope_dram_rsp_valid, \
output wire [31:0] scope_dram_rsp_data, \
output wire [`VX_DRAM_TAG_WIDTH-1:0] scope_dram_rsp_tag, \
output wire scope_dram_rsp_ready, \
/* verilator lint_on UNDRIVEN */
`define SCOPE_SIGNALS_SNP_IO \
/* verilator lint_off UNDRIVEN */ \
output wire scope_snp_req_valid, \
output wire [31:0] scope_snp_req_addr, \
output wire scope_snp_req_invalidate, \
output wire [`VX_SNP_TAG_WIDTH-1:0] scope_snp_req_tag, \
output wire scope_snp_req_ready, \
output wire scope_snp_rsp_valid, \
output wire [`VX_SNP_TAG_WIDTH-1:0] scope_snp_rsp_tag, \
output wire scope_snp_rsp_ready, \
/* verilator lint_on UNDRIVEN */
`define SCOPE_SIGNALS_CORE_IO \
/* verilator lint_off UNDRIVEN */ \
output wire scope_busy, \
output wire scope_schedule_delay, \
output wire scope_memory_delay, \
output wire scope_exec_delay, \
output wire scope_gpr_stage_delay, \
/* verilator lint_on UNDRIVEN */
`define SCOPE_SIGNALS_BE_IO \
/* verilator lint_off UNDRIVEN */ \
output wire [3:0] scope_decode_valid, \
output wire [1:0] scope_decode_warp_num, \
output wire [31:0] scope_decode_curr_PC, \
output wire scope_decode_is_jal, \
output wire [4:0] scope_decode_rs1, \
output wire [4:0] scope_decode_rs2, \
output wire [3:0] scope_execute_valid, \
output wire [1:0] scope_execute_warp_num, \
output wire [4:0] scope_execute_rd, \
output wire [31:0] scope_execute_a, \
output wire [31:0] scope_execute_b, \
output wire [3:0] scope_writeback_valid, \
output wire [1:0] scope_writeback_warp_num, \
output wire [1:0] scope_writeback_wb, \
output wire [4:0] scope_writeback_rd, \
output wire [31:0] scope_writeback_data,
/* verilator lint_on UNDRIVEN */
`define SCOPE_SIGNALS_ICACHE_ATTACH \
.scope_icache_req_valid (scope_icache_req_valid), \
.scope_icache_req_warp_num (scope_icache_req_warp_num), \
.scope_icache_req_addr (scope_icache_req_addr), \
.scope_icache_req_tag (scope_icache_req_tag), \
.scope_icache_req_ready (scope_icache_req_ready), \
.scope_icache_rsp_valid (scope_icache_rsp_valid), \
.scope_icache_rsp_data (scope_icache_rsp_data), \
.scope_icache_rsp_tag (scope_icache_rsp_tag), \
.scope_icache_rsp_ready (scope_icache_rsp_ready),
`define SCOPE_SIGNALS_DCACHE_ATTACH \
.scope_dcache_req_valid (scope_dcache_req_valid), \
.scope_dcache_req_warp_num (scope_dcache_req_warp_num), \
.scope_dcache_req_curr_PC (scope_dcache_req_curr_PC), \
.scope_dcache_req_addr (scope_dcache_req_addr), \
.scope_dcache_req_rw (scope_dcache_req_rw), \
.scope_dcache_req_byteen(scope_dcache_req_byteen), \
.scope_dcache_req_data (scope_dcache_req_data), \
.scope_dcache_req_tag (scope_dcache_req_tag), \
.scope_dcache_req_ready (scope_dcache_req_ready), \
.scope_dcache_rsp_valid (scope_dcache_rsp_valid), \
.scope_dcache_rsp_data (scope_dcache_rsp_data), \
.scope_dcache_rsp_tag (scope_dcache_rsp_tag), \
.scope_dcache_rsp_ready (scope_dcache_rsp_ready),
`define SCOPE_SIGNALS_DRAM_ATTACH \
.scope_dram_req_valid (scope_dram_req_valid), \
.scope_dram_req_addr (scope_dram_req_addr), \
.scope_dram_req_rw (scope_dram_req_rw), \
.scope_dram_req_byteen (scope_dram_req_byteen), \
.scope_dram_req_data (scope_dram_req_data), \
.scope_dram_req_tag (scope_dram_req_tag), \
.scope_dram_req_ready (scope_dram_req_ready), \
.scope_dram_rsp_valid (scope_dram_rsp_valid), \
.scope_dram_rsp_data (scope_dram_rsp_data), \
.scope_dram_rsp_tag (scope_dram_rsp_tag), \
.scope_dram_rsp_ready (scope_dram_rsp_ready),
`define SCOPE_SIGNALS_SNP_ATTACH \
.scope_snp_req_valid (scope_snp_req_valid), \
.scope_snp_req_addr (scope_snp_req_addr), \
.scope_snp_req_invalidate(scope_snp_req_invalidate), \
.scope_snp_req_tag (scope_snp_req_tag), \
.scope_snp_req_ready (scope_snp_req_ready), \
.scope_snp_rsp_valid (scope_snp_rsp_valid), \
.scope_snp_rsp_tag (scope_snp_rsp_tag), \
.scope_snp_rsp_ready (scope_snp_rsp_ready),
`define SCOPE_SIGNALS_CORE_ATTACH \
.scope_busy (scope_busy), \
.scope_schedule_delay (scope_schedule_delay), \
.scope_memory_delay (scope_memory_delay), \
.scope_exec_delay (scope_exec_delay), \
.scope_gpr_stage_delay (scope_gpr_stage_delay),
`define SCOPE_SIGNALS_BE_ATTACH \
.scope_decode_valid (scope_decode_valid), \
.scope_decode_warp_num (scope_decode_warp_num), \
.scope_decode_curr_PC (scope_decode_curr_PC), \
.scope_decode_is_jal (scope_decode_is_jal), \
.scope_decode_rs1 (scope_decode_rs1), \
.scope_decode_rs2 (scope_decode_rs2), \
.scope_execute_valid (scope_execute_valid), \
.scope_execute_warp_num (scope_execute_warp_num), \
.scope_execute_rd (scope_execute_rd), \
.scope_execute_a (scope_execute_a), \
.scope_execute_b (scope_execute_b), \
.scope_writeback_valid (scope_writeback_valid), \
.scope_writeback_warp_num (scope_writeback_warp_num), \
.scope_writeback_wb (scope_writeback_wb), \
.scope_writeback_rd (scope_writeback_rd), \
.scope_writeback_data (scope_writeback_data),
`define SCOPE_ASSIGN(d,s) assign d = s
`else
`define SCOPE_SIGNALS_ICACHE_IO
`define SCOPE_SIGNALS_DCACHE_IO
`define SCOPE_SIGNALS_DRAM_IO
`define SCOPE_SIGNALS_CORE_IO
`define SCOPE_SIGNALS_BE_IO
`define SCOPE_SIGNALS_ICACHE_ATTACH
`define SCOPE_SIGNALS_DCACHE_ATTACH
`define SCOPE_SIGNALS_DRAM_ATTACH
`define SCOPE_SIGNALS_CORE_ATTACH
`define SCOPE_SIGNALS_BE_ATTACH
`define SCOPE_ASSIGN(d,s)
`endif
// VX_DEFINE // VX_DEFINE
`endif `endif

View File

@@ -3,7 +3,7 @@
module VX_front_end #( module VX_front_end #(
parameter CORE_ID = 0 parameter CORE_ID = 0
) ( ) (
`SCOPE_SIGNALS_ICACHE_IO `SCOPE_SIGNALS_ISTAGE_IO
input wire clk, input wire clk,
input wire reset, input wire reset,
@@ -65,7 +65,7 @@ module VX_front_end #(
VX_icache_stage #( VX_icache_stage #(
.CORE_ID(CORE_ID) .CORE_ID(CORE_ID)
) icache_stage ( ) icache_stage (
`SCOPE_SIGNALS_ICACHE_ATTACH `SCOPE_SIGNALS_ISTAGE_BIND
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),

View File

@@ -3,7 +3,7 @@
module VX_icache_stage #( module VX_icache_stage #(
parameter CORE_ID = 0 parameter CORE_ID = 0
) ( ) (
`SCOPE_SIGNALS_ICACHE_IO `SCOPE_SIGNALS_ISTAGE_IO
input wire clk, input wire clk,
input wire reset, input wire reset,
@@ -68,7 +68,7 @@ module VX_icache_stage #(
// Can't accept new request // Can't accept new request
assign icache_stage_delay = mrq_full || ~icache_req_if.core_req_ready; assign icache_stage_delay = mrq_full || ~icache_req_if.core_req_ready;
`ifndef NDEBUG `ifdef DBG_CORE_REQ_INFO
assign icache_req_if.core_req_tag = {fe_inst_meta_fi.inst_pc, 2'b1, 5'b0, fe_inst_meta_fi.warp_num, mrq_write_addr}; assign icache_req_if.core_req_tag = {fe_inst_meta_fi.inst_pc, 2'b1, 5'b0, fe_inst_meta_fi.warp_num, mrq_write_addr};
`else `else
assign icache_req_if.core_req_tag = mrq_write_addr; assign icache_req_if.core_req_tag = mrq_write_addr;
@@ -95,7 +95,7 @@ module VX_icache_stage #(
`SCOPE_ASSIGN(scope_icache_rsp_ready, icache_rsp_if.core_rsp_ready); `SCOPE_ASSIGN(scope_icache_rsp_ready, icache_rsp_if.core_rsp_ready);
`ifdef DBG_PRINT_CORE_ICACHE `ifdef DBG_PRINT_CORE_ICACHE
always_ff @(posedge clk) begin always @(posedge clk) begin
if (icache_req_if.core_req_valid && icache_req_if.core_req_ready) begin if (icache_req_if.core_req_valid && icache_req_if.core_req_ready) begin
$display("%t: I%01d$ req: tag=%0h, pc=%0h, warp=%0d", $time, CORE_ID, mrq_write_addr, fe_inst_meta_fi.inst_pc, fe_inst_meta_fi.warp_num); $display("%t: I%01d$ req: tag=%0h, pc=%0h, warp=%0d", $time, CORE_ID, mrq_write_addr, fe_inst_meta_fi.inst_pc, fe_inst_meta_fi.warp_num);
end end

View File

@@ -3,7 +3,7 @@
module VX_lsu_unit #( module VX_lsu_unit #(
parameter CORE_ID = 0 parameter CORE_ID = 0
) ( ) (
`SCOPE_SIGNALS_DCACHE_IO `SCOPE_SIGNALS_LSU_IO
input wire clk, input wire clk,
input wire reset, input wire reset,
@@ -130,10 +130,10 @@ module VX_lsu_unit #(
assign dcache_req_if.core_req_addr = mem_req_addr; assign dcache_req_if.core_req_addr = mem_req_addr;
assign dcache_req_if.core_req_data = mem_req_data; assign dcache_req_if.core_req_data = mem_req_data;
`ifndef NDEBUG `ifdef DBG_CORE_REQ_INFO
assign dcache_req_if.core_req_tag = {use_pc, use_wb, use_rd, use_warp_num, mrq_write_addr}; assign dcache_req_if.core_req_tag = {use_pc, use_wb, use_rd, use_warp_num, mrq_write_addr};
`else `else
assign dcache_req_if.core_req_tag = mrq_write_addr; assign dcache_req_if.core_req_tag = mrq_write_addr;
`endif `endif
// Can't accept new request // Can't accept new request
@@ -179,7 +179,7 @@ module VX_lsu_unit #(
`SCOPE_ASSIGN(scope_dcache_rsp_ready, dcache_rsp_if.core_rsp_ready); `SCOPE_ASSIGN(scope_dcache_rsp_ready, dcache_rsp_if.core_rsp_ready);
`ifdef DBG_PRINT_CORE_DCACHE `ifdef DBG_PRINT_CORE_DCACHE
always_ff @(posedge clk) begin always @(posedge clk) begin
if ((| dcache_req_if.core_req_valid) && dcache_req_if.core_req_ready) begin if ((| dcache_req_if.core_req_valid) && dcache_req_if.core_req_ready) begin
$display("%t: D%01d$ req: valid=%b, addr=%0h, tag=%0h, r=%0d, w=%0d, pc=%0h, rd=%0d, warp=%0d, byteen=%0h, data=%0h", $display("%t: D%01d$ req: valid=%b, addr=%0h, tag=%0h, r=%0d, w=%0d, pc=%0h, rd=%0d, warp=%0d, byteen=%0h, data=%0h",
$time, CORE_ID, use_valid, use_address, mrq_write_addr, use_mem_read, use_mem_write, use_pc, use_rd, use_warp_num, mem_req_byteen, mem_req_data); $time, CORE_ID, use_valid, use_address, mrq_write_addr, use_mem_read, use_mem_write, use_pc, use_rd, use_warp_num, mem_req_byteen, mem_req_data);

View File

@@ -1,8 +1,10 @@
`include "VX_define.vh" `include "VX_define.vh"
module VX_mem_ctrl # ( module VX_mem_unit # (
parameter CORE_ID = 0 parameter CORE_ID = 0
) ( ) (
`SCOPE_SIGNALS_ICACHE_IO
input wire clk, input wire clk,
input wire reset, input wire reset,
@@ -74,7 +76,7 @@ module VX_mem_ctrl # (
.CORE_TAG_WIDTH (`DCORE_TAG_WIDTH), .CORE_TAG_WIDTH (`DCORE_TAG_WIDTH),
.CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS), .CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS),
.DRAM_TAG_WIDTH (`SDRAM_TAG_WIDTH) .DRAM_TAG_WIDTH (`SDRAM_TAG_WIDTH)
) gpu_smem ( ) smem (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
@@ -157,7 +159,7 @@ module VX_mem_ctrl # (
.CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS), .CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS),
.DRAM_TAG_WIDTH (`DDRAM_TAG_WIDTH), .DRAM_TAG_WIDTH (`DDRAM_TAG_WIDTH),
.SNP_REQ_TAG_WIDTH (`DSNP_TAG_WIDTH) .SNP_REQ_TAG_WIDTH (`DSNP_TAG_WIDTH)
) gpu_dcache ( ) dcache (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
@@ -239,7 +241,9 @@ module VX_mem_ctrl # (
.CORE_TAG_WIDTH (`DCORE_TAG_WIDTH), .CORE_TAG_WIDTH (`DCORE_TAG_WIDTH),
.CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS), .CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS),
.DRAM_TAG_WIDTH (`IDRAM_TAG_WIDTH) .DRAM_TAG_WIDTH (`IDRAM_TAG_WIDTH)
) gpu_icache ( ) icache (
`SCOPE_SIGNALS_ICACHE_BIND
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),

View File

@@ -3,9 +3,9 @@
module VX_pipeline #( module VX_pipeline #(
parameter CORE_ID = 0 parameter CORE_ID = 0
) ( ) (
`SCOPE_SIGNALS_ICACHE_IO `SCOPE_SIGNALS_ISTAGE_IO
`SCOPE_SIGNALS_DCACHE_IO `SCOPE_SIGNALS_LSU_IO
`SCOPE_SIGNALS_CORE_IO `SCOPE_SIGNALS_PIPELINE_IO
`SCOPE_SIGNALS_BE_IO `SCOPE_SIGNALS_BE_IO
// Clock // Clock
@@ -100,7 +100,7 @@ module VX_pipeline #(
VX_front_end #( VX_front_end #(
.CORE_ID(CORE_ID) .CORE_ID(CORE_ID)
) front_end ( ) front_end (
`SCOPE_SIGNALS_ICACHE_ATTACH `SCOPE_SIGNALS_ISTAGE_BIND
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
@@ -129,8 +129,8 @@ module VX_pipeline #(
VX_back_end #( VX_back_end #(
.CORE_ID(CORE_ID) .CORE_ID(CORE_ID)
) back_end ( ) back_end (
`SCOPE_SIGNALS_DCACHE_ATTACH `SCOPE_SIGNALS_LSU_BIND
`SCOPE_SIGNALS_BE_ATTACH `SCOPE_SIGNALS_BE_BIND
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
@@ -181,7 +181,7 @@ module VX_pipeline #(
`SCOPE_ASSIGN(scope_gpr_stage_delay, gpr_stage_delay); `SCOPE_ASSIGN(scope_gpr_stage_delay, gpr_stage_delay);
`ifdef DBG_PRINT_WB `ifdef DBG_PRINT_WB
always_ff @(posedge clk) begin always @(posedge clk) begin
if ((| writeback_if.valid) && (writeback_if.wb != 0)) begin if ((| writeback_if.valid) && (writeback_if.wb != 0)) begin
$display("%t: Writeback: wid=%0d, rd=%0d, data=%0h", $time, writeback_if.warp_num, writeback_if.rd, writeback_if.data); $display("%t: Writeback: wid=%0d, rd=%0d, data=%0h", $time, writeback_if.warp_num, writeback_if.rd, writeback_if.data);
end end

View File

@@ -13,11 +13,10 @@ module VX_scheduler (
output wire schedule_delay, output wire schedule_delay,
output wire is_empty output wire is_empty
); );
reg [31:0][`NUM_THREADS-1:0] rename_table[`NUM_WARPS-1:0]; localparam CTVW = `CLOG2(`NUM_WARPS * 32 + 1);
reg [31:0] count_valid;
wire acquire_rd = (| bckE_req_if.valid) && (bckE_req_if.wb != 0) && (bckE_req_if.rd != 0); reg [31:0][`NUM_THREADS-1:0] rename_table[`NUM_WARPS-1:0];
wire release_rd = (| writeback_if.valid) && (writeback_if.wb != 0) && (writeback_if.rd != 0); reg [CTVW-1:0] count_valid;
wire is_store = (bckE_req_if.mem_write != `BYTE_EN_NO); wire is_store = (bckE_req_if.mem_write != `BYTE_EN_NO);
wire is_load = (bckE_req_if.mem_read != `BYTE_EN_NO); wire is_load = (bckE_req_if.mem_read != `BYTE_EN_NO);
@@ -51,8 +50,15 @@ module VX_scheduler (
integer i, w; integer i, w;
wire acquire_rd = (| bckE_req_if.valid) && (bckE_req_if.wb != 0) && (bckE_req_if.rd != 0) && !schedule_delay;
wire release_rd = (| writeback_if.valid) && (writeback_if.wb != 0) && (writeback_if.rd != 0);
wire [`NUM_THREADS-1:0] valid_wb_new_mask = rename_table[writeback_if.warp_num][writeback_if.rd] & ~writeback_if.valid; wire [`NUM_THREADS-1:0] valid_wb_new_mask = rename_table[writeback_if.warp_num][writeback_if.rd] & ~writeback_if.valid;
// Next value of count_valid:
//   +1 when a destination register is acquired and no rename-table entry is
//      fully released (valid_wb_new_mask == 0) in the same cycle,
//   -1 when an entry is fully released and nothing is acquired,
//   unchanged otherwise (including acquire and full release in the same cycle).
// Declared as a net on purpose: an initializer on a 'reg' declaration is
// evaluated only once at time zero, whereas a net declaration assignment is a
// continuous assignment that follows its operands every cycle.
wire [CTVW-1:0] count_valid_next = (acquire_rd && ~(release_rd && (0 == valid_wb_new_mask))) ? (count_valid + 1) :
                                   (~acquire_rd && (release_rd && (0 == valid_wb_new_mask))) ? (count_valid - 1) :
                                                                                               count_valid;
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin
for (w = 0; w < `NUM_WARPS; w++) begin for (w = 0; w < `NUM_WARPS; w++) begin
@@ -62,19 +68,14 @@ module VX_scheduler (
end end
count_valid <= 0; count_valid <= 0;
end else begin end else begin
if (acquire_rd && !schedule_delay) begin if (acquire_rd) begin
rename_table[bckE_req_if.warp_num][bckE_req_if.rd] <= bckE_req_if.valid; rename_table[bckE_req_if.warp_num][bckE_req_if.rd] <= bckE_req_if.valid;
count_valid <= count_valid + 1;
end end
if (release_rd) begin if (release_rd) begin
assert(rename_table[writeback_if.warp_num][writeback_if.rd] != 0); assert(rename_table[writeback_if.warp_num][writeback_if.rd] != 0);
rename_table[writeback_if.warp_num][writeback_if.rd] <= valid_wb_new_mask; rename_table[writeback_if.warp_num][writeback_if.rd] <= valid_wb_new_mask;
if (0 == valid_wb_new_mask) begin
assert(count_valid != 0);
count_valid <= count_valid - 1;
end
end end
count_valid <= count_valid_next;
end end
end end

283
hw/rtl/VX_scope.vh Normal file
View File

@@ -0,0 +1,283 @@
`ifndef VX_SCOPE
`define VX_SCOPE
`ifdef SCOPE
// Comma-separated names of the scope payload signals (warp numbers, addresses,
// tags, data words, register ids) for the icache, dcache, dram, snoop, decode,
// execute and writeback taps.
// The list ends with a trailing comma so that it can be followed directly by
// SCOPE_SIGNALS_UPD_LIST inside one concatenation.
`define SCOPE_SIGNALS_DATA_LIST \
scope_icache_req_warp_num, \
scope_icache_req_addr, \
scope_icache_req_tag, \
scope_icache_rsp_data, \
scope_icache_rsp_tag, \
scope_dcache_req_warp_num, \
scope_dcache_req_curr_PC, \
scope_dcache_req_addr, \
scope_dcache_req_rw, \
scope_dcache_req_byteen, \
scope_dcache_req_data, \
scope_dcache_req_tag, \
scope_dcache_rsp_data, \
scope_dcache_rsp_tag, \
scope_dram_req_addr, \
scope_dram_req_rw, \
scope_dram_req_byteen, \
scope_dram_req_data, \
scope_dram_req_tag, \
scope_dram_rsp_data, \
scope_dram_rsp_tag, \
scope_snp_req_addr, \
scope_snp_req_invalidate, \
scope_snp_req_tag, \
scope_snp_rsp_tag, \
scope_decode_warp_num, \
scope_decode_curr_PC, \
scope_decode_is_jal, \
scope_decode_rs1, \
scope_decode_rs2, \
scope_execute_warp_num, \
scope_execute_rd, \
scope_execute_a, \
scope_execute_b, \
scope_writeback_warp_num, \
scope_writeback_wb, \
scope_writeback_rd, \
scope_writeback_data,
// Comma-separated names of the scope handshake/status signals (valid, ready,
// delay, busy), including the scope_idram_* handshake taps.
// No trailing comma: this list is meant to come last when concatenated after
// SCOPE_SIGNALS_DATA_LIST.
// NOTE(review): "UPD" presumably marks these as the update/trigger signals of
// the scope - confirm against the VX_scope instantiation.
`define SCOPE_SIGNALS_UPD_LIST \
scope_icache_req_valid, \
scope_icache_req_ready, \
scope_icache_rsp_valid, \
scope_icache_rsp_ready, \
scope_dcache_req_valid, \
scope_dcache_req_ready, \
scope_dcache_rsp_valid, \
scope_dcache_rsp_ready, \
scope_dram_req_valid, \
scope_dram_req_ready, \
scope_dram_rsp_valid, \
scope_dram_rsp_ready, \
scope_snp_req_valid, \
scope_snp_req_ready, \
scope_snp_rsp_valid, \
scope_snp_rsp_ready, \
scope_decode_valid, \
scope_execute_valid, \
scope_writeback_valid, \
scope_schedule_delay, \
scope_memory_delay, \
scope_exec_delay, \
scope_gpr_stage_delay, \
scope_busy, \
scope_idram_req_valid, \
scope_idram_req_ready, \
scope_idram_rsp_valid, \
scope_idram_rsp_ready
// Declares one wire per scope signal named in SCOPE_SIGNALS_DATA_LIST and
// SCOPE_SIGNALS_UPD_LIST. Warp-number widths use NW_BITS, per-thread valid
// masks use NUM_THREADS, and tag widths use the corresponding *_TAG_WIDTH
// macros, all of which are defined outside this file.
// Every declaration, including the last, carries its own semicolon.
`define SCOPE_SIGNALS_DECL \
wire scope_icache_req_valid; \
wire [`NW_BITS-1:0] scope_icache_req_warp_num; \
wire [31:0] scope_icache_req_addr; \
wire [`ICORE_TAG_WIDTH-1:0] scope_icache_req_tag; \
wire scope_icache_req_ready; \
wire scope_icache_rsp_valid; \
wire [31:0] scope_icache_rsp_data; \
wire [`ICORE_TAG_WIDTH-1:0] scope_icache_rsp_tag; \
wire scope_icache_rsp_ready; \
wire [`NUM_THREADS-1:0] scope_dcache_req_valid; \
wire [`NW_BITS-1:0] scope_dcache_req_warp_num; \
wire [31:0] scope_dcache_req_curr_PC; \
wire [31:0] scope_dcache_req_addr; \
wire scope_dcache_req_rw; \
wire [3:0] scope_dcache_req_byteen; \
wire [31:0] scope_dcache_req_data; \
wire [`DCORE_TAG_WIDTH-1:0] scope_dcache_req_tag; \
wire scope_dcache_req_ready; \
wire [`NUM_THREADS-1:0] scope_dcache_rsp_valid; \
wire [31:0] scope_dcache_rsp_data; \
wire [`DCORE_TAG_WIDTH-1:0] scope_dcache_rsp_tag; \
wire scope_dcache_rsp_ready; \
wire scope_dram_req_valid; \
wire [31:0] scope_dram_req_addr; \
wire scope_dram_req_rw; \
wire [15:0] scope_dram_req_byteen; \
wire [31:0] scope_dram_req_data; \
wire [`VX_DRAM_TAG_WIDTH-1:0] scope_dram_req_tag; \
wire scope_dram_req_ready; \
wire scope_dram_rsp_valid; \
wire [31:0] scope_dram_rsp_data; \
wire [`VX_DRAM_TAG_WIDTH-1:0] scope_dram_rsp_tag; \
wire scope_dram_rsp_ready; \
wire scope_snp_req_valid; \
wire [31:0] scope_snp_req_addr; \
wire scope_snp_req_invalidate; \
wire [`VX_SNP_TAG_WIDTH-1:0] scope_snp_req_tag; \
wire scope_snp_req_ready; \
wire scope_snp_rsp_valid; \
wire [`VX_SNP_TAG_WIDTH-1:0] scope_snp_rsp_tag; \
wire scope_busy; \
wire scope_snp_rsp_ready; \
wire scope_schedule_delay; \
wire scope_memory_delay; \
wire scope_exec_delay; \
wire scope_gpr_stage_delay; \
wire [`NUM_THREADS-1:0] scope_decode_valid; \
wire [`NW_BITS-1:0] scope_decode_warp_num; \
wire [31:0] scope_decode_curr_PC; \
wire scope_decode_is_jal; \
wire [4:0] scope_decode_rs1; \
wire [4:0] scope_decode_rs2; \
wire [`NUM_THREADS-1:0] scope_execute_valid; \
wire [`NW_BITS-1:0] scope_execute_warp_num; \
wire [4:0] scope_execute_rd; \
wire [31:0] scope_execute_a; \
wire [31:0] scope_execute_b; \
wire [`NUM_THREADS-1:0] scope_writeback_valid; \
wire [`NW_BITS-1:0] scope_writeback_warp_num; \
wire [1:0] scope_writeback_wb; \
wire [4:0] scope_writeback_rd; \
wire [31:0] scope_writeback_data; \
wire scope_idram_req_valid; \
wire scope_idram_req_ready; \
wire scope_idram_rsp_valid; \
wire scope_idram_rsp_ready;
// Output port declarations for the scope_icache_* request/response taps.
// Every entry, including the last, ends with a comma, so the macro must be
// followed by at least one more port in the module's port list.
`define SCOPE_SIGNALS_ISTAGE_IO \
output wire scope_icache_req_valid, \
output wire [`NW_BITS-1:0] scope_icache_req_warp_num, \
output wire [31:0] scope_icache_req_addr, \
output wire [`ICORE_TAG_WIDTH-1:0] scope_icache_req_tag, \
output wire scope_icache_req_ready, \
output wire scope_icache_rsp_valid, \
output wire [31:0] scope_icache_rsp_data, \
output wire [`ICORE_TAG_WIDTH-1:0] scope_icache_rsp_tag, \
output wire scope_icache_rsp_ready,
// Output port declarations for the scope_dcache_* request/response taps.
// The valid signals are NUM_THREADS wide (one bit per thread).
// Every entry ends with a comma, so another port must follow the macro.
`define SCOPE_SIGNALS_LSU_IO \
output wire [`NUM_THREADS-1:0] scope_dcache_req_valid, \
output wire [`NW_BITS-1:0] scope_dcache_req_warp_num, \
output wire [31:0] scope_dcache_req_curr_PC, \
output wire [31:0] scope_dcache_req_addr, \
output wire scope_dcache_req_rw, \
output wire [3:0] scope_dcache_req_byteen, \
output wire [31:0] scope_dcache_req_data, \
output wire [`DCORE_TAG_WIDTH-1:0] scope_dcache_req_tag, \
output wire scope_dcache_req_ready, \
output wire [`NUM_THREADS-1:0] scope_dcache_rsp_valid, \
output wire [31:0] scope_dcache_rsp_data, \
output wire [`DCORE_TAG_WIDTH-1:0] scope_dcache_rsp_tag, \
output wire scope_dcache_rsp_ready,
// Core-level scope ports: there are none at present, so this macro expands to
// nothing. It must not end with a line continuation, otherwise the define
// directive that follows would be absorbed into this macro's body and
// SCOPE_SIGNALS_ICACHE_IO would never be defined.
`define SCOPE_SIGNALS_CORE_IO
// Output port declarations for the scope_idram_* valid/ready handshake taps.
// Every entry ends with a comma, so another port must follow the macro.
`define SCOPE_SIGNALS_ICACHE_IO \
output wire scope_idram_req_valid, \
output wire scope_idram_req_ready, \
output wire scope_idram_rsp_valid, \
output wire scope_idram_rsp_ready,
// Output port declarations for the pipeline status taps: scope_busy and the
// schedule/memory/exec/gpr-stage delay flags.
// Every entry ends with a comma, so another port must follow the macro.
`define SCOPE_SIGNALS_PIPELINE_IO \
output wire scope_busy, \
output wire scope_schedule_delay, \
output wire scope_memory_delay, \
output wire scope_exec_delay, \
output wire scope_gpr_stage_delay,
// Output port declarations for the scope_decode_*, scope_execute_* and
// scope_writeback_* taps. The valid signals are NUM_THREADS wide and the warp
// numbers are NW_BITS wide.
// Every entry ends with a comma, so another port must follow the macro.
`define SCOPE_SIGNALS_BE_IO \
output wire [`NUM_THREADS-1:0] scope_decode_valid, \
output wire [`NW_BITS-1:0] scope_decode_warp_num, \
output wire [31:0] scope_decode_curr_PC, \
output wire scope_decode_is_jal, \
output wire [4:0] scope_decode_rs1, \
output wire [4:0] scope_decode_rs2, \
output wire [`NUM_THREADS-1:0] scope_execute_valid, \
output wire [`NW_BITS-1:0] scope_execute_warp_num, \
output wire [4:0] scope_execute_rd, \
output wire [31:0] scope_execute_a, \
output wire [31:0] scope_execute_b, \
output wire [`NUM_THREADS-1:0] scope_writeback_valid, \
output wire [`NW_BITS-1:0] scope_writeback_warp_num, \
output wire [1:0] scope_writeback_wb, \
output wire [4:0] scope_writeback_rd, \
output wire [31:0] scope_writeback_data,
// Named port connections that pass each scope_icache_* tap through to the
// same-named signal in the instantiating scope; pairs with
// SCOPE_SIGNALS_ISTAGE_IO. Ends with a comma, so another connection must
// follow in the instance's port list.
`define SCOPE_SIGNALS_ISTAGE_BIND \
.scope_icache_req_valid (scope_icache_req_valid), \
.scope_icache_req_warp_num (scope_icache_req_warp_num), \
.scope_icache_req_addr (scope_icache_req_addr), \
.scope_icache_req_tag (scope_icache_req_tag), \
.scope_icache_req_ready (scope_icache_req_ready), \
.scope_icache_rsp_valid (scope_icache_rsp_valid), \
.scope_icache_rsp_data (scope_icache_rsp_data), \
.scope_icache_rsp_tag (scope_icache_rsp_tag), \
.scope_icache_rsp_ready (scope_icache_rsp_ready),
// Named port connections that pass each scope_dcache_* tap through to the
// same-named signal in the instantiating scope; pairs with
// SCOPE_SIGNALS_LSU_IO. Ends with a comma, so another connection must follow.
`define SCOPE_SIGNALS_LSU_BIND \
.scope_dcache_req_valid (scope_dcache_req_valid), \
.scope_dcache_req_warp_num (scope_dcache_req_warp_num), \
.scope_dcache_req_curr_PC (scope_dcache_req_curr_PC), \
.scope_dcache_req_addr (scope_dcache_req_addr), \
.scope_dcache_req_rw (scope_dcache_req_rw), \
.scope_dcache_req_byteen(scope_dcache_req_byteen), \
.scope_dcache_req_data (scope_dcache_req_data), \
.scope_dcache_req_tag (scope_dcache_req_tag), \
.scope_dcache_req_ready (scope_dcache_req_ready), \
.scope_dcache_rsp_valid (scope_dcache_rsp_valid), \
.scope_dcache_rsp_data (scope_dcache_rsp_data), \
.scope_dcache_rsp_tag (scope_dcache_rsp_tag), \
.scope_dcache_rsp_ready (scope_dcache_rsp_ready),
// Core-level scope connections: there are none at present, so this macro
// expands to nothing. It must not end with a line continuation, otherwise the
// define directive that follows would be absorbed into this macro's body and
// SCOPE_SIGNALS_ICACHE_BIND would never be defined.
`define SCOPE_SIGNALS_CORE_BIND
// Named port connections that pass each scope_idram_* handshake tap through to
// the same-named signal in the instantiating scope; pairs with
// SCOPE_SIGNALS_ICACHE_IO. Ends with a comma, so another connection must follow.
`define SCOPE_SIGNALS_ICACHE_BIND \
.scope_idram_req_valid (scope_idram_req_valid), \
.scope_idram_req_ready (scope_idram_req_ready), \
.scope_idram_rsp_valid (scope_idram_rsp_valid), \
.scope_idram_rsp_ready (scope_idram_rsp_ready),
// Named port connections for scope_busy and the four stage-delay flags; pairs
// with SCOPE_SIGNALS_PIPELINE_IO. Ends with a comma, so another connection
// must follow.
`define SCOPE_SIGNALS_PIPELINE_BIND \
.scope_busy (scope_busy), \
.scope_schedule_delay (scope_schedule_delay), \
.scope_memory_delay (scope_memory_delay), \
.scope_exec_delay (scope_exec_delay), \
.scope_gpr_stage_delay (scope_gpr_stage_delay),
// Named port connections for the scope_decode_*, scope_execute_* and
// scope_writeback_* taps; pairs with SCOPE_SIGNALS_BE_IO. Ends with a comma,
// so another connection must follow.
`define SCOPE_SIGNALS_BE_BIND \
.scope_decode_valid (scope_decode_valid), \
.scope_decode_warp_num (scope_decode_warp_num), \
.scope_decode_curr_PC (scope_decode_curr_PC), \
.scope_decode_is_jal (scope_decode_is_jal), \
.scope_decode_rs1 (scope_decode_rs1), \
.scope_decode_rs2 (scope_decode_rs2), \
.scope_execute_valid (scope_execute_valid), \
.scope_execute_warp_num (scope_execute_warp_num), \
.scope_execute_rd (scope_execute_rd), \
.scope_execute_a (scope_execute_a), \
.scope_execute_b (scope_execute_b), \
.scope_writeback_valid (scope_writeback_valid), \
.scope_writeback_warp_num (scope_writeback_warp_num), \
.scope_writeback_wb (scope_writeback_wb), \
.scope_writeback_rd (scope_writeback_rd), \
.scope_writeback_data (scope_writeback_data),
// Continuously drives scope tap d from design signal s. The macro carries no
// terminating semicolon; the call site supplies it.
`define SCOPE_ASSIGN(d,s) assign d = s
`else
// SCOPE not defined: every *_IO and *_BIND macro, as well as SCOPE_ASSIGN,
// expands to nothing, so modules that reference them get no scope ports,
// connections or assignments.
`define SCOPE_SIGNALS_ISTAGE_IO
`define SCOPE_SIGNALS_LSU_IO
`define SCOPE_SIGNALS_CORE_IO
`define SCOPE_SIGNALS_ICACHE_IO
`define SCOPE_SIGNALS_PIPELINE_IO
`define SCOPE_SIGNALS_BE_IO
`define SCOPE_SIGNALS_ISTAGE_BIND
`define SCOPE_SIGNALS_LSU_BIND
`define SCOPE_SIGNALS_CORE_BIND
`define SCOPE_SIGNALS_ICACHE_BIND
`define SCOPE_SIGNALS_PIPELINE_BIND
`define SCOPE_SIGNALS_BE_BIND
`define SCOPE_ASSIGN(d,s)
`endif
// VX_SCOPE
`endif

View File

@@ -3,9 +3,11 @@
module Vortex #( module Vortex #(
parameter CORE_ID = 0 parameter CORE_ID = 0
) ( ) (
`SCOPE_SIGNALS_ICACHE_IO `SCOPE_SIGNALS_ISTAGE_IO
`SCOPE_SIGNALS_DCACHE_IO `SCOPE_SIGNALS_LSU_IO
`SCOPE_SIGNALS_CORE_IO `SCOPE_SIGNALS_CORE_IO
`SCOPE_SIGNALS_ICACHE_IO
`SCOPE_SIGNALS_PIPELINE_IO
`SCOPE_SIGNALS_BE_IO `SCOPE_SIGNALS_BE_IO
// Clock // Clock
@@ -169,10 +171,10 @@ module Vortex #(
VX_pipeline #( VX_pipeline #(
.CORE_ID(CORE_ID) .CORE_ID(CORE_ID)
) pipeline ( ) pipeline (
`SCOPE_SIGNALS_ICACHE_ATTACH `SCOPE_SIGNALS_ISTAGE_BIND
`SCOPE_SIGNALS_DCACHE_ATTACH `SCOPE_SIGNALS_LSU_BIND
`SCOPE_SIGNALS_CORE_ATTACH `SCOPE_SIGNALS_PIPELINE_BIND
`SCOPE_SIGNALS_BE_ATTACH `SCOPE_SIGNALS_BE_BIND
.clk(clk), .clk(clk),
.reset(reset), .reset(reset),
@@ -232,9 +234,11 @@ module Vortex #(
assign snp_rsp_tag = dcache_snp_rsp_if.snp_rsp_tag; assign snp_rsp_tag = dcache_snp_rsp_if.snp_rsp_tag;
assign dcache_snp_rsp_if.snp_rsp_ready = snp_rsp_ready; assign dcache_snp_rsp_if.snp_rsp_ready = snp_rsp_ready;
VX_mem_ctrl #( VX_mem_unit #(
.CORE_ID(CORE_ID) .CORE_ID(CORE_ID)
) mem_ctrl ( ) mem_unit (
`SCOPE_SIGNALS_ICACHE_BIND
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),

View File

@@ -3,9 +3,11 @@
module Vortex_Cluster #( module Vortex_Cluster #(
parameter CLUSTER_ID = 0 parameter CLUSTER_ID = 0
) ( ) (
`SCOPE_SIGNALS_ICACHE_IO `SCOPE_SIGNALS_ISTAGE_IO
`SCOPE_SIGNALS_DCACHE_IO `SCOPE_SIGNALS_LSU_IO
`SCOPE_SIGNALS_CORE_IO `SCOPE_SIGNALS_CORE_IO
`SCOPE_SIGNALS_ICACHE_IO
`SCOPE_SIGNALS_PIPELINE_IO
`SCOPE_SIGNALS_BE_IO `SCOPE_SIGNALS_BE_IO
// Clock // Clock
@@ -115,10 +117,12 @@ module Vortex_Cluster #(
Vortex #( Vortex #(
.CORE_ID(i + (CLUSTER_ID * `NUM_CORES)) .CORE_ID(i + (CLUSTER_ID * `NUM_CORES))
) vortex_core ( ) vortex_core (
`SCOPE_SIGNALS_ICACHE_ATTACH `SCOPE_SIGNALS_ISTAGE_BIND
`SCOPE_SIGNALS_DCACHE_ATTACH `SCOPE_SIGNALS_LSU_BIND
`SCOPE_SIGNALS_CORE_ATTACH `SCOPE_SIGNALS_CORE_BIND
`SCOPE_SIGNALS_BE_ATTACH `SCOPE_SIGNALS_ICACHE_BIND
`SCOPE_SIGNALS_PIPELINE_BIND
`SCOPE_SIGNALS_BE_BIND
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),

View File

@@ -1,9 +1,11 @@
`include "VX_define.vh" `include "VX_define.vh"
module Vortex_Socket ( module Vortex_Socket (
`SCOPE_SIGNALS_ICACHE_IO `SCOPE_SIGNALS_ISTAGE_IO
`SCOPE_SIGNALS_DCACHE_IO `SCOPE_SIGNALS_LSU_IO
`SCOPE_SIGNALS_CORE_IO `SCOPE_SIGNALS_CORE_IO
`SCOPE_SIGNALS_ICACHE_IO
`SCOPE_SIGNALS_PIPELINE_IO
`SCOPE_SIGNALS_BE_IO `SCOPE_SIGNALS_BE_IO
// Clock // Clock
@@ -61,10 +63,12 @@ module Vortex_Socket (
Vortex_Cluster #( Vortex_Cluster #(
.CLUSTER_ID(`L3CACHE_ID) .CLUSTER_ID(`L3CACHE_ID)
) Vortex_Cluster ( ) Vortex_Cluster (
`SCOPE_SIGNALS_ICACHE_ATTACH `SCOPE_SIGNALS_ISTAGE_BIND
`SCOPE_SIGNALS_DCACHE_ATTACH `SCOPE_SIGNALS_LSU_BIND
`SCOPE_SIGNALS_CORE_ATTACH `SCOPE_SIGNALS_CORE_BIND
`SCOPE_SIGNALS_BE_ATTACH `SCOPE_SIGNALS_ICACHE_BIND
`SCOPE_SIGNALS_PIPELINE_BIND
`SCOPE_SIGNALS_BE_BIND
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
@@ -155,10 +159,12 @@ module Vortex_Socket (
Vortex_Cluster #( Vortex_Cluster #(
.CLUSTER_ID(i) .CLUSTER_ID(i)
) Vortex_Cluster ( ) Vortex_Cluster (
`SCOPE_SIGNALS_ICACHE_ATTACH `SCOPE_SIGNALS_ISTAGE_BIND
`SCOPE_SIGNALS_DCACHE_ATTACH `SCOPE_SIGNALS_LSU_BIND
`SCOPE_SIGNALS_CORE_ATTACH `SCOPE_SIGNALS_CORE_BIND
`SCOPE_SIGNALS_BE_ATTACH `SCOPE_SIGNALS_ICACHE_BIND
`SCOPE_SIGNALS_PIPELINE_BIND
`SCOPE_SIGNALS_BE_BIND
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
@@ -387,7 +393,7 @@ module Vortex_Socket (
end end
`ifdef DBG_PRINT_DRAM `ifdef DBG_PRINT_DRAM
always_ff @(posedge clk) begin always @(posedge clk) begin
if (dram_req_valid && dram_req_ready) begin if (dram_req_valid && dram_req_ready) begin
$display("%t: DRAM req: rw=%b addr=%0h, tag=%0h, byteen=%0h data=%0h", $time, dram_req_rw, `DRAM_TO_BYTE_ADDR(dram_req_addr), dram_req_tag, dram_req_byteen, dram_req_data); $display("%t: DRAM req: rw=%b addr=%0h, tag=%0h, byteen=%0h data=%0h", $time, dram_req_rw, `DRAM_TO_BYTE_ADDR(dram_req_addr), dram_req_tag, dram_req_byteen, dram_req_data);
end end

169
hw/rtl/cache/VX_bank.v vendored
View File

@@ -101,7 +101,7 @@ module VX_bank #(
input wire snp_rsp_ready input wire snp_rsp_ready
); );
`DEBUG_BLOCK( `ifdef DBG_CORE_REQ_INFO
wire[31:0] debug_use_pc_st0; wire[31:0] debug_use_pc_st0;
wire[1:0] debug_wb_st0; wire[1:0] debug_wb_st0;
wire[4:0] debug_rd_st0; wire[4:0] debug_rd_st0;
@@ -128,7 +128,7 @@ module VX_bank #(
wire[WORD_SIZE-1:0] debug_byteen_st2; wire[WORD_SIZE-1:0] debug_byteen_st2;
wire[`REQS_BITS-1:0] debug_tid_st2; wire[`REQS_BITS-1:0] debug_tid_st2;
wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st2; wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st2;
) `endif
wire snrq_pop; wire snrq_pop;
wire snrq_empty; wire snrq_empty;
@@ -300,7 +300,6 @@ module VX_bank #(
wire qual_is_snp_st0; wire qual_is_snp_st0;
wire qual_snp_invalidate_st0; wire qual_snp_invalidate_st0;
wire valid_st1 [STAGE_1_CYCLES-1:0]; wire valid_st1 [STAGE_1_CYCLES-1:0];
wire [`LINE_ADDR_WIDTH-1:0] addr_st1 [STAGE_1_CYCLES-1:0]; wire [`LINE_ADDR_WIDTH-1:0] addr_st1 [STAGE_1_CYCLES-1:0];
wire [`UP(`WORD_SELECT_WIDTH)-1:0] wsel_st1 [STAGE_1_CYCLES-1:0]; wire [`UP(`WORD_SELECT_WIDTH)-1:0] wsel_st1 [STAGE_1_CYCLES-1:0];
@@ -313,17 +312,17 @@ module VX_bank #(
assign qual_is_fill_st0 = dfpq_pop_unqual; assign qual_is_fill_st0 = dfpq_pop_unqual;
assign qual_valid_st0 = dfpq_pop || mrvq_pop || reqq_pop || snrq_pop; assign qual_valid_st0 = dfpq_pop || mrvq_pop || reqq_pop || snrq_pop;
assign qual_addr_st0 = dfpq_pop_unqual ? dfpq_addr_st0 : assign qual_addr_st0 = dfpq_pop_unqual ? dfpq_addr_st0 :
mrvq_pop_unqual ? mrvq_addr_st0 : mrvq_pop_unqual ? mrvq_addr_st0 :
reqq_pop_unqual ? reqq_req_addr_st0[`LINE_SELECT_ADDR_RNG] : reqq_pop_unqual ? reqq_req_addr_st0[`LINE_SELECT_ADDR_RNG] :
snrq_pop_unqual ? snrq_addr_st0 : snrq_pop_unqual ? snrq_addr_st0 :
0; 0;
if (`WORD_SELECT_WIDTH != 0) begin if (`WORD_SELECT_WIDTH != 0) begin
assign qual_wsel_st0 = reqq_pop_unqual ? reqq_req_addr_st0[`WORD_SELECT_WIDTH-1:0] : assign qual_wsel_st0 = reqq_pop_unqual ? reqq_req_addr_st0[`WORD_SELECT_WIDTH-1:0] :
mrvq_pop_unqual ? mrvq_wsel_st0 : mrvq_pop_unqual ? mrvq_wsel_st0 :
0; 0;
end else begin end else begin
`UNUSED_VAR(mrvq_wsel_st0) `UNUSED_VAR(mrvq_wsel_st0)
assign qual_wsel_st0 = 0; assign qual_wsel_st0 = 0;
@@ -355,11 +354,11 @@ module VX_bank #(
assign qual_from_mrvq_st0 = mrvq_pop_unqual; assign qual_from_mrvq_st0 = mrvq_pop_unqual;
`DEBUG_BLOCK( `ifdef DBG_CORE_REQ_INFO
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
assign {debug_use_pc_st0, debug_wb_st0, debug_rd_st0, debug_warp_num_st0, debug_tagid_st0, debug_rw_st0, debug_byteen_st0, debug_tid_st0} = qual_inst_meta_st0; assign {debug_use_pc_st0, debug_wb_st0, debug_rd_st0, debug_warp_num_st0, debug_tagid_st0, debug_rw_st0, debug_byteen_st0, debug_tid_st0} = qual_inst_meta_st0;
end end
) `endif
VX_generic_register #( VX_generic_register #(
.N(1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `REQ_INST_META_WIDTH + 1 + `BANK_LINE_WIDTH) .N(1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `REQ_INST_META_WIDTH + 1 + `BANK_LINE_WIDTH)
@@ -408,18 +407,23 @@ module VX_bank #(
wire from_mrvq_st1e; wire from_mrvq_st1e;
wire mrvq_recover_ready_state_st1e; wire mrvq_recover_ready_state_st1e;
assign from_mrvq_st1e = from_mrvq_st1[STAGE_1_CYCLES-1]; assign from_mrvq_st1e = from_mrvq_st1[STAGE_1_CYCLES-1];
assign valid_st1e = valid_st1 [STAGE_1_CYCLES-1]; assign valid_st1e = valid_st1 [STAGE_1_CYCLES-1];
assign is_snp_st1e = is_snp_st1 [STAGE_1_CYCLES-1]; assign is_snp_st1e = is_snp_st1 [STAGE_1_CYCLES-1];
assign snp_invalidate_st1e = snp_invalidate_st1 [STAGE_1_CYCLES-1]; assign snp_invalidate_st1e = snp_invalidate_st1 [STAGE_1_CYCLES-1];
assign {tag_st1e, mem_rw_st1e, mem_byteen_st1e, tid_st1e} = inst_meta_st1[STAGE_1_CYCLES-1]; assign {tag_st1e, mem_rw_st1e, mem_byteen_st1e, tid_st1e} = inst_meta_st1[STAGE_1_CYCLES-1];
assign st2_pending_hazard_st1e = (miss_add_because_miss) && ((addr_st2 == addr_st1[STAGE_1_CYCLES-1]) && !is_fill_st2); assign st2_pending_hazard_st1e = (miss_add_because_miss)
&& ((addr_st2 == addr_st1[STAGE_1_CYCLES-1]) && !is_fill_st2);
assign force_request_miss_st1e = (valid_st1e && !from_mrvq_st1e && (mrvq_pending_hazard_st1e || st2_pending_hazard_st1e)) || (valid_st1e && from_mrvq_st1e && recover_mrvq_state_st2); assign force_request_miss_st1e = (valid_st1e && !from_mrvq_st1e && (mrvq_pending_hazard_st1e || st2_pending_hazard_st1e))
|| (valid_st1e && from_mrvq_st1e && recover_mrvq_state_st2);
assign mrvq_recover_ready_state_st1e = valid_st1e && from_mrvq_st1e && recover_mrvq_state_st2 && (addr_st2 == addr_st1[STAGE_1_CYCLES-1]); assign mrvq_recover_ready_state_st1e = valid_st1e
&& from_mrvq_st1e
&& recover_mrvq_state_st2
&& (addr_st2 == addr_st1[STAGE_1_CYCLES-1]);
VX_tag_data_access #( VX_tag_data_access #(
.CACHE_SIZE (CACHE_SIZE), .CACHE_SIZE (CACHE_SIZE),
@@ -466,11 +470,12 @@ module VX_bank #(
.mrvq_init_ready_state_st1e(mrvq_init_ready_state_st1e) .mrvq_init_ready_state_st1e(mrvq_init_ready_state_st1e)
); );
`DEBUG_BLOCK( `ifdef DBG_CORE_REQ_INFO
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
assign {debug_use_pc_st1e, debug_wb_st1e, debug_rd_st1e, debug_warp_num_st1e, debug_tagid_st1e, debug_rw_st1e, debug_byteen_st1e, debug_tid_st1e} = inst_meta_st1[STAGE_1_CYCLES-1]; assign {debug_use_pc_st1e, debug_wb_st1e, debug_rd_st1e, debug_warp_num_st1e, debug_tagid_st1e, debug_rw_st1e, debug_byteen_st1e, debug_tid_st1e} = inst_meta_st1[STAGE_1_CYCLES-1];
end end
) `endif
wire qual_valid_st1e_2 = valid_st1e && !is_fill_st1[STAGE_1_CYCLES-1]; wire qual_valid_st1e_2 = valid_st1e && !is_fill_st1[STAGE_1_CYCLES-1];
wire from_mrvq_st1e_st2 = from_mrvq_st1e; wire from_mrvq_st1e_st2 = from_mrvq_st1e;
@@ -506,11 +511,11 @@ module VX_bank #(
.out ({mrvq_recover_ready_state_st2 , from_mrvq_st2 , mrvq_init_ready_state_unqual_st2, snp_to_mrvq_st2 , is_snp_st2 , snp_invalidate_st2, fill_saw_dirty_st2 , is_fill_st2 , valid_st2 , addr_st2 , wsel_st2, writeword_st2 , readword_st2 , readdata_st2 , readtag_st2 , miss_st2 , dirty_st2 , dirtyb_st2, inst_meta_st2 }) .out ({mrvq_recover_ready_state_st2 , from_mrvq_st2 , mrvq_init_ready_state_unqual_st2, snp_to_mrvq_st2 , is_snp_st2 , snp_invalidate_st2, fill_saw_dirty_st2 , is_fill_st2 , valid_st2 , addr_st2 , wsel_st2, writeword_st2 , readword_st2 , readdata_st2 , readtag_st2 , miss_st2 , dirty_st2 , dirtyb_st2, inst_meta_st2 })
); );
`DEBUG_BLOCK( `ifdef DBG_CORE_REQ_INFO
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
assign {debug_use_pc_st2, debug_wb_st2, debug_rd_st2, debug_warp_num_st2, debug_tagid_st2, debug_rw_st2, debug_byteen_st2, debug_tid_st2} = inst_meta_st2; assign {debug_use_pc_st2, debug_wb_st2, debug_rd_st2, debug_warp_num_st2, debug_tagid_st2, debug_rw_st2, debug_byteen_st2, debug_tid_st2} = inst_meta_st2;
end end
) `endif
// Enqueue to miss reserv if it's a valid miss // Enqueue to miss reserv if it's a valid miss
assign miss_add_because_miss = valid_st2 && !is_snp_st2 && miss_st2; assign miss_add_because_miss = valid_st2 && !is_snp_st2 && miss_st2;
@@ -539,7 +544,9 @@ module VX_bank #(
assign mrvq_init_ready_state_hazard_st0_st1 = miss_add_unqual && qual_is_fill_st0 && (miss_add_addr == qual_addr_st0 ); assign mrvq_init_ready_state_hazard_st0_st1 = miss_add_unqual && qual_is_fill_st0 && (miss_add_addr == qual_addr_st0 );
assign mrvq_init_ready_state_hazard_st1e_st1 = miss_add_unqual && is_fill_st1[STAGE_1_CYCLES-1] && (miss_add_addr == addr_st1[STAGE_1_CYCLES-1]); assign mrvq_init_ready_state_hazard_st1e_st1 = miss_add_unqual && is_fill_st1[STAGE_1_CYCLES-1] && (miss_add_addr == addr_st1[STAGE_1_CYCLES-1]);
assign mrvq_init_ready_state_st2 = mrvq_init_ready_state_unqual_st2 || mrvq_init_ready_state_hazard_st0_st1 || mrvq_init_ready_state_hazard_st1e_st1; assign mrvq_init_ready_state_st2 = mrvq_init_ready_state_unqual_st2
|| mrvq_init_ready_state_hazard_st0_st1
|| mrvq_init_ready_state_hazard_st1e_st1;
VX_cache_miss_resrv #( VX_cache_miss_resrv #(
.BANK_ID (BANK_ID), .BANK_ID (BANK_ID),
@@ -592,10 +599,8 @@ module VX_bank #(
// Enqueue core response // Enqueue core response
wire cwbq_push; wire cwbq_push, cwbq_pop;
wire cwbq_pop; wire cwbq_empty, cwbq_full;
wire cwbq_empty;
wire cwbq_full;
wire cwbq_push_unqual = valid_st2 && !miss_st2 && !is_fill_st2 && !is_snp_st2; wire cwbq_push_unqual = valid_st2 && !miss_st2 && !is_fill_st2 && !is_snp_st2;
assign cwbq_push_stall = cwbq_push_unqual && cwbq_full; assign cwbq_push_stall = cwbq_push_unqual && cwbq_full;
@@ -634,42 +639,22 @@ module VX_bank #(
// Enqueue DRAM fill request // Enqueue DRAM fill request
// `IGNORE_WARNINGS_BEGIN wire dram_fill_req_unqual = miss_add_unqual
// wire invalidate_fill; && (!mrvq_init_ready_state_st2
// `IGNORE_WARNINGS_END || (from_mrvq_st2 && !mrvq_recover_ready_state_st2));
// wire possible_fill = valid_st2 && miss_st2 && dram_fill_req_ready && ~is_snp_st2;
// wire [`LINE_ADDR_WIDTH-1:0] fill_invalidator_addr = addr_st2;
// VX_fill_invalidator #( assign dram_fill_req_valid = dram_fill_req_unqual
// .BANK_LINE_SIZE (BANK_LINE_SIZE), && !(dwbq_push_stall
// .NUM_BANKS (NUM_BANKS), || mrvq_push_stall
// .FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE) || cwbq_push_stall);
// ) fill_invalidator (
// .clk (clk),
// .reset (reset),
// .possible_fill (possible_fill),
// .success_fill (is_fill_st2),
// .fill_addr (fill_invalidator_addr),
// .invalidate_fill (invalidate_fill)
// );
wire dram_fill_req_unqual = miss_add_unqual && (!mrvq_init_ready_state_st2 || (from_mrvq_st2 && !mrvq_recover_ready_state_st2));
assign dram_fill_req_valid = dram_fill_req_unqual
&& dram_fill_req_ready
&& !( dwbq_push_stall
|| mrvq_push_stall
|| cwbq_push_stall);
assign dram_fill_req_addr = addr_st2; assign dram_fill_req_addr = addr_st2;
assign dram_fill_req_stall = dram_fill_req_unqual && ~dram_fill_req_ready; assign dram_fill_req_stall = dram_fill_req_unqual && ~dram_fill_req_ready;
// Enqueue DRAM writeback request // Enqueue DRAM writeback request
wire dwbq_push; wire dwbq_push, dwbq_pop;
wire dwbq_pop; wire dwbq_empty, dwbq_full;
wire dwbq_empty;
wire dwbq_full;
wire dwbq_is_dwb_in, dwbq_is_snp_in; wire dwbq_is_dwb_in, dwbq_is_snp_in;
wire dwbq_is_dwb_out, dwbq_is_snp_out; wire dwbq_is_dwb_out, dwbq_is_snp_out;
@@ -724,9 +709,9 @@ module VX_bank #(
assign dram_wb_req_valid = ~dwbq_empty && dwbq_is_dwb_out && (~dwbq_is_snp_out || dwbq_dual_valid_sel == 0); assign dram_wb_req_valid = ~dwbq_empty && dwbq_is_dwb_out && (~dwbq_is_snp_out || dwbq_dual_valid_sel == 0);
assign snp_rsp_valid = ~dwbq_empty && dwbq_is_snp_out && (~dwbq_is_dwb_out || dwbq_dual_valid_sel == 1); assign snp_rsp_valid = ~dwbq_empty && dwbq_is_snp_out && (~dwbq_is_dwb_out || dwbq_dual_valid_sel == 1);
assign dwbq_pop = (dwbq_is_dwb_out && ~dwbq_is_snp_out && dram_wb_req_fire) assign dwbq_pop = (dwbq_is_dwb_out && ~dwbq_is_snp_out && dram_wb_req_fire)
|| (dwbq_is_snp_out && ~dwbq_is_dwb_out && snp_rsp_fire) || (dwbq_is_snp_out && ~dwbq_is_dwb_out && snp_rsp_fire)
|| (dwbq_is_dwb_out && dwbq_is_snp_out && snp_rsp_fire); || (dwbq_is_dwb_out && dwbq_is_snp_out && snp_rsp_fire);
// bank pipeline stall // bank pipeline stall
assign stall_bank_pipe = cwbq_push_stall assign stall_bank_pipe = cwbq_push_stall
@@ -735,53 +720,27 @@ module VX_bank #(
|| dram_fill_req_stall; || dram_fill_req_stall;
`ifdef DBG_PRINT_CACHE_BANK `ifdef DBG_PRINT_CACHE_BANK
if (NUM_BANKS == 1) begin always @(posedge clk) begin
always_ff @(posedge clk) begin if ((|core_req_valid) && core_req_ready) begin
if (core_req_valid && core_req_ready) begin $display("%t: bank%0d-%0d core req: addr=%0h, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(core_req_addr, BANK_ID), core_req_tag);
$display("%t: bank%0d-%0d core req: addr=%0h, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR0(core_req_addr), core_req_tag);
end
if (core_rsp_valid && core_rsp_ready) begin
$display("%t: bank%0d-%0d core rsp: tag=%0h, data=%0h", $time, CACHE_ID, BANK_ID, core_rsp_tag, core_rsp_data);
end
if (dram_fill_req_valid && dram_fill_req_ready) begin
$display("%t: bank%0d-%0d dram_fill req: addr=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR0(dram_fill_req_addr));
end
if (dram_wb_req_valid && dram_wb_req_ready) begin
$display("%t: bank%0d-%0d dram_wb req: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR0(dram_wb_req_addr), dram_wb_req_data);
end
if (dram_fill_rsp_valid && dram_fill_rsp_ready) begin
$display("%t: bank%0d-%0d dram_fill rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR0(dram_fill_rsp_addr), dram_fill_rsp_data);
end
if (snp_req_valid && snp_req_ready) begin
$display("%t: bank%0d-%0d snp req: addr=%0h, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR0(snp_req_addr), snp_req_tag);
end
if (snp_rsp_valid && snp_rsp_ready) begin
$display("%t: bank%0d-%0d snp rsp: tag=%0h", $time, CACHE_ID, BANK_ID, snp_rsp_tag);
end
end end
end else begin if (core_rsp_valid && core_rsp_ready) begin
always_ff @(posedge clk) begin $display("%t: bank%0d-%0d core rsp: tag=%0h, data=%0h", $time, CACHE_ID, BANK_ID, core_rsp_tag, core_rsp_data);
if ((|core_req_valid) && core_req_ready) begin end
$display("%t: bank%0d-%0d core req: addr=%0h, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(core_req_addr, BANK_ID), core_req_tag); if (dram_fill_req_valid && dram_fill_req_ready) begin
end $display("%t: bank%0d-%0d dram_fill req: addr=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_fill_req_addr, BANK_ID));
if (core_rsp_valid && core_rsp_ready) begin end
$display("%t: bank%0d-%0d core rsp: tag=%0h, data=%0h", $time, CACHE_ID, BANK_ID, core_rsp_tag, core_rsp_data); if (dram_wb_req_firevalid && dram_wb_req_ready) begin
end $display("%t: bank%0d-%0d dram_wb req: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_wb_req_addr, BANK_ID), dram_wb_req_data);
if (dram_fill_req_valid && dram_fill_req_ready) begin end
$display("%t: bank%0d-%0d dram_fill req: addr=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_fill_req_addr, BANK_ID)); if (dram_fill_rsp_valid && dram_fill_rsp_ready) begin
end $display("%t: bank%0d-%0d dram_fill rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_fill_rsp_addr, BANK_ID), dram_fill_rsp_data);
if (dram_wb_req_valid && dram_wb_req_ready) begin end
$display("%t: bank%0d-%0d dram_wb req: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_wb_req_addr, BANK_ID), dram_wb_req_data); if (snp_req_valid && snp_req_ready) begin
end $display("%t: bank%0d-%0d snp req: addr=%0h, invalidate=%0d, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(snp_req_addr, BANK_ID), snp_req_invalidate, snp_req_tag);
if (dram_fill_rsp_valid && dram_fill_rsp_ready) begin end
$display("%t: bank%0d-%0d dram_fill rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_fill_rsp_addr, BANK_ID), dram_fill_rsp_data); if (snp_rsp_valid && snp_rsp_ready) begin
end $display("%t: bank%0d-%0d snp rsp: tag=%0h", $time, CACHE_ID, BANK_ID, snp_rsp_tag);
if (snp_req_valid && snp_req_ready) begin
$display("%t: bank%0d-%0d snp req: addr=%0h, invalidate=%0d, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(snp_req_addr, BANK_ID), snp_req_invalidate, snp_req_tag);
end
if (snp_rsp_valid && snp_rsp_ready) begin
$display("%t: bank%0d-%0d snp rsp: tag=%0h", $time, CACHE_ID, BANK_ID, snp_rsp_tag);
end
end end
end end
`endif `endif

View File

@@ -65,6 +65,8 @@ module VX_cache #(
// Snooping forward tag width // Snooping forward tag width
parameter SNP_FWD_TAG_WIDTH = 1 parameter SNP_FWD_TAG_WIDTH = 1
) ( ) (
`SCOPE_SIGNALS_ICACHE_IO
input wire clk, input wire clk,
input wire reset, input wire reset,
@@ -125,7 +127,7 @@ module VX_cache #(
output wire [NUM_SNP_REQUESTS-1:0] snp_fwdin_ready output wire [NUM_SNP_REQUESTS-1:0] snp_fwdin_ready
); );
`DEBUG_BLOCK( `ifdef DBG_CORE_REQ_INFO
wire[31:0] debug_core_req_use_pc; wire[31:0] debug_core_req_use_pc;
wire[1:0] debug_core_req_wb; wire[1:0] debug_core_req_wb;
wire[4:0] debug_core_req_rd; wire[4:0] debug_core_req_rd;
@@ -135,7 +137,8 @@ module VX_cache #(
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
assign {debug_core_req_use_pc, debug_core_req_wb, debug_core_req_rd, debug_core_req_warp_num, debug_core_req_idx} = core_req_tag[0]; assign {debug_core_req_use_pc, debug_core_req_wb, debug_core_req_rd, debug_core_req_warp_num, debug_core_req_idx} = core_req_tag[0];
end end
) `endif
wire [NUM_BANKS-1:0][NUM_REQUESTS-1:0] per_bank_valid; wire [NUM_BANKS-1:0][NUM_REQUESTS-1:0] per_bank_valid;
wire [NUM_BANKS-1:0] per_bank_core_req_ready; wire [NUM_BANKS-1:0] per_bank_core_req_ready;
@@ -476,7 +479,13 @@ module VX_cache #(
.per_bank_snp_rsp_ready (per_bank_snp_rsp_ready), .per_bank_snp_rsp_ready (per_bank_snp_rsp_ready),
.snp_rsp_valid (snp_rsp_valid), .snp_rsp_valid (snp_rsp_valid),
.snp_rsp_tag (snp_rsp_tag), .snp_rsp_tag (snp_rsp_tag),
.snp_rsp_ready (snp_rsp_ready) .snp_rsp_ready (snp_rsp_ready)
); );
`SCOPE_ASSIGN(scope_idram_req_valid, per_bank_dram_fill_req_valid[0]);
`SCOPE_ASSIGN(scope_idram_req_ready, dram_fill_req_ready);
`SCOPE_ASSIGN(scope_idram_rsp_valid, per_bank_core_rsp_valid[0]);
`SCOPE_ASSIGN(scope_idram_rsp_ready, per_bank_core_rsp_ready[0]);
endmodule endmodule

View File

@@ -72,8 +72,6 @@
`define LINE_TO_DRAM_ADDR(x, i) {x, `BANK_SELECT_BITS'(i)} `define LINE_TO_DRAM_ADDR(x, i) {x, `BANK_SELECT_BITS'(i)}
`define LINE_TO_BYTE_ADDR(x, i) {x, `BANK_SELECT_BITS'(i), `BASE_ADDR_BITS'(0)} `define LINE_TO_BYTE_ADDR(x, i) {x, (((`BANK_SELECT_BITS + `BASE_ADDR_BITS)'(i)) << `BASE_ADDR_BITS)}
`define LINE_TO_BYTE_ADDR0(x) {x, `BASE_ADDR_BITS'(0)}
`endif `endif

View File

@@ -153,35 +153,18 @@ module VX_cache_miss_resrv #(
`ifdef DBG_PRINT_CACHE_MSRQ `ifdef DBG_PRINT_CACHE_MSRQ
integer j; integer j;
if (NUM_BANKS == 1) begin always @(posedge clk) begin
always_ff @(posedge clk) begin if (mrvq_push || mrvq_pop || increment_head || recover_state) begin
if (mrvq_push || mrvq_pop || increment_head || recover_state) begin $write("%t: bank%0d-%0d msrq: push=%b pop=%b incr=%d recv=%d", $time, CACHE_ID, BANK_ID, mrvq_push, mrvq_pop, increment_head, recover_state);
$write("%t: bank%0d-%0d msrq: push=%b pop=%b incr=%d recv=%d", $time, CACHE_ID, BANK_ID, mrvq_push, mrvq_pop, increment_head, recover_state); for (j = 0; j < MRVQ_SIZE; j++) begin
for (j = 0; j < MRVQ_SIZE; j++) begin if (valid_table[j]) begin
if (valid_table[j]) begin $write(" ");
$write(" "); if (schedule_ptr == $bits(schedule_ptr)'(j)) $write("*");
if (schedule_ptr == $bits(schedule_ptr)'(j)) $write("*"); if (~ready_table[j]) $write("!");
if (~ready_table[j]) $write("!"); $write("addr%0d=%0h", j, `LINE_TO_BYTE_ADDR(addr_table[j], BANK_ID));
$write("addr%0d=%0h", j, {addr_table[j], `BASE_ADDR_BITS'(0)});
end
end end
$write("\n");
end
end
end else begin
always_ff @(posedge clk) begin
if (mrvq_push || mrvq_pop || increment_head || recover_state) begin
$write("%t: bank%0d-%0d msrq: push=%b pop=%b incr=%d recv=%d", $time, CACHE_ID, BANK_ID, mrvq_push, mrvq_pop, increment_head, recover_state);
for (j = 0; j < MRVQ_SIZE; j++) begin
if (valid_table[j]) begin
$write(" ");
if (schedule_ptr == $bits(schedule_ptr)'(j)) $write("*");
if (~ready_table[j]) $write("!");
$write("addr%0d=%0h", j, `LINE_TO_BYTE_ADDR(addr_table[j], BANK_ID));
end
end
$write("\n");
end end
$write("\n");
end end
end end
`endif `endif

View File

@@ -116,7 +116,7 @@ module VX_snp_forwarder #(
end end
`ifdef DBG_PRINT_CACHE_SNP `ifdef DBG_PRINT_CACHE_SNP
always_ff @(posedge clk) begin always @(posedge clk) begin
if (snp_req_valid && snp_req_ready) begin if (snp_req_valid && snp_req_ready) begin
$display("%t: cache%0d snp req: addr=%0h, invalidate=%0d, tag=%0h", $time, CACHE_ID, `DRAM_TO_BYTE_ADDR(snp_req_addr), snp_req_invalidate, snp_req_tag); $display("%t: cache%0d snp req: addr=%0h, invalidate=%0d, tag=%0h", $time, CACHE_ID, `DRAM_TO_BYTE_ADDR(snp_req_addr), snp_req_invalidate, snp_req_tag);
end end

View File

@@ -17,112 +17,105 @@ module VX_divide #(
output reg [WIDTHD-1:0] remainder output reg [WIDTHD-1:0] remainder
); );
// synthesis read_comments_as_HDL on
// localparam IMPL = "quartus";
// synthesis read_comments_as_HDL off
// altera translate_off
localparam IMPL="fallback";
// altera translate_on
generate generate
if (NREP != DREP) begin if (NREP != DREP) begin
different_nrep_drep_not_yet_supported non_existing_module(); different_nrep_drep_not_yet_supported non_existing_module();
end end
if (IMPL == "quartus") begin `ifdef QUARTUS
localparam lpm_speed=SPEED == "HIGHEST" ? 9:5; localparam lpm_speed=SPEED == "HIGHEST" ? 9 : 5;
lpm_divide #( lpm_divide #(
.LPM_WIDTHN(WIDTHN), .LPM_WIDTHN(WIDTHN),
.LPM_WIDTHD(WIDTHD), .LPM_WIDTHD(WIDTHD),
.LPM_NREPRESENTATION(NREP), .LPM_NREPRESENTATION(NREP),
.LPM_DREPRESENTATION(DREP), .LPM_DREPRESENTATION(DREP),
.LPM_PIPELINE(PIPELINE), .LPM_PIPELINE(PIPELINE),
.LPM_REMAINDERPOSITIVE("FALSE"), // emulate verilog % operator .LPM_REMAINDERPOSITIVE("FALSE"), // emulate verilog % operator
.MAXIMIZE_SPEED(lpm_speed) .MAXIMIZE_SPEED(lpm_speed)
) quartus_divider ( ) quartus_divider (
.clock(clock), .clock(clock),
.aclr(aclr), .aclr(aclr),
.clken(clken), .clken(clken),
.numer(numer), .numer(numer),
.denom(denom), .denom(denom),
.quotient(quotient), .quotient(quotient),
.remain(remainder) .remain(remainder)
); );
end
else begin
wire [WIDTHN-1:0] numer_pipe_end; `else
wire [WIDTHD-1:0] denom_pipe_end;
if (PIPELINE == 0) begin wire [WIDTHN-1:0] numer_pipe_end;
assign numer_pipe_end = numer; wire [WIDTHD-1:0] denom_pipe_end;
assign denom_pipe_end = denom;
end else begin
reg [WIDTHN-1:0] numer_pipe [0:PIPELINE-1];
reg [WIDTHD-1:0] denom_pipe [0:PIPELINE-1];
genvar i; if (PIPELINE == 0) begin
for (i = 0; i < PIPELINE-1; i++) begin : pipe_stages assign numer_pipe_end = numer;
always @(posedge clock or posedge aclr) begin assign denom_pipe_end = denom;
if (aclr) begin end else begin
numer_pipe[i+1] <= 0; reg [WIDTHN-1:0] numer_pipe [0:PIPELINE-1];
denom_pipe[i+1] <= 0; reg [WIDTHD-1:0] denom_pipe [0:PIPELINE-1];
end
else if (clken) begin
numer_pipe[i+1] <= numer_pipe[i];
denom_pipe[i+1] <= denom_pipe[i];
end
end
end
genvar i;
for (i = 0; i < PIPELINE-1; i++) begin : pipe_stages
always @(posedge clock or posedge aclr) begin always @(posedge clock or posedge aclr) begin
if (aclr) begin if (aclr) begin
numer_pipe[0] <= 0; numer_pipe[i+1] <= 0;
denom_pipe[0] <= 0; denom_pipe[i+1] <= 0;
end end
else if (clken) begin else if (clken) begin
numer_pipe[0] <= numer; numer_pipe[i+1] <= numer_pipe[i];
denom_pipe[0] <= denom; denom_pipe[i+1] <= denom_pipe[i];
end end
end end
assign numer_pipe_end = numer_pipe[PIPELINE-1];
assign denom_pipe_end = denom_pipe[PIPELINE-1];
end end
/* * * * * * * * * * * * * * * * * * * * * * */ always @(posedge clock or posedge aclr) begin
/* Do the actual fallback computation here */ if (aclr) begin
/* * * * * * * * * * * * * * * * * * * * * * */ numer_pipe[0] <= 0;
denom_pipe[0] <= 0;
if (NREP == "SIGNED") begin end
else if (clken) begin
always @(*) begin numer_pipe[0] <= numer;
if (denom_pipe_end == 0) begin denom_pipe[0] <= denom;
quotient = 32'hffffffff;
remainder = numer_pipe_end;
end
else if (denom_pipe_end == 32'hffffffff && numer_pipe_end == 32'h80000000) begin
// this edge case kills verilator in some cases by causing a division
// overflow exception. INT_MIN / -1 (on x86)
quotient = 0;
remainder = 0;
end
else begin
quotient = $signed($signed(numer_pipe_end) / $signed(denom_pipe_end));
remainder = $signed($signed(numer_pipe_end) % $signed(denom_pipe_end));
end
end end
end
else begin
assign quotient = (denom_pipe_end == 0) ? 32'hffffffff : numer_pipe_end/denom_pipe_end;
assign remainder = (denom_pipe_end == 0) ? numer_pipe_end : numer_pipe_end%denom_pipe_end;
end end
assign numer_pipe_end = numer_pipe[PIPELINE-1];
assign denom_pipe_end = denom_pipe[PIPELINE-1];
end end
/* * * * * * * * * * * * * * * * * * * * * * */
/* Do the actual fallback computation here */
/* * * * * * * * * * * * * * * * * * * * * * */
if (NREP == "SIGNED") begin
always @(*) begin
if (denom_pipe_end == 0) begin
quotient = 32'hffffffff;
remainder = numer_pipe_end;
end
else if (denom_pipe_end == 32'hffffffff
&& numer_pipe_end == 32'h80000000) begin
// this edge case kills verilator in some cases by causing a division
// overflow exception. INT_MIN / -1 (on x86)
quotient = 0;
remainder = 0;
end
else begin
quotient = $signed($signed(numer_pipe_end) / $signed(denom_pipe_end));
remainder = $signed($signed(numer_pipe_end) % $signed(denom_pipe_end));
end
end
end
else begin
assign quotient = (denom_pipe_end == 0) ? 32'hffffffff : numer_pipe_end/denom_pipe_end;
assign remainder = (denom_pipe_end == 0) ? numer_pipe_end : numer_pipe_end%denom_pipe_end;
end
`endif
endgenerate endgenerate
endmodule : VX_divide endmodule : VX_divide

View File

@@ -2,21 +2,17 @@
module VX_generic_priority_encoder #( module VX_generic_priority_encoder #(
parameter N = 1 parameter N = 1
) ( ) (
input wire[N-1:0] valids, input wire[N-1:0] valids,
//output reg[$clog2(N)-1:0] index, output reg[(`LOG2UP(N))-1:0] index,
output reg[(`LOG2UP(N))-1:0] index, output reg found
//output reg[`LOG2UP(N):0] index, // eh );
output reg found
);
integer i; integer i;
always @(*) begin always @(*) begin
index = 0; index = 0;
found = 0; found = 0;
for (i = N-1; i >= 0; i = i - 1) begin for (i = N-1; i >= 0; i = i - 1) begin
if (valids[i]) begin if (valids[i]) begin
//index = i[$clog2(N)-1:0];
index = i[(`LOG2UP(N))-1:0]; index = i[(`LOG2UP(N))-1:0];
found = 1; found = 1;
end end

View File

@@ -132,7 +132,7 @@ module VX_generic_queue #(
rd_ptr_r <= rd_ptr_next_r; rd_ptr_r <= rd_ptr_next_r;
if (SIZE > 2) begin if (SIZE > 2) begin
rd_ptr_next_r <= rd_ptr_r + 2; rd_ptr_next_r <= rd_ptr_r + $bits(rd_ptr_r)'(2);
end else begin // (SIZE == 2); end else begin // (SIZE == 2);
rd_ptr_next_r <= ~rd_ptr_next_r; rd_ptr_next_r <= ~rd_ptr_next_r;
end end

View File

@@ -19,110 +19,102 @@ module VX_mult #(
output reg [WIDTHP-1:0] result output reg [WIDTHP-1:0] result
); );
// synthesis read_comments_as_HDL on
// localparam IMPL = "quartus";
// synthesis read_comments_as_HDL off
// altera translate_off
localparam IMPL="fallback";
// altera translate_on
generate generate
if (IMPL == "quartus") begin `ifdef QUARTUS
localparam lpm_speed = (SPEED == "HIGHEST") ? 10 : 5; localparam lpm_speed = (SPEED == "HIGHEST") ? 10 : 5;
if (FORCE_LE == "YES") begin
lpm_mult #(
.LPM_WIDTHA(WIDTHA),
.LPM_WIDTHB(WIDTHB),
.LPM_WIDTHP(WIDTHP),
.LPM_REPRESENTATION(REP),
.LPM_PIPELINE(PIPELINE),
.DSP_BLOCK_BALANCING("LOGIC ELEMENTS"),
.MAXIMIZE_SPEED(lpm_speed)
) quartus_mult (
.clock(clock),
.aclr(aclr),
.clken(clken),
.dataa(dataa),
.datab(datab),
.result(result)
);
end
else begin
lpm_mult#(
.LPM_WIDTHA(WIDTHA),
.LPM_WIDTHB(WIDTHB),
.LPM_WIDTHP(WIDTHP),
.LPM_REPRESENTATION(REP),
.LPM_PIPELINE(PIPELINE),
.MAXIMIZE_SPEED(lpm_speed)
) quartus_mult(
.clock(clock),
.aclr(aclr),
.clken(clken),
.dataa(dataa),
.datab(datab),
.result(result)
);
end
if (FORCE_LE == "YES") begin
lpm_mult #(
.LPM_WIDTHA(WIDTHA),
.LPM_WIDTHB(WIDTHB),
.LPM_WIDTHP(WIDTHP),
.LPM_REPRESENTATION(REP),
.LPM_PIPELINE(PIPELINE),
.DSP_BLOCK_BALANCING("LOGIC ELEMENTS"),
.MAXIMIZE_SPEED(lpm_speed)
) quartus_mult (
.clock(clock),
.aclr(aclr),
.clken(clken),
.dataa(dataa),
.datab(datab),
.result(result)
);
end end
else begin else begin
lpm_mult#(
.LPM_WIDTHA(WIDTHA),
.LPM_WIDTHB(WIDTHB),
.LPM_WIDTHP(WIDTHP),
.LPM_REPRESENTATION(REP),
.LPM_PIPELINE(PIPELINE),
.MAXIMIZE_SPEED(lpm_speed)
) quartus_mult(
.clock(clock),
.aclr(aclr),
.clken(clken),
.dataa(dataa),
.datab(datab),
.result(result)
);
end
wire [WIDTHA-1:0] dataa_pipe_end; `else
wire [WIDTHB-1:0] datab_pipe_end;
if (PIPELINE == 0) begin wire [WIDTHA-1:0] dataa_pipe_end;
assign dataa_pipe_end = dataa; wire [WIDTHB-1:0] datab_pipe_end;
assign datab_pipe_end = datab;
end else begin
reg [WIDTHA-1:0] dataa_pipe [0:PIPELINE-1];
reg [WIDTHB-1:0] datab_pipe [0:PIPELINE-1];
genvar i; if (PIPELINE == 0) begin
for (i = 0; i < PIPELINE-1; i++) begin : pipe_stages assign dataa_pipe_end = dataa;
always @(posedge clock or posedge aclr) begin assign datab_pipe_end = datab;
if (aclr) begin end else begin
dataa_pipe[i+1] <= 0; reg [WIDTHA-1:0] dataa_pipe [0:PIPELINE-1];
datab_pipe[i+1] <= 0; reg [WIDTHB-1:0] datab_pipe [0:PIPELINE-1];
end
else if (clken) begin
dataa_pipe[i+1] <= dataa_pipe[i];
datab_pipe[i+1] <= datab_pipe[i];
end
end
end
genvar i;
for (i = 0; i < PIPELINE-1; i++) begin : pipe_stages
always @(posedge clock or posedge aclr) begin always @(posedge clock or posedge aclr) begin
if (aclr) begin if (aclr) begin
dataa_pipe[0] <= 0; dataa_pipe[i+1] <= 0;
datab_pipe[0] <= 0; datab_pipe[i+1] <= 0;
end end
else if (clken) begin else if (clken) begin
dataa_pipe[0] <= dataa; dataa_pipe[i+1] <= dataa_pipe[i];
datab_pipe[0] <= datab; datab_pipe[i+1] <= datab_pipe[i];
end end
end end
assign dataa_pipe_end = dataa_pipe[PIPELINE-1];
assign datab_pipe_end = datab_pipe[PIPELINE-1];
end end
/* * * * * * * * * * * * * * * * * * * * * * */ always @(posedge clock or posedge aclr) begin
/* Do the actual fallback computation here */ if (aclr) begin
/* * * * * * * * * * * * * * * * * * * * * * */ dataa_pipe[0] <= 0;
datab_pipe[0] <= 0;
if (REP == "SIGNED") begin end
assign result = $signed($signed(dataa_pipe_end)*$signed(datab_pipe_end)); else if (clken) begin
end dataa_pipe[0] <= dataa;
else begin datab_pipe[0] <= datab;
assign result = dataa_pipe_end*datab_pipe_end; end
end end
assign dataa_pipe_end = dataa_pipe[PIPELINE-1];
assign datab_pipe_end = datab_pipe[PIPELINE-1];
end end
/* * * * * * * * * * * * * * * * * * * * * * */
/* Do the actual fallback computation here */
/* * * * * * * * * * * * * * * * * * * * * * */
if (REP == "SIGNED") begin
assign result = $signed($signed(dataa_pipe_end)*$signed(datab_pipe_end));
end
else begin
assign result = dataa_pipe_end * datab_pipe_end;
end
`endif
endgenerate endgenerate
endmodule: VX_mult endmodule: VX_mult

View File

@@ -18,26 +18,22 @@ module VX_scope #(
input wire bus_write, input wire bus_write,
input wire bus_read input wire bus_read
); );
localparam DELTA_ENABLE = (UPDW != 0); localparam DELTA_ENABLE = (UPDW != 0);
localparam MAX_DELTA = (2 ** DELTAW) - 1; localparam MAX_DELTA = (2 ** DELTAW) - 1;
typedef enum logic[2:0] { localparam CMD_GET_VALID = 3'd0;
CMD_GET_VALID, localparam CMD_GET_DATA = 3'd1;
CMD_GET_DATA, localparam CMD_GET_WIDTH = 3'd2;
CMD_GET_WIDTH, localparam CMD_GET_COUNT = 3'd3;
CMD_GET_COUNT, localparam CMD_SET_DELAY = 3'd4;
CMD_SET_DELAY, localparam CMD_SET_STOP = 3'd5;
CMD_SET_STOP, localparam CMD_RESERVED1 = 3'd6;
CMD_RESERVED1, localparam CMD_RESERVED2 = 3'd7;
CMD_RESERVED2
} cmd_t;
typedef enum logic[1:0] { localparam GET_VALID = 2'd0;
GET_VALID, localparam GET_DATA = 2'd1;
GET_DATA, localparam GET_WIDTH = 2'd2;
GET_WIDTH, localparam GET_COUNT = 2'd3;
GET_COUNT
} cmd_get_t;
reg [DATAW-1:0] data_store [SIZE-1:0]; reg [DATAW-1:0] data_store [SIZE-1:0];
reg [DELTAW-1:0] delta_store [SIZE-1:0]; reg [DELTAW-1:0] delta_store [SIZE-1:0];
@@ -84,10 +80,10 @@ module VX_scope #(
CMD_GET_VALID, CMD_GET_VALID,
CMD_GET_DATA, CMD_GET_DATA,
CMD_GET_WIDTH, CMD_GET_WIDTH,
CMD_GET_COUNT: out_cmd <= $bits(out_cmd)'(cmd_type); CMD_GET_COUNT: out_cmd <= $bits(out_cmd)'(cmd_type);
CMD_SET_DELAY: delay_val <= $bits(delay_val)'(cmd_data); CMD_SET_DELAY: delay_val <= $bits(delay_val)'(cmd_data);
CMD_SET_STOP: waddr_end <= $bits(waddr)'(cmd_data); CMD_SET_STOP: waddr_end <= $bits(waddr)'(cmd_data);
default:; default:;
endcase endcase
end end
@@ -183,7 +179,7 @@ module VX_scope #(
end end
`ifdef DBG_PRINT_SCOPE `ifdef DBG_PRINT_SCOPE
always_ff @(posedge clk) begin always @(posedge clk) begin
if (bus_read) begin if (bus_read) begin
$display("%t: scope-read: cmd=%0d, out=0x%0h, addr=%0d", $time, out_cmd, bus_out, raddr); $display("%t: scope-read: cmd=%0d, out=0x%0h, addr=%0d", $time, out_cmd, bus_out, raddr);
end end

View File

@@ -28,6 +28,7 @@ VF += --x-initial unique
VF += -exe $(SRCS) $(INCLUDE) VF += -exe $(SRCS) $(INCLUDE)
DBG += -DVCD_OUTPUT $(DBG_PRINT) DBG += -DVCD_OUTPUT $(DBG_PRINT)
DBG += -DDBG_CORE_REQ_INFO
THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu_count() // 2))') THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu_count() // 2))')

View File

@@ -11,11 +11,12 @@ double sc_time_stamp() {
Simulator::Simulator() { Simulator::Simulator() {
// force random values for uninitialized signals // force random values for uninitialized signals
Verilated::randReset(1); Verilated::randReset(2);
ram_ = nullptr; ram_ = nullptr;
vortex_ = new VVortex_Socket(); vortex_ = new VVortex_Socket();
dram_rsp_active_ = false;
snp_req_active_ = false; snp_req_active_ = false;
#ifdef VCD_OUTPUT #ifdef VCD_OUTPUT
@@ -76,7 +77,7 @@ void Simulator::eval_dram_bus() {
return; return;
} }
// handle DRAM response cycle // schedule DRAM responses
int dequeue_index = -1; int dequeue_index = -1;
for (int i = 0; i < dram_rsp_vec_.size(); i++) { for (int i = 0; i < dram_rsp_vec_.size(); i++) {
if (dram_rsp_vec_[i].cycles_left > 0) { if (dram_rsp_vec_[i].cycles_left > 0) {
@@ -88,16 +89,23 @@ void Simulator::eval_dram_bus() {
} }
} }
// handle DRAM response message // send DRAM response
if ((dequeue_index != -1) if (dram_rsp_active_
&& vortex_->dram_rsp_valid
&& vortex_->dram_rsp_ready) { && vortex_->dram_rsp_ready) {
vortex_->dram_rsp_valid = 1; dram_rsp_active_ = false;
memcpy((uint8_t*)vortex_->dram_rsp_data, dram_rsp_vec_[dequeue_index].data, GLOBAL_BLOCK_SIZE); }
vortex_->dram_rsp_tag = dram_rsp_vec_[dequeue_index].tag; if (!dram_rsp_active_) {
free(dram_rsp_vec_[dequeue_index].data); if (dequeue_index != -1) {
dram_rsp_vec_.erase(dram_rsp_vec_.begin() + dequeue_index); vortex_->dram_rsp_valid = 1;
} else { memcpy((uint8_t*)vortex_->dram_rsp_data, dram_rsp_vec_[dequeue_index].data, GLOBAL_BLOCK_SIZE);
vortex_->dram_rsp_valid = 0; vortex_->dram_rsp_tag = dram_rsp_vec_[dequeue_index].tag;
free(dram_rsp_vec_[dequeue_index].data);
dram_rsp_vec_.erase(dram_rsp_vec_.begin() + dequeue_index);
dram_rsp_active_ = true;
} else {
vortex_->dram_rsp_valid = 0;
}
} }
// handle DRAM stalls // handle DRAM stalls
@@ -111,7 +119,7 @@ void Simulator::eval_dram_bus() {
} }
#endif #endif
// handle DRAM requests // process DRAM requests
if (!dram_stalled) { if (!dram_stalled) {
if (vortex_->dram_req_valid) { if (vortex_->dram_req_valid) {
if (vortex_->dram_req_rw) { if (vortex_->dram_req_rw) {

View File

@@ -55,8 +55,9 @@ private:
void eval_snp_bus(); void eval_snp_bus();
std::vector<dram_req_t> dram_rsp_vec_; std::vector<dram_req_t> dram_rsp_vec_;
int dram_rsp_active_;
uint32_t snp_req_active_; bool snp_req_active_;
uint32_t snp_req_size_; uint32_t snp_req_size_;
uint32_t pending_snp_reqs_; uint32_t pending_snp_reqs_;

View File

@@ -49,7 +49,7 @@ smart.log: $(PROJECT_FILES)
# Project initialization # Project initialization
$(PROJECT_FILES): $(PROJECT_FILES):
quartus_sh -t project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src $(SRC_FILE) -sdc project.sdc -inc "../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/cache" quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src $(SRC_FILE) -sdc ../project.sdc -inc "../../../rtl;../../../rtl/libs;../../../rtl/cache"
syn.chg: syn.chg:
$(STAMP) syn.chg $(STAMP) syn.chg

View File

@@ -1 +0,0 @@
# Timing constraint: declare a clock named "clk", with a 250 MHz period,
# on the ports returned by [get_ports {clk}].
# NOTE(review): the -waveform edges are 0.0 and 1.0; if the time unit is ns,
# a 250 MHz (4 ns) period with these edges is not a 50% duty cycle -- confirm
# this is intended.
create_clock -name {clk} -period "250 MHz" -waveform { 0.0 1.0 } [get_ports {clk}]

View File

@@ -1,67 +0,0 @@
# Quartus project-creation script. Options are taken from $quartus(args);
# a new project is created and populated with global assignments.
load_package flow
package require cmdline

# Option table handed to ::cmdline::getoptions, one entry per option:
# { name.arg default description }.
set options {
    { "project.arg" "" "Project name" }
    { "family.arg" "" "Device family name" }
    { "device.arg" "" "Device name" }
    { "top.arg" "" "Top level module" }
    { "src.arg" "" "Verilog source file" }
    { "inc.arg" "" "Include path (optional)" }
    { "sdc.arg" "" "Timing Design Constraints file (optional)" }
    { "set.arg" "" "Macro value (optional)" }
}

# Snapshot the raw argument list before getoptions is given quartus(args) by
# name; the snapshot is re-scanned below to apply the file and macro options
# (presumably so that an option given several times is applied each time).
set q_args_orig $quartus(args)

array set opts [::cmdline::getoptions quartus(args) $options]

# Verify required parameters
set requiredParameters {project family device top src}
foreach p $requiredParameters {
    # 'eq' forces a string comparison; the condition is braced so it is
    # substituted only once.
    if {$opts($p) eq ""} {
        puts stderr "Missing required parameter: -$p"
        exit 1
    }
}

project_new $opts(project) -overwrite

set_global_assignment -name FAMILY $opts(family)
set_global_assignment -name DEVICE $opts(device)
set_global_assignment -name TOP_LEVEL_ENTITY $opts(top)
set_global_assignment -name PROJECT_OUTPUT_DIRECTORY bin
set_global_assignment -name NUM_PARALLEL_PROCESSORS ALL
set_global_assignment -name VERILOG_INPUT_VERSION SYSTEMVERILOG_2009

# Walk the raw arguments. idx is incremented before use, so inside the loop
# body it indexes the element that follows the current flag, i.e. its value.
set idx 0
foreach arg $q_args_orig {
    incr idx
    set value [lindex $q_args_orig $idx]
    switch -exact -- $arg {
        -src { set_global_assignment -name VERILOG_FILE $value }
        -inc { set_global_assignment -name SEARCH_PATH $value }
        -sdc { set_global_assignment -name SDC_FILE $value }
        -set { set_global_assignment -name VERILOG_MACRO $value }
    }
}
# Mark every pin-type node of the design as a virtual pin, then export the
# resulting assignments. Takes no arguments.
proc make_all_pins_virtual {} {
    # Run the "map" tool before querying node names.
    execute_module -tool map

    foreach_in_collection node [get_names -filter * -node_type pin] {
        set full_path [get_name_info -info full_path $node]
        post_message "Making VIRTUAL_PIN assignment to $full_path"
        set_instance_assignment -to $full_path -name VIRTUAL_PIN ON
    }

    export_assignments
}

# Apply the virtual-pin assignments, then close the project.
make_all_pins_virtual
project_close

View File

@@ -49,7 +49,7 @@ smart.log: $(PROJECT_FILES)
# Project initialization # Project initialization
$(PROJECT_FILES): $(PROJECT_FILES):
quartus_sh -t project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src $(SRC_FILE) -sdc project.sdc -inc "../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/pipe_regs;../../../rtl/cache" quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src $(SRC_FILE) -sdc ../project.sdc -inc "../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/pipe_regs"
syn.chg: syn.chg:
$(STAMP) syn.chg $(STAMP) syn.chg

View File

@@ -1,67 +0,0 @@
# Quartus project-creation script. Options are taken from $quartus(args);
# a new project is created and populated with global assignments.
load_package flow
package require cmdline

# Option table handed to ::cmdline::getoptions, one entry per option:
# { name.arg default description }.
set options {
    { "project.arg" "" "Project name" }
    { "family.arg" "" "Device family name" }
    { "device.arg" "" "Device name" }
    { "top.arg" "" "Top level module" }
    { "src.arg" "" "Verilog source file" }
    { "inc.arg" "" "Include path (optional)" }
    { "sdc.arg" "" "Timing Design Constraints file (optional)" }
    { "set.arg" "" "Macro value (optional)" }
}

# Snapshot the raw argument list before getoptions is given quartus(args) by
# name; the snapshot is re-scanned below to apply the file and macro options
# (presumably so that an option given several times is applied each time).
set q_args_orig $quartus(args)

array set opts [::cmdline::getoptions quartus(args) $options]

# Verify required parameters
set requiredParameters {project family device top src}
foreach p $requiredParameters {
    # 'eq' forces a string comparison; the condition is braced so it is
    # substituted only once.
    if {$opts($p) eq ""} {
        puts stderr "Missing required parameter: -$p"
        exit 1
    }
}

project_new $opts(project) -overwrite

set_global_assignment -name FAMILY $opts(family)
set_global_assignment -name DEVICE $opts(device)
set_global_assignment -name TOP_LEVEL_ENTITY $opts(top)
set_global_assignment -name PROJECT_OUTPUT_DIRECTORY bin
set_global_assignment -name NUM_PARALLEL_PROCESSORS ALL
set_global_assignment -name VERILOG_INPUT_VERSION SYSTEMVERILOG_2009

# Walk the raw arguments. idx is incremented before use, so inside the loop
# body it indexes the element that follows the current flag, i.e. its value.
set idx 0
foreach arg $q_args_orig {
    incr idx
    set value [lindex $q_args_orig $idx]
    switch -exact -- $arg {
        -src { set_global_assignment -name VERILOG_FILE $value }
        -inc { set_global_assignment -name SEARCH_PATH $value }
        -sdc { set_global_assignment -name SDC_FILE $value }
        -set { set_global_assignment -name VERILOG_MACRO $value }
    }
}
# Mark every pin-type node of the design as a virtual pin, then export the
# resulting assignments. Takes no arguments.
proc make_all_pins_virtual {} {
    # Run the "map" tool before querying node names.
    execute_module -tool map

    foreach_in_collection node [get_names -filter * -node_type pin] {
        set full_path [get_name_info -info full_path $node]
        post_message "Making VIRTUAL_PIN assignment to $full_path"
        set_instance_assignment -to $full_path -name VIRTUAL_PIN ON
    }

    export_assignments
}

# Apply the virtual-pin assignments, then close the project.
make_all_pins_virtual
project_close

View File

@@ -33,6 +33,9 @@ set_global_assignment -name TOP_LEVEL_ENTITY $opts(top)
set_global_assignment -name PROJECT_OUTPUT_DIRECTORY bin set_global_assignment -name PROJECT_OUTPUT_DIRECTORY bin
set_global_assignment -name NUM_PARALLEL_PROCESSORS ALL set_global_assignment -name NUM_PARALLEL_PROCESSORS ALL
set_global_assignment -name VERILOG_INPUT_VERSION SYSTEMVERILOG_2009 set_global_assignment -name VERILOG_INPUT_VERSION SYSTEMVERILOG_2009
set_global_assignment -name VERILOG_MACRO QUARTUS
set_global_assignment -name VERILOG_MACRO SYNTHESIS
set_global_assignment -name VERILOG_MACRO NDEBUG
set idx 0 set idx 0
foreach arg $q_args_orig { foreach arg $q_args_orig {

View File

@@ -49,7 +49,7 @@ smart.log: $(PROJECT_FILES)
# Project initialization # Project initialization
$(PROJECT_FILES): $(PROJECT_FILES):
quartus_sh -t project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src $(SRC_FILE) -sdc project.sdc -inc "../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/pipe_regs;../../../rtl/cache;../../../opae;../../../opae/ccip" -macro "NOPAE" quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src $(SRC_FILE) -set "NOPAE" -sdc ../project.sdc -inc "../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/pipe_regs;../../../rtl/cache;../../../opae;../../../opae/ccip"
syn.chg: syn.chg:
$(STAMP) syn.chg $(STAMP) syn.chg

View File

@@ -1,9 +0,0 @@
# Timing constraints for the design clock.
# Times are expressed in nanoseconds with three decimal places.
set_time_format -unit ns -decimal_places 3
# Declare a clock named "clk", with a 250 MHz period, on the ports returned
# by [get_ports {clk}].
# NOTE(review): the -waveform edges are 0.0 and 1.0 ns while a 250 MHz period
# is 4 ns, so this is not a 50% duty cycle -- confirm this is intended.
create_clock -name {clk} -period "250 MHz" -waveform { 0.0 1.0 } [get_ports {clk}]
# Derive PLL clocks, creating base clocks where requested by the flag.
derive_pll_clocks -create_base_clocks
# Derive clock uncertainty for the clocks defined above.
derive_clock_uncertainty

View File

@@ -1,67 +1,70 @@
load_package flow PROJECT = VX_vortex
package require cmdline TOP_LEVEL_ENTITY = VX_vortex
SRC_FILE = VX_vortex.v
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf
set options { # Part, Family
{ "project.arg" "" "Project name" } FAMILY = "Arria 10"
{ "family.arg" "" "Device family name" } DEVICE = 10AX115N3F40E2SG
{ "device.arg" "" "Device name" }
{ "top.arg" "" "Top level module" }
{ "src.arg" "" "Verilog source file" }
{ "inc.arg" "" "Include path (optional)" }
{ "sdc.arg" "" "Timing Design Constraints file (optional)" }
{ "set.arg" "" "Macro value (optional)" }
}
set q_args_orig $quartus(args) # Executable Configuration
SYN_ARGS = --parallel --read_settings_files=on
FIT_ARGS = --part=$(DEVICE) --read_settings_files=on
ASM_ARGS =
STA_ARGS = --do_report_timing
array set opts [::cmdline::getoptions quartus(args) $options] # Build targets
all: $(PROJECT).sta.rpt
# Verify required parameters syn: $(PROJECT).syn.rpt
set requiredParameters {project family device top src}
foreach p $requiredParameters {
if {$opts($p) == ""} {
puts stderr "Missing required parameter: -$p"
exit 1
}
}
project_new $opts(project) -overwrite fit: $(PROJECT).fit.rpt
set_global_assignment -name FAMILY $opts(family) asm: $(PROJECT).asm.rpt
set_global_assignment -name DEVICE $opts(device)
set_global_assignment -name TOP_LEVEL_ENTITY $opts(top)
set_global_assignment -name PROJECT_OUTPUT_DIRECTORY bin
set_global_assignment -name NUM_PARALLEL_PROCESSORS ALL
set_global_assignment -name VERILOG_INPUT_VERSION SYSTEMVERILOG_2009
set idx 0 sta: $(PROJECT).sta.rpt
foreach arg $q_args_orig {
incr idx
if [string match "-src" $arg] {
set_global_assignment -name VERILOG_FILE [lindex $q_args_orig $idx]
}
if [string match "-inc" $arg] {
set_global_assignment -name SEARCH_PATH [lindex $q_args_orig $idx]
}
if [string match "-sdc" $arg] {
set_global_assignment -name SDC_FILE [lindex $q_args_orig $idx]
}
if [string match "-set" $arg] {
set_global_assignment -name VERILOG_MACRO [lindex $q_args_orig $idx]
}
}
proc make_all_pins_virtual {} { smart: smart.log
execute_module -tool map
set name_ids [get_names -filter * -node_type pin]
foreach_in_collection name_id $name_ids {
set pin_name [get_name_info -info full_path $name_id]
post_message "Making VIRTUAL_PIN assignment to $pin_name"
set_instance_assignment -to $pin_name -name VIRTUAL_PIN ON
}
export_assignments
}
make_all_pins_virtual # Target implementations
STAMP = echo done >
project_close $(PROJECT).syn.rpt: smart.log syn.chg $(SOURCE_FILES)
quartus_syn $(PROJECT) $(SYN_ARGS)
$(STAMP) fit.chg
$(PROJECT).fit.rpt: smart.log fit.chg $(PROJECT).syn.rpt
quartus_fit $(PROJECT) $(FIT_ARGS)
$(STAMP) asm.chg
$(STAMP) sta.chg
$(PROJECT).asm.rpt: smart.log asm.chg $(PROJECT).fit.rpt
quartus_asm $(PROJECT) $(ASM_ARGS)
$(PROJECT).sta.rpt: smart.log sta.chg $(PROJECT).fit.rpt
quartus_sta $(PROJECT) $(STA_ARGS)
smart.log: $(PROJECT_FILES)
quartus_sh --determine_smart_action $(PROJECT) > smart.log
# Project initialization
$(PROJECT_FILES):
quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src $(SRC_FILE) -sdc ../project.sdc -inc "../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/pipe_regs;../../../rtl/cache"
syn.chg:
$(STAMP) syn.chg
fit.chg:
$(STAMP) fit.chg
sta.chg:
$(STAMP) sta.chg
asm.chg:
$(STAMP) asm.chg
program: $(PROJECT).sof
quartus_pgm --no_banner --mode=jtag -o "$(PROJECT).sof"
clean:
rm -rf bin *.rpt *.chg *.qsf *.qpf *.qws smart.log *.htm *.eqn *.pin *.sof *.pof qdb incremental_db tmp-clearbox

View File

@@ -1,9 +0,0 @@
# Timing constraints for the design clock.
# Times are expressed in nanoseconds with three decimal places.
set_time_format -unit ns -decimal_places 3
# Declare a clock named "clk", with a 250 MHz period, on the ports returned
# by [get_ports {clk}].
# NOTE(review): the -waveform edges are 0.0 and 1.0 ns while a 250 MHz period
# is 4 ns, so this is not a 50% duty cycle -- confirm this is intended.
create_clock -name {clk} -period "250 MHz" -waveform { 0.0 1.0 } [get_ports {clk}]
# Derive PLL clocks, creating base clocks where requested by the flag.
derive_pll_clocks -create_base_clocks
# Derive clock uncertainty for the clocks defined above.
derive_clock_uncertainty

View File

@@ -1,41 +0,0 @@
# Quartus project-creation script. Options are taken from $quartus(args);
# a new project is created and populated with global assignments.
load_package flow
package require cmdline

# Option table handed to ::cmdline::getoptions, one entry per option:
# { name.arg default description }. No line continuations are needed
# inside a braced list.
set options {
    { "project.arg" "" "Project name" }
    { "family.arg" "" "Device family name" }
    { "device.arg" "" "Device name" }
    { "top.arg" "" "Top level module" }
    { "sdc.arg" "" "Timing Design Constraints file" }
    { "src.arg" "" "Verilog source file" }
    { "inc.arg" "." "Include path" }
}

array set opts [::cmdline::getoptions quartus(args) $options]

# Fail early when an option that has no usable default was left empty,
# instead of creating a project with empty assignments.
foreach p {project family device top src} {
    if {$opts($p) eq ""} {
        puts stderr "Missing required parameter: -$p"
        exit 1
    }
}

project_new $opts(project) -overwrite

# Device and design assignments taken from the command line.
set_global_assignment -name FAMILY $opts(family)
set_global_assignment -name DEVICE $opts(device)
set_global_assignment -name TOP_LEVEL_ENTITY $opts(top)
set_global_assignment -name VERILOG_FILE $opts(src)
set_global_assignment -name SEARCH_PATH $opts(inc)
set_global_assignment -name SDC_FILE $opts(sdc)

# Fixed assignments.
set_global_assignment -name PROJECT_OUTPUT_DIRECTORY bin
set_global_assignment -name NUM_PARALLEL_PROCESSORS ALL
set_global_assignment -name VERILOG_INPUT_VERSION SYSTEMVERILOG_2009
# Mark every pin-type node of the design as a virtual pin, then export the
# resulting assignments. Takes no arguments.
proc make_all_pins_virtual {} {
    # Run the "map" tool before querying node names.
    execute_module -tool map

    foreach_in_collection node [get_names -filter * -node_type pin] {
        set full_path [get_name_info -info full_path $node]
        post_message "Making VIRTUAL_PIN assignment to $full_path"
        set_instance_assignment -to $full_path -name VIRTUAL_PIN ON
    }

    export_assignments
}

# Apply the virtual-pin assignments, then close the project.
make_all_pins_virtual
project_close