simulation framework refactoring

This commit is contained in:
Blaise Tine
2021-10-09 10:20:42 -04:00
parent 51673665b5
commit 54bddeee9c
89 changed files with 1217 additions and 1471 deletions

View File

@@ -1,10 +1,16 @@
all: stub rtlsim simx opae
all: stub rtlsim simx vlsim
stub:
$(MAKE) -C stub
opae:
$(MAKE) -C opae
fpga:
$(MAKE) -C fpga
asesim:
$(MAKE) -C asesim
vlsim:
$(MAKE) -C vlsim
rtlsim:
$(MAKE) -C rtlsim
@@ -14,8 +20,10 @@ simx:
clean:
$(MAKE) clean -C stub
$(MAKE) clean -C opae
$(MAKE) clean -C fpga
$(MAKE) clean -C asesim
$(MAKE) clean -C vlsim
$(MAKE) clean -C rtlsim
$(MAKE) clean -C simx
.PHONY: all stub opae rtlsim simx clean
.PHONY: all stub fpga asesim vlsim rtlsim simx clean

75
driver/asesim/Makefile Normal file
View File

@@ -0,0 +1,75 @@
OPAE_HOME ?= /tools/opae/1.4.0
CXXFLAGS += -std=c++11 -O2 -DNDEBUG -Wall -Wextra -pedantic -Wfatal-errors
#CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors
CXXFLAGS += -I../include -I../../hw -I$(OPAE_HOME)/include
LDFLAGS += -L$(OPAE_HOME)/lib -luuid -lopae-c-ase
# stack execution protection
LDFLAGS +=-z noexecstack
# data relocation and projection
LDFLAGS +=-z relro -z now
# stack buffer overrun detection
CXXFLAGS +=-fstack-protector
# Position independent code
CXXFLAGS += -fPIC
# Add external configuration
CXXFLAGS += $(CONFIGS)
# Dump perf stats
CXXFLAGS += -DDUMP_PERF_STATS
LDFLAGS += -shared
RTL_DIR=../../hw/rtl
SCRIPT_DIR=../../hw/scripts
PROJECT = libvortex.so
AFU_JSON_INFO = vortex_afu.h
SRCS = ../common/opae.cpp ../common/vx_utils.cpp
# Enable scope analyzer
ifdef SCOPE
CXXFLAGS += -DSCOPE
SRCS += ../common/vx_scope.cpp
SCOPE_H = scope-defs.h
endif
# Enable perf counters
ifdef PERF
CXXFLAGS += -DPERF_ENABLE
endif
all: $(PROJECT)
# AFU info from JSON file, including AFU UUID
json: ../../hw/opae/vortex_afu.json
afu_json_mgr json-info --afu-json=$^ --c-hdr=$@
scope-defs.h: $(SCRIPT_DIR)/scope.json
$(SCRIPT_DIR)/scope.py $(CONFIGS) -cc scope-defs.h -vl $(RTL_DIR)/scope-defs.vh $(SCRIPT_DIR)/scope.json
# generate scope data
scope: scope-defs.h
$(PROJECT): $(SRCS) $(SCOPE_H)
$(CXX) $(CXXFLAGS) -DUSE_ASE $(SRCS) $(LDFLAGS) -o $(PROJECT)
.depend: $(SRCS)
$(CXX) $(CXXFLAGS) -MM $(SRCS) > .depend;
clean:
rm -rf $(PROJECT) *.o .depend
ifneq ($(MAKECMDGOALS),clean)
-include .depend
endif

View File

@@ -14,7 +14,7 @@
#include <opae/fpga.h>
#include <uuid/uuid.h>
#elif defined(USE_VLSIM)
#include "vlsim/fpga.h"
#include <fpga.h>
#endif
#include <vortex.h>

View File

@@ -5,7 +5,7 @@ CXXFLAGS += -std=c++11 -O2 -DNDEBUG -Wall -Wextra -pedantic -Wfatal-errors
CXXFLAGS += -I../include -I$(OPAE_HOME)/include -I../../hw
LDFLAGS += -L$(OPAE_HOME)/lib
LDFLAGS += -L$(OPAE_HOME)/lib -luuid -lopae-c
#SCOPE=1
@@ -29,45 +29,29 @@ CXXFLAGS += -DDUMP_PERF_STATS
LDFLAGS += -shared
FPGA_LIBS += -luuid -lopae-c
ASE_LIBS += -luuid -lopae-c-ase
VLSIM_LIBS += -lopae-c-vlsim
ASE_DIR = ase
VLSIM_DIR = vlsim
RTL_DIR=../../hw/rtl
SCRIPT_DIR=../../hw/scripts
PROJECT = libvortex.so
PROJECT_ASE = $(ASE_DIR)/libvortex.so
PROJECT_VLSIM = $(VLSIM_DIR)/libvortex.so
AFU_JSON_INFO = vortex_afu.h
SRCS = vortex.cpp ../common/vx_utils.cpp
SRCS = ../common/opae.cpp ../common/vx_utils.cpp
# Enable scope analyzer
ifdef SCOPE
CXXFLAGS += -DSCOPE
SRCS += vx_scope.cpp
SCOPE_ENABLE = SCOPE=1
SRCS += ../common/vx_scope.cpp
SCOPE_H = scope-defs.h
endif
# Enable perf counters
ifdef PERF
CXXFLAGS += -DPERF_ENABLE
PERF_ENABLE = PERF=1
endif
all: vlsim
all: $(PROJECT)
# AFU info from JSON file, including AFU UUID
json: ../../hw/opae/vortex_afu.json
@@ -79,38 +63,15 @@ scope-defs.h: $(SCRIPT_DIR)/scope.json
# generate scope data
scope: scope-defs.h
vlsim-hw: $(SCOPE_H)
$(SCOPE_ENABLE) $(PERF_ENABLE) $(MAKE) -C vlsim
fpga: $(SRCS) $(SCOPE_H)
$(CXX) $(CXXFLAGS) -DUSE_FPGA $^ $(LDFLAGS) $(FPGA_LIBS) -o $(PROJECT)
asesim: $(SRCS) $(ASE_DIR) $(SCOPE_H)
$(CXX) $(CXXFLAGS) -DUSE_ASE $(SRCS) $(LDFLAGS) $(ASE_LIBS) -o $(PROJECT_ASE)
vlsim: $(SRCS) vlsim-hw
$(CXX) $(CXXFLAGS) -DUSE_VLSIM $(SRCS) $(LDFLAGS) -L./vlsim $(VLSIM_LIBS) -o $(PROJECT_VLSIM)
vortex.o: vortex.cpp
$(CXX) $(CXXFLAGS) -c vortex.cpp -o $@
$(ASE_DIR):
mkdir -p ase
$(PROJECT): $(SRCS) $(SCOPE_H)
$(CXX) $(CXXFLAGS) -DUSE_FPGA $^ $(LDFLAGS) -o $(PROJECT)
.depend: $(SRCS)
$(CXX) $(CXXFLAGS) -MM $(SRCS) > .depend;
clean-fpga:
clean:
rm -rf $(PROJECT) *.o .depend
clean-asesim:
rm -rf $(PROJECT_ASE) *.o .depend
clean-vlsim:
$(MAKE) -C vlsim clean
clean: clean-fpga clean-asesim clean-vlsim
ifneq ($(MAKECMDGOALS),clean)
-include .depend
endif

View File

@@ -1 +0,0 @@
/obj_dir/*

View File

@@ -1,98 +0,0 @@
CFLAGS += -std=c++11 -O2 -Wall -Wextra -Wfatal-errors
#CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors
CFLAGS += -DUSE_VLSIM -fPIC -Wno-maybe-uninitialized
CFLAGS += -I../../../../hw
# control RTL debug print states
DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE
DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_ICACHE
DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_DCACHE
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_BANK
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSHR
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_TAG
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_DATA
DBG_PRINT_FLAGS += -DDBG_PRINT_MEM
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
DBG_PRINT_FLAGS += -DDBG_PRINT_AVS
DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE
DBG_FLAGS += $(DBG_PRINT_FLAGS)
DBG_FLAGS += -DDBG_CACHE_REQ_INFO
CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=1
CFLAGS += $(CONFIGS)
CFLAGS += -DDUMP_PERF_STATS
LDFLAGS += -shared -pthread
# LDFLAGS += -dynamiclib -pthread
TOP = vortex_afu_shim
RTL_DIR=../../../hw/rtl
DPI_DIR=../../../hw/dpi
SRCS = fpga.cpp opae_sim.cpp
SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp
FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src -I$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(RTL_DIR)/fp_cores/fpnew/src
RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache $(FPU_INCLUDE)
RTL_INCLUDE += -I$(RTL_DIR)/afu -I$(RTL_DIR)/afu/ccip
VL_FLAGS += -O2 --language 1800-2009 --assert -Wall -Wpedantic
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO
VL_FLAGS += --x-initial unique --x-assign unique
VL_FLAGS += verilator.vlt
VL_FLAGS += $(CONFIGS)
# Enable Verilator multithreaded simulation
#THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu_count() // 2))')
#VL_FLAGS += --threads $(THREADS)
# Debugigng
ifdef DEBUG
VL_FLAGS += -DVCD_OUTPUT --trace --trace-structs $(DBG_FLAGS)
CFLAGS += -DVCD_OUTPUT $(DBG_FLAGS)
else
VL_FLAGS += -DNDEBUG
CFLAGS += -DNDEBUG
endif
# Enable scope analyzer
ifdef SCOPE
VL_FLAGS += -DSCOPE
CFLAGS += -DSCOPE
endif
# Enable perf counters
ifdef PERF
VL_FLAGS += -DPERF_ENABLE
CFLAGS += -DPERF_ENABLE
endif
# use our OPAE shim
VL_FLAGS += -DNOPAE
CFLAGS += -DNOPAE
# ALU backend
VL_FLAGS += -DIMUL_DPI
VL_FLAGS += -DIDIV_DPI
# FPU backend
FPU_CORE ?= FPU_DPI
VL_FLAGS += -D$(FPU_CORE)
PROJECT = libopae-c-vlsim.so
all: $(PROJECT)
vortex_afu.h : $(RTL_DIR)/afu/vortex_afu.vh
../../../hw/scripts/gen_config.py -i $(RTL_DIR)/afu/vortex_afu.vh -o vortex_afu.h
$(PROJECT): $(SRCS) vortex_afu.h
verilator --exe --cc $(TOP) --top-module $(TOP) $(RTL_INCLUDE) $(VL_FLAGS) $(SRCS) -CFLAGS '$(CFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(PROJECT)
make -j -C obj_dir -f V$(TOP).mk
clean:
rm -rf $(PROJECT) obj_dir ../scope-defs.h $(RTL_DIR)/scope-defs.vh vortex_afu.h

View File

@@ -1,84 +0,0 @@
#include <stdint.h>
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <cstdlib>
#include <unistd.h>
#include <assert.h>
#include "fpga.h"
#include "opae_sim.h"
#include <VX_config.h>
extern fpga_result fpgaOpen(fpga_token token, fpga_handle *handle, int flags) {
if (NULL == handle || flags != 0)
return FPGA_INVALID_PARAM;
auto sim = new opae_sim();
*handle = reinterpret_cast<fpga_handle>(sim);
return FPGA_OK;
}
extern fpga_result fpgaClose(fpga_handle handle) {
if (NULL == handle)
return FPGA_INVALID_PARAM;
auto sim = reinterpret_cast<opae_sim*>(handle);
delete sim;
return FPGA_OK;
}
extern fpga_result fpgaPrepareBuffer(fpga_handle handle, uint64_t len, void **buf_addr, uint64_t *wsid, int flags) {
if (NULL == handle || len == 0 || buf_addr == NULL || wsid == NULL)
return FPGA_INVALID_PARAM;
auto sim = reinterpret_cast<opae_sim*>(handle);
int ret = sim->prepare_buffer(len, buf_addr, wsid, flags);
if (ret != 0)
return FPGA_NO_MEMORY;
return FPGA_OK;
}
extern fpga_result fpgaReleaseBuffer(fpga_handle handle, uint64_t wsid) {
if (NULL == handle)
return FPGA_INVALID_PARAM;
auto sim = reinterpret_cast<opae_sim*>(handle);
sim->release_buffer(wsid);
return FPGA_OK;
}
extern fpga_result fpgaGetIOAddress(fpga_handle handle, uint64_t wsid, uint64_t *ioaddr) {
if (NULL == handle || ioaddr == NULL)
return FPGA_INVALID_PARAM;
auto sim = reinterpret_cast<opae_sim*>(handle);
sim->get_io_address(wsid, ioaddr);
return FPGA_OK;
}
extern fpga_result fpgaWriteMMIO64(fpga_handle handle, uint32_t mmio_num, uint64_t offset, uint64_t value) {
if (NULL == handle || mmio_num != 0)
return FPGA_INVALID_PARAM;
auto sim = reinterpret_cast<opae_sim*>(handle);
sim->write_mmio64(mmio_num, offset, value);
return FPGA_OK;
}
extern fpga_result fpgaReadMMIO64(fpga_handle handle, uint32_t mmio_num, uint64_t offset, uint64_t *value) {
if (NULL == handle || mmio_num != 0 || value == NULL)
return FPGA_INVALID_PARAM;
auto sim = reinterpret_cast<opae_sim*>(handle);
sim->read_mmio64(mmio_num, offset, value);
return FPGA_OK;
}
extern const char *fpgaErrStr(fpga_result e) {
return "";
}

View File

@@ -1,48 +0,0 @@
#ifndef __FPGA_H__
#define __FPGA_H__
#include <stdio.h>
#ifdef __cplusplus
extern "C" {
#endif
typedef enum {
FPGA_OK = 0, /**< Operation completed successfully */
FPGA_INVALID_PARAM, /**< Invalid parameter supplied */
FPGA_BUSY, /**< Resource is busy */
FPGA_EXCEPTION, /**< An exception occurred */
FPGA_NOT_FOUND, /**< A required resource was not found */
FPGA_NO_MEMORY, /**< Not enough memory to complete operation */
FPGA_NOT_SUPPORTED, /**< Requested operation is not supported */
FPGA_NO_DRIVER, /**< Driver is not loaded */
FPGA_NO_DAEMON, /**< FPGA Daemon (fpgad) is not running */
FPGA_NO_ACCESS, /**< Insufficient privileges or permissions */
FPGA_RECONF_ERROR /**< Error while reconfiguring FPGA */
} fpga_result;
typedef void *fpga_handle;
typedef void *fpga_token;
fpga_result fpgaOpen(fpga_token token, fpga_handle *handle, int flags);
fpga_result fpgaClose(fpga_handle handle);
fpga_result fpgaPrepareBuffer(fpga_handle handle, uint64_t len, void **buf_addr, uint64_t *wsid, int flags);
fpga_result fpgaReleaseBuffer(fpga_handle handle, uint64_t wsid);
fpga_result fpgaGetIOAddress(fpga_handle handle, uint64_t wsid, uint64_t *ioaddr);
fpga_result fpgaWriteMMIO64(fpga_handle handle, uint32_t mmio_num, uint64_t offset, uint64_t value);
fpga_result fpgaReadMMIO64(fpga_handle handle, uint32_t mmio_num, uint64_t offset, uint64_t *value);
const char *fpgaErrStr(fpga_result e);
#ifdef __cplusplus
} // extern "C"
#endif // __cplusplus
#endif // __FPGA_H__

View File

@@ -1,401 +0,0 @@
#include "opae_sim.h"
#include <iostream>
#include <fstream>
#include <iomanip>
#define CCI_LATENCY 8
#define CCI_RAND_MOD 8
#define CCI_RQ_SIZE 16
#define CCI_WQ_SIZE 16
#define ENABLE_MEM_STALLS
#ifndef TRACE_START_TIME
#define TRACE_START_TIME 0ull
#endif
#ifndef TRACE_STOP_TIME
#define TRACE_STOP_TIME -1ull
#endif
#ifndef MEM_LATENCY
#define MEM_LATENCY 24
#endif
#ifndef MEM_RQ_SIZE
#define MEM_RQ_SIZE 16
#endif
#ifndef MEM_STALLS_MODULO
#define MEM_STALLS_MODULO 16
#endif
#ifndef VERILATOR_RESET_VALUE
#define VERILATOR_RESET_VALUE 2
#endif
static uint64_t timestamp = 0;
double sc_time_stamp() {
return timestamp;
}
static void *__aligned_malloc(size_t alignment, size_t size) {
// reserve margin for alignment and storing of unaligned address
size_t margin = (alignment-1) + sizeof(void*);
void *unaligned_addr = malloc(size + margin);
void **aligned_addr = (void**)((uintptr_t)(((uint8_t*)unaligned_addr) + margin) & ~(alignment-1));
aligned_addr[-1] = unaligned_addr;
return aligned_addr;
}
static void __aligned_free(void *ptr) {
// retreive the stored unaligned address and use it to free the allocation
void* unaligned_addr = ((void**)ptr)[-1];
free(unaligned_addr);
}
///////////////////////////////////////////////////////////////////////////////
static bool trace_enabled = false;
static uint64_t trace_start_time = TRACE_START_TIME;
static uint64_t trace_stop_time = TRACE_STOP_TIME;
bool sim_trace_enabled() {
if (timestamp >= trace_start_time
&& timestamp < trace_stop_time)
return true;
return trace_enabled;
}
void sim_trace_enable(bool enable) {
trace_enabled = enable;
}
///////////////////////////////////////////////////////////////////////////////
opae_sim::opae_sim()
: stop_(false)
, host_buffer_ids_(0)
{
// force random values for unitialized signals
Verilated::randReset(VERILATOR_RESET_VALUE);
Verilated::randSeed(50);
// Turn off assertion before reset
Verilated::assertOn(false);
vortex_afu_ = new Vvortex_afu_shim();
#ifdef VCD_OUTPUT
Verilated::traceEverOn(true);
trace_ = new VerilatedVcdC();
vortex_afu_->trace(trace_, 99);
trace_->open("trace.vcd");
#endif
// reset the device
this->reset();
// launch execution thread
future_ = std::async(std::launch::async, [&]{
while (!stop_) {
std::lock_guard<std::mutex> guard(mutex_);
this->step();
}
});
}
opae_sim::~opae_sim() {
stop_ = true;
if (future_.valid()) {
future_.wait();
}
#ifdef VCD_OUTPUT
trace_->close();
delete trace_;
#endif
for (auto& buffer : host_buffers_) {
__aligned_free(buffer.second.data);
}
delete vortex_afu_;
}
int opae_sim::prepare_buffer(uint64_t len, void **buf_addr, uint64_t *wsid, int flags) {
auto alloc = __aligned_malloc(CACHE_BLOCK_SIZE, len);
if (alloc == NULL)
return -1;
host_buffer_t buffer;
buffer.data = (uint64_t*)alloc;
buffer.size = len;
buffer.ioaddr = uintptr_t(alloc);
auto buffer_id = host_buffer_ids_++;
host_buffers_.emplace(buffer_id, buffer);
*buf_addr = alloc;
*wsid = buffer_id;
return 0;
}
void opae_sim::release_buffer(uint64_t wsid) {
auto it = host_buffers_.find(wsid);
if (it != host_buffers_.end()) {
__aligned_free(it->second.data);
host_buffers_.erase(it);
}
}
void opae_sim::get_io_address(uint64_t wsid, uint64_t *ioaddr) {
*ioaddr = host_buffers_[wsid].ioaddr;
}
void opae_sim::read_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t *value) {
std::lock_guard<std::mutex> guard(mutex_);
vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid = 1;
vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_address = offset / 4;
vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_length = 1;
vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_tid = 0;
this->step();
vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid = 0;
assert(vortex_afu_->af2cp_sTxPort_c2_mmioRdValid);
*value = vortex_afu_->af2cp_sTxPort_c2_data;
}
void opae_sim::write_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t value) {
std::lock_guard<std::mutex> guard(mutex_);
vortex_afu_->vcp2af_sRxPort_c0_mmioWrValid = 1;
vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_address = offset / 4;
vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_length = 1;
vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_tid = 0;
memcpy(vortex_afu_->vcp2af_sRxPort_c0_data, &value, 8);
this->step();
vortex_afu_->vcp2af_sRxPort_c0_mmioWrValid = 0;
}
///////////////////////////////////////////////////////////////////////////////
void opae_sim::reset() {
cci_reads_.clear();
cci_writes_.clear();
vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid = 0;
vortex_afu_->vcp2af_sRxPort_c0_mmioWrValid = 0;
vortex_afu_->vcp2af_sRxPort_c0_rspValid = 0;
vortex_afu_->vcp2af_sRxPort_c1_rspValid = 0;
vortex_afu_->vcp2af_sRxPort_c0_TxAlmFull = 0;
vortex_afu_->vcp2af_sRxPort_c1_TxAlmFull = 0;
for (int b = 0; b < MEMORY_BANKS; ++b) {
mem_reads_[b].clear();
vortex_afu_->avs_readdatavalid[b] = 0;
vortex_afu_->avs_waitrequest[b] = 0;
}
vortex_afu_->reset = 1;
for (int i = 0; i < RESET_DELAY; ++i) {
vortex_afu_->clk = 0;
this->eval();
vortex_afu_->clk = 1;
this->eval();
}
vortex_afu_->reset = 0;
// Turn on assertion after reset
Verilated::assertOn(true);
}
void opae_sim::step() {
this->sRxPort_bus();
this->sTxPort_bus();
this->avs_bus();
vortex_afu_->clk = 0;
this->eval();
vortex_afu_->clk = 1;
this->eval();
#ifndef NDEBUG
fflush(stdout);
#endif
}
void opae_sim::eval() {
vortex_afu_->eval();
#ifdef VCD_OUTPUT
if (sim_trace_enabled()) {
trace_->dump(timestamp);
}
#endif
++timestamp;
}
void opae_sim::sRxPort_bus() {
// check mmio request
bool mmio_req_enabled = vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid
|| vortex_afu_->vcp2af_sRxPort_c0_mmioWrValid;
// schedule CCI read responses
std::list<cci_rd_req_t>::iterator cci_rd_it(cci_reads_.end());
for (auto it = cci_reads_.begin(), ie = cci_reads_.end(); it != ie; ++it) {
if (it->cycles_left > 0)
it->cycles_left -= 1;
if ((cci_rd_it == ie) && (it->cycles_left == 0)) {
cci_rd_it = it;
}
}
// schedule CCI write responses
std::list<cci_wr_req_t>::iterator cci_wr_it(cci_writes_.end());
for (auto it = cci_writes_.begin(), ie = cci_writes_.end(); it != ie; ++it) {
if (it->cycles_left > 0)
it->cycles_left -= 1;
if ((cci_wr_it == ie) && (it->cycles_left == 0)) {
cci_wr_it = it;
}
}
// send CCI write response
vortex_afu_->vcp2af_sRxPort_c1_rspValid = 0;
if (cci_wr_it != cci_writes_.end()) {
vortex_afu_->vcp2af_sRxPort_c1_rspValid = 1;
vortex_afu_->vcp2af_sRxPort_c1_hdr_resp_type = 0;
vortex_afu_->vcp2af_sRxPort_c1_hdr_mdata = cci_wr_it->mdata;
cci_writes_.erase(cci_wr_it);
}
// send CCI read response (ensure mmio disabled)
vortex_afu_->vcp2af_sRxPort_c0_rspValid = 0;
if (!mmio_req_enabled
&& (cci_rd_it != cci_reads_.end())) {
vortex_afu_->vcp2af_sRxPort_c0_rspValid = 1;
vortex_afu_->vcp2af_sRxPort_c0_hdr_resp_type = 0;
memcpy(vortex_afu_->vcp2af_sRxPort_c0_data, cci_rd_it->data.data(), CACHE_BLOCK_SIZE);
vortex_afu_->vcp2af_sRxPort_c0_hdr_mdata = cci_rd_it->mdata;
/*printf("%0ld: [sim] CCI Rd Rsp: addr=%ld, mdata=%d, data=", timestamp, cci_rd_it->addr, cci_rd_it->mdata);
for (int i = 0; i < CACHE_BLOCK_SIZE; ++i)
printf("%02x", cci_rd_it->data[CACHE_BLOCK_SIZE-1-i]);
printf("\n");*/
cci_reads_.erase(cci_rd_it);
}
}
void opae_sim::sTxPort_bus() {
// process read requests
if (vortex_afu_->af2cp_sTxPort_c0_valid) {
assert(!vortex_afu_->vcp2af_sRxPort_c0_TxAlmFull);
cci_rd_req_t cci_req;
cci_req.cycles_left = CCI_LATENCY + (timestamp % CCI_RAND_MOD);
cci_req.addr = vortex_afu_->af2cp_sTxPort_c0_hdr_address;
cci_req.mdata = vortex_afu_->af2cp_sTxPort_c0_hdr_mdata;
auto host_ptr = (uint64_t*)(vortex_afu_->af2cp_sTxPort_c0_hdr_address * CACHE_BLOCK_SIZE);
memcpy(cci_req.data.data(), host_ptr, CACHE_BLOCK_SIZE);
//printf("%0ld: [sim] CCI Rd Req: addr=%ld, mdata=%d\n", timestamp, vortex_afu_->af2cp_sTxPort_c0_hdr_address, cci_req.mdata);
cci_reads_.emplace_back(cci_req);
}
// process write requests
if (vortex_afu_->af2cp_sTxPort_c1_valid) {
assert(!vortex_afu_->vcp2af_sRxPort_c1_TxAlmFull);
cci_wr_req_t cci_req;
cci_req.cycles_left = CCI_LATENCY + (timestamp % CCI_RAND_MOD);
cci_req.mdata = vortex_afu_->af2cp_sTxPort_c1_hdr_mdata;
auto host_ptr = (uint64_t*)(vortex_afu_->af2cp_sTxPort_c1_hdr_address * CACHE_BLOCK_SIZE);
memcpy(host_ptr, vortex_afu_->af2cp_sTxPort_c1_data, CACHE_BLOCK_SIZE);
cci_writes_.emplace_back(cci_req);
}
// check queues overflow
vortex_afu_->vcp2af_sRxPort_c0_TxAlmFull = (cci_reads_.size() >= (CCI_RQ_SIZE-1));
vortex_afu_->vcp2af_sRxPort_c1_TxAlmFull = (cci_writes_.size() >= (CCI_WQ_SIZE-1));
}
void opae_sim::avs_bus() {
for (int b = 0; b < MEMORY_BANKS; ++b) {
// update memory responses schedule
for (auto& rsp : mem_reads_[b]) {
if (rsp.cycles_left > 0)
rsp.cycles_left -= 1;
}
// schedule memory responses in FIFO order
std::list<mem_rd_req_t>::iterator mem_rd_it(mem_reads_[b].end());
if (!mem_reads_[b].empty()
&& (0 == mem_reads_[b].begin()->cycles_left)) {
mem_rd_it = mem_reads_[b].begin();
}
// send memory response
vortex_afu_->avs_readdatavalid[b] = 0;
if (mem_rd_it != mem_reads_[b].end()) {
vortex_afu_->avs_readdatavalid[b] = 1;
memcpy(vortex_afu_->avs_readdata[b], mem_rd_it->data.data(), MEM_BLOCK_SIZE);
uint32_t addr = mem_rd_it->addr;
mem_reads_[b].erase(mem_rd_it);
/*printf("%0ld: [sim] MEM Rd Rsp: bank=%d, addr=%x, pending={", timestamp, b, addr * MEM_BLOCK_SIZE);
for (auto& req : mem_reads_[b]) {
if (req.cycles_left != 0)
printf(" !%0x", req.addr * MEM_BLOCK_SIZE);
else
printf(" %0x", req.addr * MEM_BLOCK_SIZE);
}
printf("}\n");*/
}
// handle memory stalls
bool mem_stalled = false;
#ifdef ENABLE_MEM_STALLS
if (0 == ((timestamp/2) % MEM_STALLS_MODULO)) {
mem_stalled = true;
} else
if (mem_reads_[b].size() >= MEM_RQ_SIZE) {
mem_stalled = true;
}
#endif
// process memory requests
if (!mem_stalled) {
assert(!vortex_afu_->avs_read[b] || !vortex_afu_->avs_write[b]);
if (vortex_afu_->avs_write[b]) {
uint64_t byteen = vortex_afu_->avs_byteenable[b];
unsigned base_addr = vortex_afu_->avs_address[b] * MEM_BLOCK_SIZE;
uint8_t* data = (uint8_t*)(vortex_afu_->avs_writedata[b]);
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
if ((byteen >> i) & 0x1) {
ram_[base_addr + i] = data[i];
}
}
/*printf("%0ld: [sim] MEM Wr Req: bank=%d, addr=%x, data=", timestamp, b, base_addr);
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
printf("%02x", data[(MEM_BLOCK_SIZE-1)-i]);
}
printf("\n");*/
}
if (vortex_afu_->avs_read[b]) {
mem_rd_req_t mem_req;
mem_req.addr = vortex_afu_->avs_address[b];
ram_.read(vortex_afu_->avs_address[b] * MEM_BLOCK_SIZE, MEM_BLOCK_SIZE, mem_req.data.data());
mem_req.cycles_left = MEM_LATENCY;
for (auto& rsp : mem_reads_[b]) {
if (mem_req.addr == rsp.addr) {
// duplicate requests receive the same cycle delay
mem_req.cycles_left = rsp.cycles_left;
break;
}
}
mem_reads_[b].emplace_back(mem_req);
/*printf("%0ld: [sim] MEM Rd Req: bank=%d, addr=%x, pending={", timestamp, b, mem_req.addr * MEM_BLOCK_SIZE);
for (auto& req : mem_reads_[b]) {
if (req.cycles_left != 0)
printf(" !%0x", req.addr * MEM_BLOCK_SIZE);
else
printf(" %0x", req.addr * MEM_BLOCK_SIZE);
}
printf("}\n");*/
}
}
vortex_afu_->avs_waitrequest[b] = mem_stalled;
}
}

View File

@@ -1,104 +0,0 @@
#pragma once
#include <verilated.h>
#include "Vvortex_afu_shim.h"
#include "Vvortex_afu_shim__Syms.h"
#ifdef VCD_OUTPUT
#include <verilated_vcd_c.h>
#endif
#include <VX_config.h>
#include "vortex_afu.h"
#include "ram.h"
#include <ostream>
#include <future>
#include <list>
#include <unordered_map>
#ifndef MEMORY_BANKS
#ifdef PLATFORM_PARAM_LOCAL_MEMORY_BANKS
#define MEMORY_BANKS PLATFORM_PARAM_LOCAL_MEMORY_BANKS
#else
#define MEMORY_BANKS 2
#endif
#endif
#undef MEM_BLOCK_SIZE
#define MEM_BLOCK_SIZE (PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH / 8)
#define CACHE_BLOCK_SIZE 64
class opae_sim {
public:
opae_sim();
virtual ~opae_sim();
int prepare_buffer(uint64_t len, void **buf_addr, uint64_t *wsid, int flags);
void release_buffer(uint64_t wsid);
void get_io_address(uint64_t wsid, uint64_t *ioaddr);
void write_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t value);
void read_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t *value);
private:
typedef struct {
int cycles_left;
std::array<uint8_t, MEM_BLOCK_SIZE> data;
uint32_t addr;
} mem_rd_req_t;
typedef struct {
int cycles_left;
std::array<uint8_t, CACHE_BLOCK_SIZE> data;
uint64_t addr;
uint32_t mdata;
} cci_rd_req_t;
typedef struct {
int cycles_left;
uint32_t mdata;
} cci_wr_req_t;
typedef struct {
uint64_t* data;
size_t size;
uint64_t ioaddr;
} host_buffer_t;
void reset();
void eval();
void step();
void sRxPort_bus();
void sTxPort_bus();
void avs_bus();
std::future<void> future_;
bool stop_;
std::unordered_map<int64_t, host_buffer_t> host_buffers_;
int64_t host_buffer_ids_;
std::list<mem_rd_req_t> mem_reads_ [MEMORY_BANKS];
std::list<cci_rd_req_t> cci_reads_;
std::list<cci_wr_req_t> cci_writes_;
std::mutex mutex_;
RAM ram_;
Vvortex_afu_shim *vortex_afu_;
#ifdef VCD_OUTPUT
VerilatedVcdC *trace_;
#endif
};

View File

@@ -1,64 +0,0 @@
#pragma once
#include <stdio.h>
#include <stdint.h>
class RAM {
private:
mutable uint8_t *mem_[(1 << 12)];
uint8_t *get(uint32_t address) const {
uint32_t block_addr = address >> 20;
uint32_t block_offset = address & 0x000FFFFF;
if (mem_[block_addr] == NULL) {
mem_[block_addr] = new uint8_t[(1 << 20)];
}
return mem_[block_addr] + block_offset;
}
public:
RAM() {
for (uint32_t i = 0; i < (1 << 12); i++) {
mem_[i] = NULL;
}
}
~RAM() {
this->clear();
}
size_t size() const {
return (1ull << 32);
}
void clear() {
for (uint32_t i = 0; i < (1 << 12); i++) {
if (mem_[i]) {
delete [] mem_[i];
mem_[i] = NULL;
}
}
}
void read(uint32_t address, uint32_t length, uint8_t *data) const {
for (unsigned i = 0; i < length; i++) {
data[i] = *this->get(address + i);
}
}
void write(uint32_t address, uint32_t length, const uint8_t *data) {
for (unsigned i = 0; i < length; i++) {
*this->get(address + i) = data[i];
}
}
uint8_t& operator[](uint32_t address) {
return *get(address);
}
const uint8_t& operator[](uint32_t address) const {
return *get(address);
}
};

View File

@@ -1,10 +0,0 @@
`verilator_config
lint_off -rule BLKANDNBLK -file "../../../hw/rtl/fp_cores/fpnew/*"
lint_off -rule UNOPTFLAT -file "../../../hw/rtl/fp_cores/fpnew/*"
lint_off -rule WIDTH -file "../../../hw/rtl/fp_cores/fpnew/*"
lint_off -rule UNUSED -file "../../../hw/rtl/fp_cores/fpnew/*"
lint_off -rule LITENDIAN -file "../../../hw/rtl/fp_cores/fpnew/*"
lint_off -rule IMPORTSTAR -file "../../../hw/rtl/fp_cores/fpnew/*"
lint_off -rule PINCONNECTEMPTY -file "../../../hw/rtl/fp_cores/fpnew/*"
lint_off -file "../rtl/fp_cores/fpnew/*"

View File

@@ -1,170 +0,0 @@
`include "VX_platform.vh"
`IGNORE_WARNINGS_BEGIN
`include "vortex_afu.vh"
`IGNORE_WARNINGS_END
/* verilator lint_off IMPORTSTAR */
import ccip_if_pkg::*;
import local_mem_cfg_pkg::*;
/* verilator lint_on IMPORTSTAR */
`include "VX_define.vh"
module vortex_afu_shim (
// global signals
input clk,
input reset,
// IF signals between CCI and AFU
input logic vcp2af_sRxPort_c0_TxAlmFull,
input logic vcp2af_sRxPort_c1_TxAlmFull,
input t_ccip_vc vcp2af_sRxPort_c0_hdr_vc_used,
input logic vcp2af_sRxPort_c0_hdr_rsvd1,
input logic vcp2af_sRxPort_c0_hdr_hit_miss,
input logic [1:0] vcp2af_sRxPort_c0_hdr_rsvd0,
input t_ccip_clNum vcp2af_sRxPort_c0_hdr_cl_num,
input t_ccip_c0_rsp vcp2af_sRxPort_c0_hdr_resp_type,
input t_ccip_mdata vcp2af_sRxPort_c0_hdr_mdata,
input t_ccip_clData vcp2af_sRxPort_c0_data,
input logic vcp2af_sRxPort_c0_rspValid,
input logic vcp2af_sRxPort_c0_mmioRdValid,
input logic vcp2af_sRxPort_c0_mmioWrValid,
input t_ccip_mmioAddr vcp2af_sRxPort_c0_ReqMmioHdr_address,
input logic [1:0] vcp2af_sRxPort_c0_ReqMmioHdr_length,
input logic vcp2af_sRxPort_c0_ReqMmioHdr_rsvd,
input t_ccip_tid vcp2af_sRxPort_c0_ReqMmioHdr_tid,
input t_ccip_vc vcp2af_sRxPort_c1_hdr_vc_used,
input logic vcp2af_sRxPort_c1_hdr_rsvd1,
input logic vcp2af_sRxPort_c1_hdr_hit_miss,
input logic vcp2af_sRxPort_c1_hdr_format,
input logic vcp2af_sRxPort_c1_hdr_rsvd0,
input t_ccip_clNum vcp2af_sRxPort_c1_hdr_cl_num,
input t_ccip_c1_rsp vcp2af_sRxPort_c1_hdr_resp_type,
input t_ccip_mdata vcp2af_sRxPort_c1_hdr_mdata,
input logic vcp2af_sRxPort_c1_rspValid,
output t_ccip_vc af2cp_sTxPort_c0_hdr_vc_sel,
output logic [1:0] af2cp_sTxPort_c0_hdr_rsvd1,
output t_ccip_clLen af2cp_sTxPort_c0_hdr_cl_len,
output t_ccip_c0_req af2cp_sTxPort_c0_hdr_req_type,
output logic [5:0] af2cp_sTxPort_c0_hdr_rsvd0,
output t_ccip_clAddr af2cp_sTxPort_c0_hdr_address,
output t_ccip_mdata af2cp_sTxPort_c0_hdr_mdata,
output logic af2cp_sTxPort_c0_valid,
output logic [5:0] af2cp_sTxPort_c1_hdr_rsvd2,
output t_ccip_vc af2cp_sTxPort_c1_hdr_vc_sel,
output logic af2cp_sTxPort_c1_hdr_sop,
output logic af2cp_sTxPort_c1_hdr_rsvd1,
output t_ccip_clLen af2cp_sTxPort_c1_hdr_cl_len,
output t_ccip_c1_req af2cp_sTxPort_c1_hdr_req_type,
output logic [5:0] af2cp_sTxPort_c1_hdr_rsvd0,
output t_ccip_clAddr af2cp_sTxPort_c1_hdr_address,
output t_ccip_mdata af2cp_sTxPort_c1_hdr_mdata,
output t_ccip_clData af2cp_sTxPort_c1_data,
output logic af2cp_sTxPort_c1_valid,
output t_ccip_tid af2cp_sTxPort_c2_hdr_tid,
output logic af2cp_sTxPort_c2_mmioRdValid,
output t_ccip_mmioData af2cp_sTxPort_c2_data,
// Avalon signals for local memory access
output t_local_mem_data avs_writedata [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
input t_local_mem_data avs_readdata [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
output t_local_mem_addr avs_address [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
input logic avs_waitrequest [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
output logic avs_write [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
output logic avs_read [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
output t_local_mem_byte_mask avs_byteenable [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
output t_local_mem_burst_cnt avs_burstcount [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
input avs_readdatavalid [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS]
);
t_if_ccip_Rx cp2af_sRxPort;
t_if_ccip_Tx af2cp_sTxPort;
vortex_afu #(
.NUM_LOCAL_MEM_BANKS(`PLATFORM_PARAM_LOCAL_MEMORY_BANKS)
) afu (
.clk(clk),
.reset(reset),
.cp2af_sRxPort(cp2af_sRxPort),
.af2cp_sTxPort(af2cp_sTxPort),
.avs_writedata(avs_writedata),
.avs_readdata(avs_readdata),
.avs_address(avs_address),
.avs_waitrequest(avs_waitrequest),
.avs_write(avs_write),
.avs_read(avs_read),
.avs_byteenable(avs_byteenable),
.avs_burstcount(avs_burstcount),
.avs_readdatavalid(avs_readdatavalid)
);
t_if_ccip_c0_RxHdr c0_RxHdr;
always @ (*) begin
c0_RxHdr = 'x;
if (vcp2af_sRxPort_c0_mmioWrValid || vcp2af_sRxPort_c0_mmioRdValid) begin
c0_RxHdr.reqMmioHdr.address = vcp2af_sRxPort_c0_ReqMmioHdr_address;
c0_RxHdr.reqMmioHdr.length = vcp2af_sRxPort_c0_ReqMmioHdr_length;
c0_RxHdr.reqMmioHdr.rsvd = vcp2af_sRxPort_c0_ReqMmioHdr_rsvd;
c0_RxHdr.reqMmioHdr.tid = vcp2af_sRxPort_c0_ReqMmioHdr_tid;
end else begin
c0_RxHdr.rspMemHdr.vc_used = vcp2af_sRxPort_c0_hdr_vc_used;
c0_RxHdr.rspMemHdr.rsvd1 = vcp2af_sRxPort_c0_hdr_rsvd1;
c0_RxHdr.rspMemHdr.hit_miss = vcp2af_sRxPort_c0_hdr_hit_miss;
c0_RxHdr.rspMemHdr.rsvd0 = vcp2af_sRxPort_c0_hdr_rsvd0;
c0_RxHdr.rspMemHdr.cl_num = vcp2af_sRxPort_c0_hdr_cl_num;
c0_RxHdr.rspMemHdr.resp_type = vcp2af_sRxPort_c0_hdr_resp_type;
c0_RxHdr.rspMemHdr.mdata = vcp2af_sRxPort_c0_hdr_mdata;
end
end
assign cp2af_sRxPort.c0TxAlmFull = vcp2af_sRxPort_c0_TxAlmFull;
assign cp2af_sRxPort.c1TxAlmFull = vcp2af_sRxPort_c1_TxAlmFull;
assign cp2af_sRxPort.c0.hdr = c0_RxHdr;
assign cp2af_sRxPort.c0.data = vcp2af_sRxPort_c0_data;
assign cp2af_sRxPort.c0.rspValid = vcp2af_sRxPort_c0_rspValid;
assign cp2af_sRxPort.c0.mmioRdValid = vcp2af_sRxPort_c0_mmioRdValid;
assign cp2af_sRxPort.c0.mmioWrValid = vcp2af_sRxPort_c0_mmioWrValid;
assign cp2af_sRxPort.c1.hdr.vc_used = vcp2af_sRxPort_c1_hdr_vc_used;
assign cp2af_sRxPort.c1.hdr.rsvd1 = vcp2af_sRxPort_c1_hdr_rsvd1;
assign cp2af_sRxPort.c1.hdr.hit_miss = vcp2af_sRxPort_c1_hdr_hit_miss;
assign cp2af_sRxPort.c1.hdr.format = vcp2af_sRxPort_c1_hdr_format;
assign cp2af_sRxPort.c1.hdr.rsvd0 = vcp2af_sRxPort_c1_hdr_rsvd0;
assign cp2af_sRxPort.c1.hdr.cl_num = vcp2af_sRxPort_c1_hdr_cl_num;
assign cp2af_sRxPort.c1.hdr.resp_type = vcp2af_sRxPort_c1_hdr_resp_type;
assign cp2af_sRxPort.c1.hdr.mdata = vcp2af_sRxPort_c1_hdr_mdata;
assign cp2af_sRxPort.c1.rspValid = vcp2af_sRxPort_c1_rspValid;
assign af2cp_sTxPort_c0_hdr_vc_sel = af2cp_sTxPort.c0.hdr.vc_sel;
assign af2cp_sTxPort_c0_hdr_rsvd1 = af2cp_sTxPort.c0.hdr.rsvd1;
assign af2cp_sTxPort_c0_hdr_cl_len = af2cp_sTxPort.c0.hdr.cl_len;
assign af2cp_sTxPort_c0_hdr_req_type = af2cp_sTxPort.c0.hdr.req_type;
assign af2cp_sTxPort_c0_hdr_rsvd0 = af2cp_sTxPort.c0.hdr.rsvd0;
assign af2cp_sTxPort_c0_hdr_address = af2cp_sTxPort.c0.hdr.address;
assign af2cp_sTxPort_c0_hdr_mdata = af2cp_sTxPort.c0.hdr.mdata;
assign af2cp_sTxPort_c0_valid = af2cp_sTxPort.c0.valid;
assign af2cp_sTxPort_c1_hdr_rsvd2 = af2cp_sTxPort.c1.hdr.rsvd2;
assign af2cp_sTxPort_c1_hdr_vc_sel = af2cp_sTxPort.c1.hdr.vc_sel;
assign af2cp_sTxPort_c1_hdr_sop = af2cp_sTxPort.c1.hdr.sop;
assign af2cp_sTxPort_c1_hdr_rsvd1 = af2cp_sTxPort.c1.hdr.rsvd1;
assign af2cp_sTxPort_c1_hdr_cl_len = af2cp_sTxPort.c1.hdr.cl_len;
assign af2cp_sTxPort_c1_hdr_req_type = af2cp_sTxPort.c1.hdr.req_type;
assign af2cp_sTxPort_c1_hdr_rsvd0 = af2cp_sTxPort.c1.hdr.rsvd0;
assign af2cp_sTxPort_c1_hdr_address = af2cp_sTxPort.c1.hdr.address;
assign af2cp_sTxPort_c1_hdr_mdata = af2cp_sTxPort.c1.hdr.mdata;
assign af2cp_sTxPort_c1_data = af2cp_sTxPort.c1.data;
assign af2cp_sTxPort_c1_valid = af2cp_sTxPort.c1.valid;
assign af2cp_sTxPort_c2_hdr_tid = af2cp_sTxPort.c2.hdr.tid;
assign af2cp_sTxPort_c2_mmioRdValid = af2cp_sTxPort.c2.mmioRdValid;
assign af2cp_sTxPort_c2_data = af2cp_sTxPort.c2.data;
endmodule

View File

@@ -1,24 +0,0 @@
//
// Generated by afu_json_mgr from ../../hw/opae/vortex_afu.json
//
#ifndef __AFU_JSON_INFO__
#define __AFU_JSON_INFO__
#define AFU_ACCEL_NAME "vortex_afu"
#define AFU_ACCEL_UUID "35F9452B-25C2-434C-93D5-6F8C60DB361C"
#define AFU_IMAGE_CMD_MEM_READ 1
#define AFU_IMAGE_CMD_MEM_WRITE 2
#define AFU_IMAGE_CMD_RUN 3
#define AFU_IMAGE_MMIO_CMD_TYPE 10
#define AFU_IMAGE_MMIO_DATA_SIZE 16
#define AFU_IMAGE_MMIO_IO_ADDR 12
#define AFU_IMAGE_MMIO_MEM_ADDR 14
#define AFU_IMAGE_MMIO_SCOPE_READ 20
#define AFU_IMAGE_MMIO_SCOPE_WRITE 22
#define AFU_IMAGE_MMIO_DEV_CAPS 24
#define AFU_IMAGE_MMIO_STATUS 18
#define AFU_IMAGE_POWER 0
#define AFU_TOP_IFC "ccip_std_afu_avalon_mm"
#endif // __AFU_JSON_INFO__

View File

@@ -1,90 +1,38 @@
CFLAGS += -std=c++11 -O2 -Wall -Wextra -Wfatal-errors
#CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors
RTLSIM_DIR = ../../sim/rtlsim
CFLAGS += -DUSE_RTLSIM -fPIC -Wno-maybe-uninitialized
CFLAGS += -I../../include -I../../../hw/simulate -I../../../hw
CXXFLAGS += -std=c++11 -O2 -DNDEBUG -Wall -Wextra -pedantic -Wfatal-errors
#CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors
# control RTL debug print states
DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE
DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_ICACHE
DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_DCACHE
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_BANK
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSHR
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_TAG
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_DATA
DBG_PRINT_FLAGS += -DDBG_PRINT_MEM
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
DBG_PRINT_FLAGS += -DDBG_PRINT_AVS
DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE
CXXFLAGS += -I../include -I../../hw -I$(RTLSIM_DIR) -I$(RTLSIM_DIR)/../common
DBG_FLAGS += $(DBG_PRINT_FLAGS)
DBG_FLAGS += -DDBG_CACHE_REQ_INFO
LDFLAGS += $(RTLSIM_DIR)/librtlsim.a
CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=1
# Position independent code
CXXFLAGS += -fPIC
CFLAGS += $(CONFIGS)
CFLAGS += -DDUMP_PERF_STATS
# Add external configuration
CXXFLAGS += $(CONFIGS)
LDFLAGS += -shared -pthread
#LDFLAGS += -dynamiclib -pthread
# Dump perf stats
CXXFLAGS += -DDUMP_PERF_STATS
ifdef AXI_BUS
TOP = Vortex_axi
CFLAGS += -DAXI_BUS
else
TOP = Vortex
endif
LDFLAGS += -shared
RTL_DIR = ../../hw/rtl
DPI_DIR = ../../hw/dpi
SRCS = vortex.cpp ../common/vx_utils.cpp ../../hw/simulate/simulator.cpp
SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp
FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src -I$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(RTL_DIR)/fp_cores/fpnew/src
RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache $(FPU_INCLUDE)
VL_FLAGS += -O2 --language 1800-2009 --assert -Wall -Wpedantic
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO
VL_FLAGS += --x-initial unique --x-assign unique
VL_FLAGS += verilator.vlt
VL_FLAGS += $(CONFIGS)
# Enable Verilator multithreaded simulation
#THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu_count() // 2))')
#VL_FLAGS += --threads $(THREADS)
# Debugigng
ifdef DEBUG
VL_FLAGS += -DVCD_OUTPUT --trace --trace-structs $(DBG_FLAGS)
CFLAGS += -DVCD_OUTPUT $(DBG_FLAGS)
else
VL_FLAGS += -DNDEBUG
CFLAGS += -DNDEBUG
endif
SRCS = vortex.cpp ../common/vx_utils.cpp
# Enable perf counters
ifdef PERF
VL_FLAGS += -DPERF_ENABLE
CFLAGS += -DPERF_ENABLE
CXXFLAGS += -DPERF_ENABLE
endif
# ALU backend
VL_FLAGS += -DIMUL_DPI
VL_FLAGS += -DIDIV_DPI
# FPU backend
FPU_CORE ?= FPU_DPI
VL_FLAGS += -D$(FPU_CORE)
PROJECT = libvortex.so
# PROJECT = libvortex.dylib
all: $(PROJECT)
$(PROJECT): $(SRCS)
verilator --exe --cc $(TOP) --top-module $(TOP) $(RTL_INCLUDE) $(VL_FLAGS) $(SRCS) -CFLAGS '$(CFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(PROJECT)
make -j -C obj_dir -f V$(TOP).mk
$(MAKE) -C $(RTLSIM_DIR) static
$(CXX) $(CXXFLAGS) $(SRCS) $(LDFLAGS) -o $(PROJECT)
clean:
rm -rf $(PROJECT) obj_dir
$(MAKE) -C $(RTLSIM_DIR) clean-objdir
rm -rf $(PROJECT) *.o .depend

View File

@@ -1,64 +0,0 @@
#pragma once
#include <stdio.h>
#include <stdint.h>
class RAM {
private:
mutable uint8_t *mem_[(1 << 12)];
uint8_t *get(uint32_t address) const {
uint32_t block_addr = address >> 20;
uint32_t block_offset = address & 0x000FFFFF;
if (mem_[block_addr] == NULL) {
mem_[block_addr] = new uint8_t[(1 << 20)];
}
return mem_[block_addr] + block_offset;
}
public:
RAM() {
for (uint32_t i = 0; i < (1 << 12); i++) {
mem_[i] = NULL;
}
}
~RAM() {
this->clear();
}
size_t size() const {
return (1ull << 32);
}
void clear() {
for (uint32_t i = 0; i < (1 << 12); i++) {
if (mem_[i]) {
delete [] mem_[i];
mem_[i] = NULL;
}
}
}
void read(uint32_t address, uint32_t length, uint8_t *data) const {
for (unsigned i = 0; i < length; i++) {
data[i] = *this->get(address + i);
}
}
void write(uint32_t address, uint32_t length, const uint8_t *data) {
for (unsigned i = 0; i < length; i++) {
*this->get(address + i) = data[i];
}
}
uint8_t& operator[](uint32_t address) {
return *get(address);
}
const uint8_t& operator[](uint32_t address) const {
return *get(address);
}
};

View File

@@ -1,10 +0,0 @@
`verilator_config
lint_off -rule BLKANDNBLK -file "../../hw/rtl/fp_cores/fpnew/*"
lint_off -rule UNOPTFLAT -file "../../hw/rtl/fp_cores/fpnew/*"
lint_off -rule WIDTH -file "../../hw/rtl/fp_cores/fpnew/*"
lint_off -rule UNUSED -file "../../hw/rtl/fp_cores/fpnew/*"
lint_off -rule LITENDIAN -file "../../hw/rtl/fp_cores/fpnew/*"
lint_off -rule IMPORTSTAR -file "../../hw/rtl/fp_cores/fpnew/*"
lint_off -rule PINCONNECTEMPTY -file "../../hw/rtl/fp_cores/fpnew/*"
lint_off -file "../rtl/fp_cores/fpnew/*"

View File

@@ -8,15 +8,11 @@
#include <vortex.h>
#include <VX_config.h>
#include <ram.h>
#include <mem.h>
#include <util.h>
#include <simulator.h>
///////////////////////////////////////////////////////////////////////////////
inline size_t align_size(size_t size, size_t alignment) {
assert(0 == (alignment & (alignment - 1)));
return (size + alignment - 1) & ~(alignment - 1);
}
using namespace vortex;
///////////////////////////////////////////////////////////////////////////////
@@ -58,7 +54,7 @@ private:
class vx_device {
public:
vx_device() {
vx_device() : ram_((1<<12), (1<<20)) {
mem_allocation_ = ALLOC_BASE_ADDR;
}
@@ -92,7 +88,7 @@ public:
}
printf("\n");*/
ram_.write(dest_addr, asize, (const uint8_t*)src + src_offset);
ram_.write((const uint8_t*)src + src_offset, dest_addr, asize);
return 0;
}
@@ -101,7 +97,7 @@ public:
if (src_addr + asize > ram_.size())
return -1;
ram_.read(src_addr, asize, (uint8_t*)dest + dest_offset);
ram_.read((uint8_t*)dest + dest_offset, src_addr, asize);
/*printf("VXDRV: download %ld bytes to 0x%lx:", size, uintptr_t((uint8_t*)dest + dest_offset));
for (int i = 0; i < (asize / CACHE_BLOCK_SIZE); ++i) {

View File

@@ -1,37 +1,29 @@
PROJECT = libvortex.so
#PROJECT = libvortex.dylib
SIMX_DIR = ../../simX
SIMX_DIR = ../../sim/simX
CXXFLAGS += -std=c++11 -O2 -Wall -Wextra -Wfatal-errors
#CXXFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors
CXXFLAGS += -DUSE_SIMX -fPIC -Wno-maybe-uninitialized
CXXFLAGS += -I../include -I../../hw -I$(SIMX_DIR)
CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=1
CXXFLAGS += -fPIC -Wno-maybe-uninitialized
CXXFLAGS += -I../include -I../../hw -I$(SIMX_DIR) -I$(SIMX_DIR)/../common
CXXFLAGS += $(CONFIGS)
CXXFLAGS += -DDUMP_PERF_STATS
LDFLAGS += -shared -pthread
#LDFLAGS += -dynamiclib -pthread
LDFLAGS += $(SIMX_DIR)/libsimX.a
SRCS = vortex.cpp ../common/vx_utils.cpp
SRCS += $(SIMX_DIR)/util.cpp $(SIMX_DIR)/args.cpp $(SIMX_DIR)/mem.cpp $(SIMX_DIR)/pipeline.cpp $(SIMX_DIR)/warp.cpp $(SIMX_DIR)/core.cpp $(SIMX_DIR)/decode.cpp $(SIMX_DIR)/execute.cpp
# Debugigng
ifndef DEBUG
CXXFLAGS += -DNDEBUG
endif
all: $(PROJECT)
$(PROJECT): $(SRCS)
$(MAKE) -C $(SIMX_DIR) static
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@
.depend: $(SRCS)
$(CXX) $(CXXFLAGS) -MM $^ > .depend;
clean:
$(MAKE) -C $(SIMX_DIR) clean-objdir
rm -rf $(PROJECT) *.o .depend

View File

@@ -10,15 +10,11 @@
#include <vortex.h>
#include <core.h>
#include <VX_config.h>
#include <util.h>
#define PAGE_SIZE 4096
#define PAGE_SIZE 4096
///////////////////////////////////////////////////////////////////////////////
inline size_t align_size(size_t size, size_t alignment) {
assert(0 == (alignment & (alignment - 1)));
return (size + alignment - 1) & ~(alignment - 1);
}
using namespace vortex;
///////////////////////////////////////////////////////////////////////////////
@@ -74,7 +70,7 @@ public:
mem_allocation_ = ALLOC_BASE_ADDR;
mmu_.attach(ram_, 0, 0xffffffff);
for (int i = 0; i < arch_.num_cores(); ++i) {
cores_[i] = std::make_shared<vortex::Core>(arch_, decoder_, mmu_, i);
cores_[i] = std::make_shared<Core>(arch_, decoder_, mmu_, i);
}
}
@@ -101,7 +97,7 @@ public:
if (dest_addr + asize > ram_.size())
return -1;
ram_.write(dest_addr, (const uint8_t*)src + src_offset, asize);
ram_.write((const uint8_t*)src + src_offset, dest_addr, asize);
/*printf("VXDRV: upload %d bytes to 0x%x\n", size, dest_addr);
for (int i = 0; i < size; i += 4) {
@@ -116,7 +112,7 @@ public:
if (src_addr + asize > ram_.size())
return -1;
ram_.read(src_addr, (uint8_t*)dest + dest_offset, asize);
ram_.read((uint8_t*)dest + dest_offset, src_addr, asize);
/*printf("VXDRV: download %d bytes from 0x%x\n", size, src_addr);
for (int i = 0; i < size; i += 4) {
@@ -209,15 +205,15 @@ private:
device->thread_proc();
}
vortex::ArchDef arch_;
vortex::Decoder decoder_;
vortex::MemoryUnit mmu_;
std::vector<std::shared_ptr<vortex::Core>> cores_;
ArchDef arch_;
Decoder decoder_;
MemoryUnit mmu_;
std::vector<std::shared_ptr<Core>> cores_;
bool is_done_;
bool is_running_;
size_t mem_allocation_;
std::thread thread_;
vortex::RAM ram_;
RAM ram_;
std::mutex mutex_;
};

64
driver/vlsim/Makefile Normal file
View File

@@ -0,0 +1,64 @@
VLSIM_DIR = ../../sim/vlsim
CXXFLAGS += -std=c++11 -O2 -DNDEBUG -Wall -Wextra -pedantic -Wfatal-errors
#CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors
CXXFLAGS += -I../include -I../../hw -I$(VLSIM_DIR)
LDFLAGS += $(VLSIM_DIR)/libopae-c-vlsim.a
# Position independent code
CXXFLAGS += -fPIC
# Add external configuration
CXXFLAGS += $(CONFIGS)
# Dump perf stats
CXXFLAGS += -DDUMP_PERF_STATS
LDFLAGS += -shared
RTL_DIR=../../hw/rtl
SCRIPT_DIR=../../hw/scripts
AFU_JSON_INFO = vortex_afu.h
SRCS = ../common/opae.cpp ../common/vx_utils.cpp
# Enable scope analyzer
ifdef SCOPE
CXXFLAGS += -DSCOPE
SRCS += ../common/vx_scope.cpp
SCOPE_H = scope-defs.h
endif
# Enable perf counters
ifdef PERF
CXXFLAGS += -DPERF_ENABLE
endif
PROJECT = libvortex.so
all: $(PROJECT)
scope-defs.h: $(SCRIPT_DIR)/scope.json
$(SCRIPT_DIR)/scope.py $(CONFIGS) -cc scope-defs.h -vl $(RTL_DIR)/scope-defs.vh $(SCRIPT_DIR)/scope.json
# generate scope data
scope: scope-defs.h
$(PROJECT): $(SRCS) $(SCOPE_H)
$(SCOPE_ENABLE) $(PERF_ENABLE) $(MAKE) -C $(VLSIM_DIR) static
$(CXX) $(CXXFLAGS) -DUSE_VLSIM $(SRCS) $(LDFLAGS) -o $(PROJECT)
.depend: $(SRCS)
$(CXX) $(CXXFLAGS) -MM $(SRCS) > .depend;
clean:
$(MAKE) -C $(VLSIM_DIR) clean-objdir
rm -rf $(PROJECT) *.o .depend
ifneq ($(MAKECMDGOALS),clean)
-include .depend
endif