diff --git a/driver/Makefile b/driver/Makefile
index 215a0362..58de93a2 100644
--- a/driver/Makefile
+++ b/driver/Makefile
@@ -1,4 +1,4 @@
-all: stub rtlsim simx
+all: stub rtlsim simx opae
 
 stub:
 	$(MAKE) -C stub
diff --git a/driver/opae/Makefile b/driver/opae/Makefile
index 9946470f..a2ebdd05 100644
--- a/driver/opae/Makefile
+++ b/driver/opae/Makefile
@@ -1,9 +1,10 @@
+OPAE_HOME ?= /tools/opae/1.4.0
 
 CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors
 
-CXXFLAGS += -I../include -I/tools/opae/1.4.0/include -I../../hw
+CXXFLAGS += -I../include -I$(OPAE_HOME)/include -I../../hw
 
-LDFLAGS += -L/tools/opae/1.4.0/lib
+LDFLAGS += -L$(OPAE_HOME)/lib
 
 # stack execution protection
 LDFLAGS +=-z noexecstack
@@ -23,49 +24,61 @@ CXXFLAGS += -DDUMP_PERF_STATS
 # Enable scope analyzer
 #CXXFLAGS += -DSCOPE
 
-LDFLAGS += -luuid
-
 LDFLAGS += -shared
 
-FPGA_LIBS += -lopae-c
+FPGA_LIBS += -luuid -lopae-c
 
-ASE_LIBS += -lopae-c-ase
+ASE_LIBS += -luuid -lopae-c-ase
+
+VLSIM_LIBS += -lopae-c-vlsim
 
 LIB_DIR=../lib
 
 ASE_DIR = ase
 
+VLSIM_DIR = vlsim
+
 PROJECT = libvortex.so
 
 PROJECT_ASE = $(ASE_DIR)/libvortex.so
 
+PROJECT_VLSIM = $(VLSIM_DIR)/libvortex.so
+
 AFU_JSON_INFO = vortex_afu.h
 
-SRCS = vortex.cpp scope.cpp ../common/vx_utils.cpp
+SRCS = vortex.cpp vx_scope.cpp ../common/vx_utils.cpp
 
-all: $(PROJECT) $(PROJECT_ASE)
+# Command-style targets; 'ase' and 'vlsim' are also directory names here.
+.PHONY: all json fpga ase vlsim opae-vlsim clean
+
+all: vlsim
 
 # AFU info from JSON file, including AFU UUID
-$(AFU_JSON_INFO): ../../hw/opae/vortex_afu.json
-	afu_json_mgr json-info --afu-json=$^ --c-hdr=$@
+json: ../../hw/opae/vortex_afu.json
+	afu_json_mgr json-info --afu-json=$^ --c-hdr=$(AFU_JSON_INFO)
 
-$(PROJECT): $(SRCS)
-	$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) $(FPGA_LIBS) -o $@
+fpga: $(SRCS)
+	$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) $(FPGA_LIBS) -o $(PROJECT)
 
-$(PROJECT_ASE): $(SRCS) $(ASE_DIR)
-	$(CXX) $(CXXFLAGS) -DUSE_ASE $(SRCS) $(LDFLAGS) $(ASE_LIBS) -o $@
+ase: $(SRCS)
+	mkdir -p $(ASE_DIR)
+	$(CXX) $(CXXFLAGS) -DUSE_ASE $(SRCS) $(LDFLAGS) $(ASE_LIBS) -o $(PROJECT_ASE)
 
-vortex.o: vortex.cpp $(AFU_JSON_INFO)
+vlsim: $(SRCS) opae-vlsim
+	$(CXX) $(CXXFLAGS) -L./vlsim -DUSE_VLSIM $(SRCS) $(LDFLAGS) $(VLSIM_LIBS) -o $(PROJECT_VLSIM)
+
+opae-vlsim:
+	$(MAKE) -C vlsim
+
+vortex.o: vortex.cpp
 	$(CXX) $(CXXFLAGS) -c vortex.cpp -o $@
 
-$(ASE_DIR):
-	mkdir -p ase
-
-.depend: $(SRCS) $(AFU_JSON_INFO)
+.depend: $(SRCS)
 	$(CXX) $(CXXFLAGS) -MM $(SRCS) > .depend;
 
 clean:
-	rm -rf $(PROJECT) $(PROJECT_ASE) $(AFU_JSON_INFO) *.o .depend
+	rm -rf $(PROJECT) $(PROJECT_ASE) $(PROJECT_VLSIM) *.o .depend
+	$(MAKE) -C vlsim clean
 
 ifneq ($(MAKECMDGOALS),clean)
     -include .depend
diff --git a/driver/opae/vlsim/Makefile b/driver/opae/vlsim/Makefile
new file mode 100644
index 00000000..a30b6eda
--- /dev/null
+++ b/driver/opae/vlsim/Makefile
@@ -0,0 +1,80 @@
+#CFLAGS += -std=c++11 -O2 -DNDEBUG -Wall -Wextra -Wfatal-errors
+CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors
+
+CFLAGS += -I../../../../hw
+
+# control RTL debug print states
+DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_ICACHE
+DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_DCACHE
+DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_BANK
+DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_SNP
+DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSRQ
+DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM
+DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE
+DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
+
+DBG_FLAGS += $(DBG_PRINT_FLAGS)
+DBG_FLAGS += -DDBG_CORE_REQ_INFO
+
+#CONFIGS += -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1
+#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1
+#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
+CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=1
+
+#DEBUG=1
+
+CFLAGS += -fPIC
+
+CFLAGS += -DUSE_RTLSIM $(CONFIGS)
+
+CFLAGS += -DDUMP_PERF_STATS
+
+LDFLAGS += -shared -pthread
+# LDFLAGS += -dynamiclib -pthread
+
+TOP = vortex_afu_shim
+
+RTL_DIR = ../../../hw/rtl
+
+SRCS = fpga.cpp opae_sim.cpp
+SRCS += $(RTL_DIR)/fp_cores/svdpi/float_dpi.cpp
+
+FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(RTL_DIR)/fp_cores/svdpi -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src -I$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(RTL_DIR)/fp_cores/fpnew/src
+RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache $(FPU_INCLUDE)
+
+VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic $(CONFIGS)
+VL_FLAGS += -Wno-DECLFILENAME
+VL_FLAGS += --x-initial unique --x-assign unique
+VL_FLAGS += verilator.vlt
+
+# Enable Verilator multithreaded simulation
+#THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu_count() // 2))')
+#VL_FLAGS += --threads $(THREADS)
+
+# Debugging
+ifdef DEBUG
+  VL_FLAGS += -DVCD_OUTPUT --assert --trace $(DBG_FLAGS)
+  CFLAGS += -DVCD_OUTPUT $(DBG_FLAGS)
+else
+  VL_FLAGS += -DNDEBUG
+  CFLAGS += -DNDEBUG
+endif
+
+VL_FLAGS += -DNOPAE
+CFLAGS += -DNOPAE
+VL_FLAGS += -DSCOPE
+CFLAGS += -DSCOPE
+RTL_INCLUDE += -I../../../hw/opae -I../../../hw/opae/ccip
+
+PROJECT = libopae-c-vlsim.so
+
+.PHONY: all clean
+
+all: $(PROJECT)
+
+$(PROJECT): $(SRCS)
+	verilator --exe --cc $(TOP) --top-module $(TOP) $(RTL_INCLUDE) $(VL_FLAGS) $(SRCS) -CFLAGS '$(CFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(PROJECT)
+	$(MAKE) -j -C obj_dir -f V$(TOP).mk
+
+clean:
+	rm -rf $(PROJECT) obj_dir
diff --git a/driver/opae/vlsim/fpga.cpp b/driver/opae/vlsim/fpga.cpp
new file mode 100644
index 00000000..3607f26e
--- /dev/null
+++ b/driver/opae/vlsim/fpga.cpp
@@ -0,0 +1,82 @@
+#include  // NOTE(review): header names of the bare #include lines in this file are missing; restore them
+#include
+#include
+#include
+#include
+#include
+#include
+#include "fpga.h"
+#include "opae_sim.h"
+#include
+
+extern fpga_result fpgaOpen(fpga_token token, fpga_handle *handle, int flags) {
+  if (NULL == handle || flags != 0)
+    return FPGA_INVALID_PARAM;
+  auto sim = new opae_sim();
+  *handle = reinterpret_cast<fpga_handle>(sim);
+  return FPGA_OK;
+}
+
+extern fpga_result fpgaClose(fpga_handle handle) {
+  if (NULL == handle)
+    return FPGA_INVALID_PARAM;
+
+  auto sim = reinterpret_cast<opae_sim*>(handle);
+  delete sim;
+
+  return FPGA_OK;
+}
+
+extern fpga_result fpgaPrepareBuffer(fpga_handle handle, uint64_t len, void **buf_addr, uint64_t *wsid, int flags) {
+  if (NULL == handle || len == 0 || buf_addr == NULL || wsid == NULL)
+    return FPGA_INVALID_PARAM;
+
+  auto sim = reinterpret_cast<opae_sim*>(handle);
+  sim->prepare_buffer(len, buf_addr, wsid, flags);
+
+  return FPGA_OK;
+}
+
+extern fpga_result fpgaReleaseBuffer(fpga_handle handle, uint64_t wsid) {
+  if (NULL == handle)
+    return FPGA_INVALID_PARAM;
+
+  auto sim = reinterpret_cast<opae_sim*>(handle);
+  sim->release_buffer(wsid);
+
+  return FPGA_OK;
+}
+
+extern fpga_result fpgaGetIOAddress(fpga_handle handle, uint64_t wsid, uint64_t *ioaddr) {
+  if (NULL == handle || ioaddr == NULL)
+    return FPGA_INVALID_PARAM;
+
+  auto sim = reinterpret_cast<opae_sim*>(handle);
+  sim->get_io_address(wsid, ioaddr);
+
+  return FPGA_OK;
+}
+
+extern fpga_result fpgaWriteMMIO64(fpga_handle handle, uint32_t mmio_num, uint64_t offset, uint64_t value) {
+  if (NULL == handle || mmio_num != 0)
+    return FPGA_INVALID_PARAM;
+
+  auto sim = reinterpret_cast<opae_sim*>(handle);
+  sim->write_mmio64(mmio_num, offset, value);
+
+  return FPGA_OK;
+}
+
+extern fpga_result fpgaReadMMIO64(fpga_handle handle, uint32_t mmio_num, uint64_t offset, uint64_t *value) {
+  if (NULL == handle || mmio_num != 0 || value == NULL)
+    return FPGA_INVALID_PARAM;
+
+  auto sim = reinterpret_cast<opae_sim*>(handle);
+  sim->read_mmio64(mmio_num, offset, value);
+
+  return FPGA_OK;
+}
+
+extern const char *fpgaErrStr(fpga_result e) {
+  return "";
+}
\ No newline at end of file
diff --git a/driver/opae/vlsim/fpga.h b/driver/opae/vlsim/fpga.h
new file mode 100644
index 00000000..e67c22b7
--- /dev/null
+++ b/driver/opae/vlsim/fpga.h
@@ -0,0 +1,48 @@
+#ifndef __FPGA_H__
+#define __FPGA_H__
+
+#include  // NOTE(review): header name missing; must provide uint32_t/uint64_t used below
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef enum {
+  FPGA_OK = 0,         /**< Operation completed successfully */
+  FPGA_INVALID_PARAM,  /**< Invalid parameter supplied */
+  FPGA_BUSY,           /**< Resource is busy */
+  FPGA_EXCEPTION,      /**< An exception occurred */
+  FPGA_NOT_FOUND,      /**< A required resource was not found */
+  FPGA_NO_MEMORY,      /**< Not enough memory to complete operation */
+  FPGA_NOT_SUPPORTED,  /**< Requested operation is not supported */
+  FPGA_NO_DRIVER,      /**< Driver is not loaded */
+  FPGA_NO_DAEMON,      /**< FPGA Daemon (fpgad) is not running */
+  FPGA_NO_ACCESS,      /**< Insufficient privileges or permissions */
+  FPGA_RECONF_ERROR    /**< Error while reconfiguring FPGA */
+} fpga_result;
+
+typedef void *fpga_handle;
+
+typedef void *fpga_token;
+
+fpga_result fpgaOpen(fpga_token token, fpga_handle *handle, int flags);
+
+fpga_result fpgaClose(fpga_handle handle);
+
+fpga_result fpgaPrepareBuffer(fpga_handle handle, uint64_t len, void **buf_addr, uint64_t *wsid, int flags);
+
+fpga_result fpgaReleaseBuffer(fpga_handle handle, uint64_t wsid);
+
+fpga_result fpgaGetIOAddress(fpga_handle handle, uint64_t wsid, uint64_t *ioaddr);
+
+fpga_result fpgaWriteMMIO64(fpga_handle handle, uint32_t mmio_num, uint64_t offset, uint64_t value);
+
+fpga_result fpgaReadMMIO64(fpga_handle handle, uint32_t mmio_num, uint64_t offset, uint64_t *value);
+
+const char *fpgaErrStr(fpga_result e);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // __FPGA_H__
diff --git a/driver/opae/vlsim/opae_sim.cpp b/driver/opae/vlsim/opae_sim.cpp
new file mode 100644
index 00000000..58debc6a
--- /dev/null
+++ b/driver/opae/vlsim/opae_sim.cpp
@@ -0,0 +1,269 @@
+#include "opae_sim.h"
+#include  // NOTE(review): header names of the bare #include lines in this file are missing; restore them
+#include
+#include
+
+#define CCI_LATENCY 8
+#define CCI_RQ_SIZE 16
+#define CCI_WQ_SIZE 16
+
+#define ENABLE_DRAM_STALLS
+#define DRAM_LATENCY 4
+#define DRAM_RQ_SIZE 16
+#define DRAM_STALLS_MODULO 16
+
+uint64_t timestamp = 0;
+
+double sc_time_stamp() {
+  return timestamp;
+}
+
+opae_sim::opae_sim() {
+  // force random values for uninitialized signals
+  Verilated::randReset(2);
+  Verilated::randSeed(50);
+
+  // Turn off assertion before reset
+  Verilated::assertOn(false);
+
+  stop_ = false;
+  vortex_afu_ = new Vvortex_afu_shim();
+
+#ifdef VCD_OUTPUT
+  Verilated::traceEverOn(true);
+  trace_ = new VerilatedVcdC();
+  vortex_afu_->trace(trace_, 99);
+  trace_->open("trace.vcd");
+#endif
+
+  future_ = std::async(std::launch::async, [&]{
+    this->reset();
+    while (!stop_) { // keep clocking the design until the destructor sets stop_
+      this->step();
+    }
+  });
+}
+
+opae_sim::~opae_sim() {
+  stop_ = true;
+  if (future_.valid()) {
+    future_.wait();
+  }
+#ifdef VCD_OUTPUT
+  trace_->close();
+#endif
+  delete vortex_afu_;
+}
+
+void opae_sim::prepare_buffer(uint64_t len, void **buf_addr, uint64_t *wsid, int flags) {
+  host_alloc_t alloc;
+  alloc.data = new uint8_t[len];
+  alloc.size = len;
+  *buf_addr = alloc.data; // hand the host pointer back to the caller
+  *wsid = host_allocs_.size();
+  host_allocs_.push_back(alloc);
+}
+
+void opae_sim::release_buffer(uint64_t wsid) {
+  if (wsid >= host_allocs_.size())
+    return;
+  // Free the storage but keep the slot: wsid is an index into host_allocs_,
+  // so erasing the entry would invalidate every wsid handed out after it.
+  delete [] host_allocs_[wsid].data;
+  host_allocs_[wsid].data = nullptr;
+  host_allocs_[wsid].size = 0;
+}
+
+void opae_sim::get_io_address(uint64_t wsid, uint64_t *ioaddr) {
+  // NOTE(review): exact only if data is GLOBAL_BLOCK_SIZE-aligned; new[] in
+  // prepare_buffer() does not guarantee that — confirm.
+  *ioaddr = (intptr_t)host_allocs_[wsid].data / GLOBAL_BLOCK_SIZE;
+}
+
+void opae_sim::write_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t value) {
+  vortex_afu_->vcp2af_sRxPort_c0_mmioWrValid = 1;
+  vortex_afu_->vcp2af_sRxPort_c0_hdr_resp_type = offset;
+  memcpy(vortex_afu_->vcp2af_sRxPort_c0_data, &value, 8);
+}
+
+void opae_sim::read_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t *value) {
+  vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid = 1;
+  vortex_afu_->vcp2af_sRxPort_c0_hdr_resp_type = offset;
+  while (0 == vortex_afu_->af2cp_sTxPort_c2_mmioRdValid);
+  *value = vortex_afu_->af2cp_sTxPort_c2_data;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+void opae_sim::reset() {
+  vortex_afu_->clk = 0;
+  this->eval();
+
+  vortex_afu_->clk = 1;
+  this->eval();
+}
+
+void opae_sim::step() {
+  vortex_afu_->clk = 0;
+  this->eval();
+
+  vortex_afu_->clk = 1;
+  this->eval();
+
+  this->sRxPort_bus();
+  this->sTxPort_bus();
+  this->avs_bus();
+}
+
+void opae_sim::eval() {
+  vortex_afu_->eval();
+#ifdef VCD_OUTPUT
+  trace_->dump(timestamp);
+#endif
+  ++timestamp;
+}
+
+void opae_sim::sRxPort_bus() {
+  // schedule CCI read responses
+  int cci_rd_index = -1;
+  for (int i = 0; i < cci_reads_.size(); i++) {
+    if (cci_reads_[i].cycles_left > 0) {
+      cci_reads_[i].cycles_left -= 1;
+    }
+    if ((cci_rd_index == -1)
+     && (cci_reads_[i].cycles_left == 0)) {
+      cci_rd_index = i;
+    }
+  }
+
+  // schedule CCI write responses
+  int cci_wr_index = -1;
+  for (int i = 0; i < cci_writes_.size(); i++) {
+    if (cci_writes_[i].cycles_left > 0) {
+      cci_writes_[i].cycles_left -= 1;
+    }
+    if ((cci_wr_index == -1)
+     && (cci_writes_[i].cycles_left == 0)) {
+      cci_wr_index = i;
+    }
+  }
+
+  // send CCI read response
+  vortex_afu_->vcp2af_sRxPort_c0_rspValid = 0;
+  if (cci_rd_index != -1) {
+    vortex_afu_->vcp2af_sRxPort_c0_rspValid = 1;
+    memcpy(vortex_afu_->vcp2af_sRxPort_c0_data, cci_reads_[cci_rd_index].block.data(), GLOBAL_BLOCK_SIZE);
+    vortex_afu_->vcp2af_sRxPort_c0_hdr_mdata = cci_reads_[cci_rd_index].mdata;
+    cci_reads_.erase(cci_reads_.begin() + cci_rd_index);
+  }
+
+  // send CCI write response
+  vortex_afu_->vcp2af_sRxPort_c1_rspValid = 0;
+  if (cci_wr_index != -1) {
+    vortex_afu_->vcp2af_sRxPort_c1_rspValid = 1;
+    vortex_afu_->vcp2af_sRxPort_c1_hdr_mdata = cci_writes_[cci_wr_index].mdata;
+    cci_writes_.erase(cci_writes_.begin() + cci_wr_index);
+  }
+
+  // mmio
+  vortex_afu_->vcp2af_sRxPort_c0_mmioWrValid = 0;
+  vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid = 0;
+}
+
+void opae_sim::sTxPort_bus() {
+  // check read queue size
+  vortex_afu_->vcp2af_sRxPort_c0_TxAlmFull = (cci_reads_.size() >= CCI_RQ_SIZE);
+
+  // check write queue size
+  vortex_afu_->vcp2af_sRxPort_c1_TxAlmFull = (cci_writes_.size() >= CCI_WQ_SIZE);
+
+  // process read requests
+  if (vortex_afu_->af2cp_sTxPort_c0_valid && !vortex_afu_->vcp2af_sRxPort_c0_TxAlmFull) {
+    cci_rd_req_t cci_req;
+    cci_req.cycles_left = CCI_LATENCY;
+    cci_req.mdata = vortex_afu_->af2cp_sTxPort_c0_hdr_mdata;
+    auto host_ptr = this->find_host_ptr(vortex_afu_->af2cp_sTxPort_c0_hdr_address);
+    memcpy(cci_req.block.data(), host_ptr, GLOBAL_BLOCK_SIZE);
+    cci_reads_.push_back(cci_req);
+  }
+
+  // process write requests
+  if (vortex_afu_->af2cp_sTxPort_c1_valid && !vortex_afu_->vcp2af_sRxPort_c1_TxAlmFull) {
+    cci_wr_req_t cci_req;
+    cci_req.cycles_left = CCI_LATENCY;
+    cci_req.mdata = vortex_afu_->af2cp_sTxPort_c1_hdr_mdata;
+    auto host_ptr = this->find_host_ptr(vortex_afu_->af2cp_sTxPort_c1_hdr_address);
+    memcpy(host_ptr, vortex_afu_->af2cp_sTxPort_c1_data, GLOBAL_BLOCK_SIZE);
+    cci_writes_.push_back(cci_req);
+  }
+}
+
+void opae_sim::avs_bus() {
+  // schedule DRAM read responses
+  int dram_rd_index = -1;
+  for (int i = 0; i < dram_reads_.size(); i++) {
+    if (dram_reads_[i].cycles_left > 0) {
+      dram_reads_[i].cycles_left -= 1;
+    }
+    if ((dram_rd_index == -1)
+     && (dram_reads_[i].cycles_left == 0)) {
+      dram_rd_index = i;
+    }
+  }
+
+  // send DRAM response
+  vortex_afu_->avs_readdatavalid = 0;
+  if (dram_rd_index != -1) {
+    vortex_afu_->avs_readdatavalid = 1;
+    memcpy(vortex_afu_->avs_readdata, dram_reads_[dram_rd_index].block.data(), GLOBAL_BLOCK_SIZE);
+    dram_reads_.erase(dram_reads_.begin() + dram_rd_index);
+  }
+
+  // handle DRAM stalls
+  bool dram_stalled = false;
+#ifdef ENABLE_DRAM_STALLS
+  if (0 == ((timestamp/2) % DRAM_STALLS_MODULO)) {
+    dram_stalled = true;
+  } else
+  if (dram_reads_.size() >= DRAM_RQ_SIZE) {
+    dram_stalled = true;
+  }
+#endif
+
+  // process DRAM requests
+  if (!dram_stalled) {
+    if (vortex_afu_->avs_write) {
+      assert(0 == vortex_afu_->mem_bank_select);
+      uint64_t byteen = vortex_afu_->avs_byteenable;
+      unsigned base_addr = (vortex_afu_->avs_address * GLOBAL_BLOCK_SIZE);
+      uint8_t* data = (uint8_t*)(vortex_afu_->avs_writedata);
+      for (int i = 0; i < GLOBAL_BLOCK_SIZE; i++) {
+        if ((byteen >> i) & 0x1) {
+          ram_[base_addr + i] = data[i];
+        }
+      }
+    }
+    if (vortex_afu_->avs_read) {
+      assert(0 == vortex_afu_->mem_bank_select);
+      dram_rd_req_t dram_req;
+      dram_req.cycles_left = DRAM_LATENCY;
+      ram_.read(vortex_afu_->avs_address * GLOBAL_BLOCK_SIZE, GLOBAL_BLOCK_SIZE, dram_req.block.data());
+      dram_reads_.push_back(dram_req);
+    }
+  }
+
+  vortex_afu_->avs_waitrequest = dram_stalled;
+}
+
+uint8_t* opae_sim::find_host_ptr(uint64_t addr) {
+  auto b_addr = addr * GLOBAL_BLOCK_SIZE;
+  for (auto& host_alloc : host_allocs_) {
+    auto alloc_addr = (intptr_t)host_alloc.data;
+    if (b_addr >= alloc_addr
+     && b_addr < (alloc_addr + host_alloc.size)) {
+      return (uint8_t*)b_addr;
+    }
+  }
+  assert(false);
+  return nullptr;
+}
\ No newline at end of file
diff --git a/driver/opae/vlsim/opae_sim.h b/driver/opae/vlsim/opae_sim.h
new file mode 100644
index 00000000..8467af42
--- /dev/null
+++ b/driver/opae/vlsim/opae_sim.h
@@ -0,0 +1,87 @@
+#pragma once
+
+#include "Vvortex_afu_shim.h"
+#include "Vvortex_afu_shim__Syms.h"
+#include "verilated.h"
+
+#ifdef VCD_OUTPUT
+#include  // NOTE(review): header names of the bare #include lines in this file are missing; restore them
+#endif
+
+#include
+#include "ram.h"
+
+#include
+#include
+#include
+
+class opae_sim {
+public:
+
+  opae_sim();
+  virtual ~opae_sim();
+
+  void prepare_buffer(uint64_t len, void **buf_addr, uint64_t *wsid, int flags);
+
+  void release_buffer(uint64_t wsid);
+
+  void get_io_address(uint64_t wsid, uint64_t *ioaddr);
+
+  void write_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t value);
+
+  void read_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t *value);
+
+private:
+
+  typedef struct {
+    int cycles_left;
+    std::array<uint8_t, GLOBAL_BLOCK_SIZE> block;
+    unsigned tag;
+  } dram_rd_req_t;
+
+  typedef struct {
+    int cycles_left;
+    std::array<uint8_t, GLOBAL_BLOCK_SIZE> block;
+    unsigned mdata;
+  } cci_rd_req_t;
+
+  typedef struct {
+    int cycles_left;
+    std::array<uint8_t, GLOBAL_BLOCK_SIZE> block;
+    unsigned mdata;
+  } cci_wr_req_t;
+
+  typedef struct {
+    uint8_t* data;
+    size_t size;
+  } host_alloc_t;
+
+  void reset();
+
+  void eval();
+
+  void step();
+
+  void sRxPort_bus();
+  void sTxPort_bus();
+  void avs_bus();
+
+  uint8_t* find_host_ptr(uint64_t addr);
+
+  std::future<void> future_;
+  bool stop_; // NOTE(review): written by the destructor, read by the async sim loop — consider std::atomic<bool>
+
+  std::vector<host_alloc_t> host_allocs_;
+
+  std::vector<dram_rd_req_t> dram_reads_;
+
+  std::vector<cci_rd_req_t> cci_reads_;
+
+  std::vector<cci_wr_req_t> cci_writes_;
+
+  RAM ram_;
+  Vvortex_afu_shim *vortex_afu_;
+#ifdef VCD_OUTPUT
+  VerilatedVcdC *trace_;
+#endif
+};
\ No newline at end of file
diff --git a/driver/opae/vlsim/ram.h b/driver/opae/vlsim/ram.h
new file mode 100644
index 00000000..53df7e0f
--- /dev/null
+++ b/driver/opae/vlsim/ram.h
@@ -0,0 +1,64 @@
+#pragma once
+
+#include  // NOTE(review): header names of the bare #include lines in this file are missing; restore them
+#include
+
+class RAM {
+private:
+
+  mutable uint8_t *mem_[(1 << 12)];
+
+  uint8_t *get(uint32_t address) const {
+    uint32_t block_addr   = address >> 20;
+    uint32_t block_offset = address & 0x000FFFFF;
+    if (mem_[block_addr] == NULL) {
+      mem_[block_addr] = new uint8_t[(1 << 20)];
+    }
+    return mem_[block_addr] + block_offset;
+  }
+
+public:
+
+  RAM() {
+    for (uint32_t i = 0; i < (1 << 12); i++) {
+      mem_[i] = NULL;
+    }
+  }
+
+  ~RAM() {
+    this->clear();
+  }
+
+  size_t size() const {
+    return (1ull << 32);
+  }
+
+  void clear() {
+    for (uint32_t i = 0; i < (1 << 12); i++) {
+      if (mem_[i]) {
+        delete [] mem_[i]; // blocks come from new uint8_t[] in get()
+        mem_[i] = NULL;
+      }
+    }
+  }
+
+  void read(uint32_t address, uint32_t length, uint8_t *data) const {
+    for (unsigned i = 0; i < length; i++) {
+      data[i] = *this->get(address + i);
+    }
+  }
+
+  void write(uint32_t address, uint32_t length, const uint8_t *data) {
+    for (unsigned i = 0; i < length; i++) {
+      *this->get(address + i) = data[i];
+    }
+  }
+
+  uint8_t& operator[](uint32_t address) {
+    return *get(address);
+  }
+
+  const uint8_t& operator[](uint32_t address) const {
+    return *get(address);
+  }
+};
\ No newline at end of file
diff --git a/driver/opae/vlsim/verilator.vlt b/driver/opae/vlsim/verilator.vlt
new file mode 100644
index 00000000..cb799b65
--- /dev/null
+++ b/driver/opae/vlsim/verilator.vlt
@@ -0,0 +1,9 @@
+`verilator_config
+
+lint_off -rule BLKANDNBLK -file "../../../hw/rtl/fp_cores/fpnew/*"
+lint_off -rule UNOPTFLAT -file "../../../hw/rtl/fp_cores/fpnew/*"
+lint_off -rule WIDTH -file "../../../hw/rtl/fp_cores/fpnew/*"
+lint_off -rule UNUSED -file "../../../hw/rtl/fp_cores/fpnew/*"
+lint_off -rule LITENDIAN -file "../../../hw/rtl/fp_cores/fpnew/*"
+lint_off -rule IMPORTSTAR -file "../../../hw/rtl/fp_cores/fpnew/*"
+lint_off -rule PINCONNECTEMPTY -file "../../../hw/rtl/fp_cores/fpnew/*"
\ No newline at end of file
diff --git a/hw/opae/vortex_afu_sim.v b/driver/opae/vlsim/vortex_afu_shim.sv
similarity index 99%
rename from hw/opae/vortex_afu_sim.v
rename to driver/opae/vlsim/vortex_afu_shim.sv
index d3f9d8e1..ce007856 100644
--- a/hw/opae/vortex_afu_sim.v
+++ b/driver/opae/vlsim/vortex_afu_shim.sv
@@ -1,11 +1,11 @@
 `include "vortex_afu.vh"
-
+`include "VX_define.vh"
 /* verilator lint_off IMPORTSTAR */
 import ccip_if_pkg::*;
 import local_mem_cfg_pkg::*;
 /* verilator lint_on IMPORTSTAR */
 
-module vortex_afu_sim #(
+module vortex_afu_shim #(
   parameter NUM_LOCAL_MEM_BANKS = 2
 ) (
   // global signals
diff --git a/driver/opae/vortex.cpp b/driver/opae/vortex.cpp
index e66b9afe..a5174316 100755
--- a/driver/opae/vortex.cpp
+++ b/driver/opae/vortex.cpp
@@ -6,13 +6,20 @@
 #include
 #include
 #include
-#include
+
+#ifdef USE_VLSIM
+#include "vlsim/fpga.h"
+#else
 #include
+#include
+#endif
+
 #include
 #include
 #include "vortex_afu.h"
+
 #ifdef SCOPE
-#include "scope.h"
+#include "vx_scope.h"
 #endif
 
 #define CACHE_LINESIZE 64
@@ -122,14 +129,16 @@ extern int vx_dev_caps(vx_device_h hdevice, unsigned caps_id, unsigned *value) {
 extern int vx_dev_open(vx_device_h* hdevice) {
     if (nullptr == hdevice)
         return -1;
-
-    fpga_properties filter = nullptr;
-    fpga_result res;
-    fpga_guid guid;
-    fpga_token accel_token;
-    uint32_t num_matches;
-    fpga_handle accel_handle;
+
+    fpga_result res;
+    fpga_handle accel_handle;
     vx_device_t* device;
+
+#ifndef USE_VLSIM
+    fpga_token accel_token;
+    fpga_properties filter = nullptr;
+    fpga_guid guid;
+    uint32_t num_matches;
 
     // Set up a filter that will search for an accelerator
     fpgaGetProperties(nullptr, &filter);
@@ -159,6 +168,13 @@ extern int vx_dev_open(vx_device_h* hdevice) {
 
     // Done with token
     fpgaDestroyToken(&accel_token);
+#else
+    // Open accelerator
+    res = fpgaOpen(NULL, &accel_handle, 0);
+    if (FPGA_OK != res) {
+        return -1;
+    }
+#endif
 
     // allocate device object
     device = (vx_device_t*)malloc(sizeof(vx_device_t));
diff --git a/driver/opae/vortex_afu.h b/driver/opae/vortex_afu.h
new file mode 100644
index 00000000..c31e1a9e
--- /dev/null
+++ b/driver/opae/vortex_afu.h
@@ -0,0 +1,30 @@
+//
+// Generated by afu_json_mgr from ../../hw/opae/vortex_afu.json
+//
+
+#ifndef __AFU_JSON_INFO__
+#define __AFU_JSON_INFO__
+
+#define AFU_ACCEL_NAME "vortex_afu"
+#define AFU_ACCEL_UUID "35F9452B-25C2-434C-93D5-6F8C60DB361C"
+#define AFU_IMAGE_CMD_CLFLUSH 4
+#define AFU_IMAGE_CMD_CSR_READ 5
+#define AFU_IMAGE_CMD_CSR_WRITE 6
+#define AFU_IMAGE_CMD_MEM_READ 1
+#define AFU_IMAGE_CMD_MEM_WRITE 2
+#define AFU_IMAGE_CMD_RUN 3
+#define AFU_IMAGE_MMIO_CMD_TYPE 10
+#define AFU_IMAGE_MMIO_CSR_ADDR 26
+#define AFU_IMAGE_MMIO_CSR_CORE 24
+#define AFU_IMAGE_MMIO_CSR_DATA 28
+#define AFU_IMAGE_MMIO_CSR_READ 30
+#define AFU_IMAGE_MMIO_DATA_SIZE 16
+#define AFU_IMAGE_MMIO_IO_ADDR 12
+#define AFU_IMAGE_MMIO_MEM_ADDR 14
+#define AFU_IMAGE_MMIO_SCOPE_READ 20
+#define AFU_IMAGE_MMIO_SCOPE_WRITE 22
+#define AFU_IMAGE_MMIO_STATUS 18
+#define AFU_IMAGE_POWER 0
+#define AFU_TOP_IFC "ccip_std_afu_avalon_mm"
+
+#endif // __AFU_JSON_INFO__
diff --git a/driver/opae/scope.cpp b/driver/opae/vx_scope.cpp
similarity index 98%
rename from driver/opae/scope.cpp
rename to driver/opae/vx_scope.cpp
index 6b12a7ab..41709e37 100644
--- a/driver/opae/scope.cpp
+++ b/driver/opae/vx_scope.cpp
@@ -4,8 +4,15 @@
 #include
 #include
 #include
+
+#ifdef USE_VLSIM
+#include "vlsim/fpga.h"
+#else
+#include
+#endif
+
 #include
-#include "scope.h"
+#include "vx_scope.h"
 #include "vortex_afu.h"
 
 #define CHECK_RES(_expr) \
@@ -18,12 +25,6 @@
         return -1; \
     } while (false)
 
-
-template
-constexpr bool static_print() {
-    return (0 < N < 100);
-}
-
 #define MMIO_SCOPE_READ (AFU_IMAGE_MMIO_SCOPE_READ * 4)
 #define MMIO_SCOPE_WRITE (AFU_IMAGE_MMIO_SCOPE_WRITE * 4)
 
@@ -33,8 +34,7 @@ struct scope_signal_t {
 };
 
 constexpr int ilog2(int n) {
-    return (n > 1) ? 1 +
-    ilog2(n >> 1) : 0;
+    return (n > 1) ? 1 + ilog2(n >> 1) : 0;
 }
 
 static constexpr int NW_BITS = ilog2(NUM_WARPS);
diff --git a/driver/opae/scope.h b/driver/opae/vx_scope.h
similarity index 84%
rename from driver/opae/scope.h
rename to driver/opae/vx_scope.h
index 0d9dd2d2..f2d5518e 100644
--- a/driver/opae/scope.h
+++ b/driver/opae/vx_scope.h
@@ -1,7 +1,5 @@
 #pragma once
 
-#include
-
 int vx_scope_start(fpga_handle hfpga, uint64_t delay = -1);
 
 int vx_scope_stop(fpga_handle hfpga, uint64_t delay = -1);
\ No newline at end of file
diff --git a/driver/rtlsim/Makefile b/driver/rtlsim/Makefile
index d3d0df94..ec4252b5 100644
--- a/driver/rtlsim/Makefile
+++ b/driver/rtlsim/Makefile
@@ -22,7 +22,6 @@ DBG_FLAGS += -DDBG_CORE_REQ_INFO
 CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=1
 
 #DEBUG=1
-#AFU=1
 
 CFLAGS += -fPIC
 
@@ -35,11 +34,13 @@ LDFLAGS += -shared -pthread
 
 TOP = Vortex
 
-SRCS = vortex.cpp ../common/vx_utils.cpp ../../hw/simulate/simulator.cpp
-SRCS += ../../hw/rtl/fp_cores/svdpi/float_dpi.cpp
+RTL_DIR = ../../hw/rtl
 
-FPU_INCLUDE = -I../../hw/rtl/fp_cores -I../../hw/rtl/fp_cores/svdpi -I../../hw/rtl/fp_cores/fpnew/src/common_cells/include -I../../hw/rtl/fp_cores/fpnew/src/common_cells/src -I../../hw/rtl/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I../../hw/rtl/fp_cores/fpnew/src
-RTL_INCLUDE = -I../../hw/rtl -I../../hw/rtl/libs -I../../hw/rtl/interfaces -I../../hw/rtl/cache $(FPU_INCLUDE)
+SRCS = vortex.cpp ../common/vx_utils.cpp ../../hw/simulate/simulator.cpp
+SRCS += $(RTL_DIR)/fp_cores/svdpi/float_dpi.cpp
+
+FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(RTL_DIR)/fp_cores/svdpi -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src -I$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(RTL_DIR)/fp_cores/fpnew/src
+RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache $(FPU_INCLUDE)
 
 VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic $(CONFIGS)
 VL_FLAGS += -Wno-DECLFILENAME
@@ -59,16 +60,6 @@ else
   CFLAGS += -DNDEBUG
 endif
 
-# AFU
-ifdef AFU
-  TOP = vortex_afu_sim
-  VL_FLAGS += -DNOPAE
-  CFLAGS += -DNOPAE
-  VL_FLAGS += -DSCOPE
-  CFLAGS += -DSCOPE
-  RTL_INCLUDE += -I../../hw/opae -I../../hw/opae/ccip
-endif
-
 PROJECT = libvortex.so
 # PROJECT = libvortex.dylib
 
diff --git a/hw/simulate/simulator.cpp b/hw/simulate/simulator.cpp
index 99a5599f..a6d90dbd 100644
--- a/hw/simulate/simulator.cpp
+++ b/hw/simulate/simulator.cpp
@@ -3,6 +3,11 @@
 #include
 #include
 
+#define ENABLE_DRAM_STALLS
+#define DRAM_LATENCY 4
+#define DRAM_RQ_SIZE 16
+#define DRAM_STALLS_MODULO 16
+
 uint64_t timestamp = 0;
 
 double sc_time_stamp() {
diff --git a/hw/simulate/simulator.h b/hw/simulate/simulator.h
index a16218bb..f43e1d8b 100644
--- a/hw/simulate/simulator.h
+++ b/hw/simulate/simulator.h
@@ -14,17 +14,6 @@
 #include
 #include
 
-#define ENABLE_DRAM_STALLS
-#define DRAM_LATENCY 4
-#define DRAM_RQ_SIZE 16
-#define DRAM_STALLS_MODULO 16
-
-typedef struct {
-  int cycles_left;
-  std::array block;
-  unsigned tag;
-} dram_req_t;
-
 class Simulator {
 public:
 
@@ -53,6 +42,12 @@ public:
 
 private:
 
+  typedef struct {
+    int cycles_left;
+    std::array block;
+    unsigned tag;
+  } dram_req_t;
+
   void eval();
 
   void eval_dram_bus();
@@ -61,7 +56,7 @@ private:
   void eval_snp_bus();
 
   std::vector dram_rsp_vec_;
-  int dram_rsp_active_;
+  bool dram_rsp_active_;
 
   bool snp_req_active_;
   bool csr_req_active_;