diff --git a/benchmarks/opencl/bfs/Makefile b/benchmarks/opencl/bfs/Makefile index d62da96a..1c964b6b 100644 --- a/benchmarks/opencl/bfs/Makefile +++ b/benchmarks/opencl/bfs/Makefile @@ -29,7 +29,10 @@ run-fpga: $(PROJECT) kernel.pocl LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) run-ase: $(PROJECT) kernel.pocl - ASE_LOG=0 LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) + +run-vlsim: $(PROJECT) kernel.pocl + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/vlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) run-simx: $(PROJECT) kernel.pocl LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) diff --git a/benchmarks/opencl/convolution/Makefile b/benchmarks/opencl/convolution/Makefile index 3138e01a..24fb0878 100644 --- a/benchmarks/opencl/convolution/Makefile +++ b/benchmarks/opencl/convolution/Makefile @@ -11,7 +11,7 @@ CXXFLAGS += -std=c++11 -O0 -g -fpermissive -Wall -Wextra -pedantic -Wfatal-error CXXFLAGS += -I$(POCL_RT_PATH)/include -LDFLAGS += -L$(POCL_RT_PATH)/lib -L$(DRIVER_PATH)/simx -lOpenCL -lvortex +LDFLAGS += -L$(POCL_RT_PATH)/lib -L$(VORTEX_DRV_PATH)/simx -lOpenCL -lvortex PROJECT = convolution @@ -26,16 +26,19 @@ $(PROJECT): $(SRCS) $(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@ run-fpga: $(PROJECT) kernel.pocl - LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(DRIVER_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) run-ase: $(PROJECT) kernel.pocl - LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(DRIVER_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) + +run-vlsim: $(PROJECT) kernel.pocl + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/vlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) run-simx: $(PROJECT) kernel.pocl - LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(DRIVER_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) run-rtlsim: $(PROJECT) kernel.pocl - LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(DRIVER_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) .depend: $(SRCS) $(CXX) $(CXXFLAGS) -MM $^ > .depend; diff --git a/benchmarks/opencl/guassian/Makefile b/benchmarks/opencl/guassian/Makefile index a716b790..0bee95c0 100644 --- a/benchmarks/opencl/guassian/Makefile +++ b/benchmarks/opencl/guassian/Makefile @@ -29,7 +29,10 @@ run-fpga: $(PROJECT) kernel.pocl LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) run-ase: $(PROJECT) kernel.pocl - ASE_LOG=0 LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) + +run-vlsim: $(PROJECT) kernel.pocl + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/vlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) run-simx: $(PROJECT) kernel.pocl LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) diff --git a/benchmarks/opencl/kmeans/Makefile b/benchmarks/opencl/kmeans/Makefile index f8636dbb..df77a758 100644 --- a/benchmarks/opencl/kmeans/Makefile +++ b/benchmarks/opencl/kmeans/Makefile @@ -29,7 +29,10 @@ run-fpga: $(PROJECT) kernel.pocl LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) run-ase: $(PROJECT) kernel.pocl - ASE_LOG=0 LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) + +run-vlsim: $(PROJECT) kernel.pocl + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/vlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) run-simx: $(PROJECT) kernel.pocl LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) diff --git a/benchmarks/opencl/nearn/Makefile b/benchmarks/opencl/nearn/Makefile index 3024f31d..062c6141 100644 --- a/benchmarks/opencl/nearn/Makefile +++ b/benchmarks/opencl/nearn/Makefile @@ -29,7 +29,10 @@ run-fpga: $(PROJECT) kernel.pocl LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) run-ase: $(PROJECT) kernel.pocl - ASE_LOG=0 LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) + +run-vlsim: $(PROJECT) kernel.pocl + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/vlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) run-simx: $(PROJECT) kernel.pocl LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) diff --git a/benchmarks/opencl/saxpy/Makefile b/benchmarks/opencl/saxpy/Makefile index 6f725541..4ed95ad5 100644 --- a/benchmarks/opencl/saxpy/Makefile +++ b/benchmarks/opencl/saxpy/Makefile @@ -29,7 +29,10 @@ run-fpga: $(PROJECT) kernel.pocl LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) run-ase: $(PROJECT) kernel.pocl - ASE_LOG=0 LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) + +run-vlsim: $(PROJECT) kernel.pocl + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/vlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) run-simx: $(PROJECT) kernel.pocl LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) diff --git a/benchmarks/opencl/sfilter/Makefile b/benchmarks/opencl/sfilter/Makefile index 3a7f86e6..075d032a 100644 --- a/benchmarks/opencl/sfilter/Makefile +++ b/benchmarks/opencl/sfilter/Makefile @@ -29,7 +29,10 @@ run-fpga: $(PROJECT) kernel.pocl LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) run-ase: $(PROJECT) kernel.pocl - ASE_LOG=0 LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) + +run-vlsim: $(PROJECT) kernel.pocl + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/vlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) run-simx: $(PROJECT) kernel.pocl LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) diff --git a/benchmarks/opencl/sgemm/Makefile b/benchmarks/opencl/sgemm/Makefile index 0f789a28..2564b317 100644 --- a/benchmarks/opencl/sgemm/Makefile +++ b/benchmarks/opencl/sgemm/Makefile @@ -29,7 +29,7 @@ run-fpga: $(PROJECT) kernel.pocl LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) run-ase: $(PROJECT) kernel.pocl - ASE_LOG=0 LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) run-simx: $(PROJECT) kernel.pocl LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) diff --git a/benchmarks/opencl/transpose/Makefile b/benchmarks/opencl/transpose/Makefile index d1d8e0fe..2bf258d0 100644 --- a/benchmarks/opencl/transpose/Makefile +++ b/benchmarks/opencl/transpose/Makefile @@ -29,7 +29,10 @@ run-fpga: $(PROJECT) kernel.pocl LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) run-ase: $(PROJECT) kernel.pocl - ASE_LOG=0 LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) + +run-vlsim: $(PROJECT) kernel.pocl + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/vlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) run-simx: $(PROJECT) kernel.pocl LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) diff --git a/benchmarks/opencl/vecadd/Makefile b/benchmarks/opencl/vecadd/Makefile index bc025b21..b141ca06 100644 --- a/benchmarks/opencl/vecadd/Makefile +++ b/benchmarks/opencl/vecadd/Makefile @@ -29,7 +29,10 @@ run-fpga: $(PROJECT) kernel.pocl LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) run-ase: $(PROJECT) kernel.pocl - ASE_LOG=0 LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) + +run-vlsim: $(PROJECT) kernel.pocl + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/vlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) run-simx: $(PROJECT) kernel.pocl LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) diff --git a/driver/include/vortex.h b/driver/include/vortex.h index e5aa9eb6..36b11a53 100644 --- a/driver/include/vortex.h +++ b/driver/include/vortex.h @@ -34,7 +34,7 @@ int vx_dev_caps(vx_device_h hdevice, unsigned caps_id, unsigned *value); int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hbuffer); // Get host pointer address -volatile void* vx_host_ptr(vx_buffer_h hbuffer); +void* vx_host_ptr(vx_buffer_h hbuffer); // release buffer int vx_buf_release(vx_buffer_h hbuffer); diff --git a/driver/opae/vlsim/fpga.cpp b/driver/opae/vlsim/fpga.cpp index 3607f26e..b88a58ec 100644 --- a/driver/opae/vlsim/fpga.cpp +++ b/driver/opae/vlsim/fpga.cpp @@ -32,7 +32,9 @@ extern fpga_result fpgaPrepareBuffer(fpga_handle handle, uint64_t len, void **bu return FPGA_INVALID_PARAM; auto sim = reinterpret_cast(handle); - sim->prepare_buffer(len, buf_addr, wsid, flags); + int ret = sim->prepare_buffer(len, buf_addr, wsid, flags); + if (ret != 0) + return FPGA_NO_MEMORY; return FPGA_OK; } @@ -77,6 +79,16 @@ extern fpga_result fpgaReadMMIO64(fpga_handle handle, uint32_t mmio_num, uint64_ return FPGA_OK; } +extern fpga_result fpgaFlush(fpga_handle handle) { + if (NULL == handle) + return FPGA_INVALID_PARAM; + + auto sim = reinterpret_cast(handle); + sim->flush(); + + return FPGA_OK; +} + extern const char *fpgaErrStr(fpga_result e) { return ""; } \ No newline at end of file diff --git a/driver/opae/vlsim/fpga.h b/driver/opae/vlsim/fpga.h index e67c22b7..fda1c000 100644 --- a/driver/opae/vlsim/fpga.h +++ b/driver/opae/vlsim/fpga.h @@ -39,6 +39,8 @@ fpga_result fpgaWriteMMIO64(fpga_handle handle, uint32_t mmio_num, uint64_t offs fpga_result fpgaReadMMIO64(fpga_handle handle, uint32_t mmio_num, uint64_t offset, uint64_t *value); +fpga_result fpgaFlush(fpga_handle handle); + const char *fpgaErrStr(fpga_result e); #ifdef __cplusplus diff --git a/driver/opae/vlsim/opae_sim.cpp b/driver/opae/vlsim/opae_sim.cpp index 58debc6a..bddb9f79 100644 --- a/driver/opae/vlsim/opae_sim.cpp +++ b/driver/opae/vlsim/opae_sim.cpp @@ -4,6 +4,7 @@ #include #define CCI_LATENCY 8 +#define CCI_RAND_MOD 8 #define CCI_RQ_SIZE 16 #define CCI_WQ_SIZE 16 @@ -26,7 +27,6 @@ opae_sim::opae_sim() { // Turn off assertion before reset Verilated::assertOn(false); - stop_ = false; vortex_afu_ = new Vvortex_afu_shim(); #ifdef VCD_OUTPUT @@ -34,14 +34,17 @@ opae_sim::opae_sim() { trace_ = new VerilatedVcdC(); vortex_afu_->trace(trace_, 99); trace_->open("trace.vcd"); -#endif +#endif - future_ = std::async(std::launch::async, [&]{ - this->reset(); - while (stop_) { + this->reset(); + + stop_ = false; + future_ = std::async(std::launch::async, [&]{ + while (!stop_) { + std::lock_guard guard(mutex_); this->step(); } - }); + }); } opae_sim::~opae_sim() { @@ -55,44 +58,68 @@ opae_sim::~opae_sim() { delete vortex_afu_; } -void opae_sim::prepare_buffer(uint64_t len, void **buf_addr, uint64_t *wsid, int flags) { - host_alloc_t alloc; - alloc.data = new uint8_t[len]; - alloc.size = len; - *wsid = host_allocs_.size(); - host_allocs_.push_back(alloc); +int opae_sim::prepare_buffer(uint64_t len, void **buf_addr, uint64_t *wsid, int flags) { + auto alloc = aligned_alloc(CACHE_BLOCK_SIZE, len); + if (alloc == NULL) + return -1; + host_buffer_t buffer; + buffer.data = (uint64_t*)alloc; + buffer.size = len; + buffer.ioaddr = intptr_t(alloc) / CACHE_BLOCK_SIZE; + auto index = host_buffers_.size(); + host_buffers_.push_back(buffer); + *buf_addr = alloc; + *wsid = index; + return 0; } void opae_sim::release_buffer(uint64_t wsid) { - delete [] host_allocs_[wsid].data; - host_allocs_.erase(host_allocs_.begin() + wsid); + free(host_buffers_[wsid].data); + host_buffers_.erase(host_buffers_.begin() + wsid); } void opae_sim::get_io_address(uint64_t wsid, uint64_t *ioaddr) { - *ioaddr = (intptr_t)host_allocs_[wsid].data / GLOBAL_BLOCK_SIZE; + *ioaddr = host_buffers_[wsid].ioaddr * CACHE_BLOCK_SIZE; } void opae_sim::write_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t value) { + std::lock_guard guard(mutex_); + vortex_afu_->vcp2af_sRxPort_c0_mmioWrValid = 1; - vortex_afu_->vcp2af_sRxPort_c0_hdr_resp_type = offset; + vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_address = offset / 4; + vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_length = 1; + vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_tid = 0; memcpy(vortex_afu_->vcp2af_sRxPort_c0_data, &value, 8); + this->step(); + assert(!vortex_afu_->vcp2af_sRxPort_c0_mmioWrValid); } void opae_sim::read_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t *value) { + std::lock_guard guard(mutex_); + vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid = 1; - vortex_afu_->vcp2af_sRxPort_c0_hdr_resp_type = offset; - while (0 == vortex_afu_->af2cp_sTxPort_c2_mmioRdValid); + vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_address = offset / 4; + vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_length = 1; + vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_tid = 0; + this->step(); + assert(!vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid); + assert(vortex_afu_->af2cp_sTxPort_c2_mmioRdValid); *value = vortex_afu_->af2cp_sTxPort_c2_data; } +void opae_sim::flush() { + // flush pending CCI requests +} + /////////////////////////////////////////////////////////////////////////////// void opae_sim::reset() { - vortex_afu_->clk = 0; - this->eval(); + vortex_afu_->reset = 1; + this->step(); + vortex_afu_->reset = 0; - vortex_afu_->clk = 1; - this->eval(); + // Turn on assertion after reset + Verilated::assertOn(true); } void opae_sim::step() { @@ -144,7 +171,7 @@ void opae_sim::sRxPort_bus() { vortex_afu_->vcp2af_sRxPort_c0_rspValid = 0; if (cci_rd_index != -1) { vortex_afu_->vcp2af_sRxPort_c0_rspValid = 1; - memcpy(vortex_afu_->vcp2af_sRxPort_c0_data, cci_reads_[cci_rd_index].block.data(), GLOBAL_BLOCK_SIZE); + memcpy(vortex_afu_->vcp2af_sRxPort_c0_data, cci_reads_[cci_rd_index].block.data(), CACHE_BLOCK_SIZE); vortex_afu_->vcp2af_sRxPort_c0_hdr_mdata = cci_reads_[cci_rd_index].mdata; cci_reads_.erase(cci_reads_.begin() + cci_rd_index); } @@ -172,20 +199,20 @@ void opae_sim::sTxPort_bus() { // process read requests if (vortex_afu_->af2cp_sTxPort_c0_valid && !vortex_afu_->vcp2af_sRxPort_c0_TxAlmFull) { cci_rd_req_t cci_req; - cci_req.cycles_left = CCI_LATENCY; + cci_req.cycles_left = CCI_LATENCY + (timestamp % CCI_RAND_MOD); cci_req.mdata = vortex_afu_->af2cp_sTxPort_c0_hdr_mdata; - auto host_ptr = this->find_host_ptr(vortex_afu_->af2cp_sTxPort_c0_hdr_address); - memcpy(cci_req.block.data(), host_ptr, GLOBAL_BLOCK_SIZE); + auto host_ptr = this->to_host_ptr(vortex_afu_->af2cp_sTxPort_c0_hdr_address); + memcpy(cci_req.block.data(), host_ptr, CACHE_BLOCK_SIZE); cci_reads_.push_back(cci_req); } // process write requests if (vortex_afu_->af2cp_sTxPort_c1_valid && !vortex_afu_->vcp2af_sRxPort_c1_TxAlmFull) { cci_wr_req_t cci_req; - cci_req.cycles_left = CCI_LATENCY; + cci_req.cycles_left = CCI_LATENCY + (timestamp % CCI_RAND_MOD); cci_req.mdata = vortex_afu_->af2cp_sTxPort_c1_hdr_mdata; - auto host_ptr = this->find_host_ptr(vortex_afu_->af2cp_sTxPort_c1_hdr_address); - memcpy(host_ptr, vortex_afu_->af2cp_sTxPort_c1_data, GLOBAL_BLOCK_SIZE); + auto host_ptr = this->to_host_ptr(vortex_afu_->af2cp_sTxPort_c1_hdr_address); + memcpy(host_ptr, vortex_afu_->af2cp_sTxPort_c1_data, CACHE_BLOCK_SIZE); cci_writes_.push_back(cci_req); } } @@ -207,7 +234,7 @@ void opae_sim::avs_bus() { vortex_afu_->avs_readdatavalid = 0; if (dram_rd_index != -1) { vortex_afu_->avs_readdatavalid = 1; - memcpy(vortex_afu_->avs_readdata, dram_reads_[dram_rd_index].block.data(), GLOBAL_BLOCK_SIZE); + memcpy(vortex_afu_->avs_readdata, dram_reads_[dram_rd_index].block.data(), CACHE_BLOCK_SIZE); dram_reads_.erase(dram_reads_.begin() + dram_rd_index); } @@ -227,9 +254,9 @@ void opae_sim::avs_bus() { if (vortex_afu_->avs_write) { assert(0 == vortex_afu_->mem_bank_select); uint64_t byteen = vortex_afu_->avs_byteenable; - unsigned base_addr = (vortex_afu_->avs_address * GLOBAL_BLOCK_SIZE); + unsigned base_addr = (vortex_afu_->avs_address * CACHE_BLOCK_SIZE); uint8_t* data = (uint8_t*)(vortex_afu_->avs_writedata); - for (int i = 0; i < GLOBAL_BLOCK_SIZE; i++) { + for (int i = 0; i < CACHE_BLOCK_SIZE; i++) { if ((byteen >> i) & 0x1) { ram_[base_addr + i] = data[i]; } @@ -239,7 +266,8 @@ void opae_sim::avs_bus() { assert(0 == vortex_afu_->mem_bank_select); dram_rd_req_t dram_req; dram_req.cycles_left = DRAM_LATENCY; - ram_.read(vortex_afu_->avs_address * GLOBAL_BLOCK_SIZE, GLOBAL_BLOCK_SIZE, dram_req.block.data()); + unsigned base_addr = (vortex_afu_->avs_address * CACHE_BLOCK_SIZE); + ram_.read(base_addr, CACHE_BLOCK_SIZE, dram_req.block.data()); dram_reads_.push_back(dram_req); } } @@ -247,15 +275,14 @@ void opae_sim::avs_bus() { vortex_afu_->avs_waitrequest = dram_stalled; } -uint8_t* opae_sim::find_host_ptr(uint64_t addr) { - auto b_addr = addr * GLOBAL_BLOCK_SIZE; - for (auto& host_alloc : host_allocs_) { - auto alloc_addr = (intptr_t)host_alloc.data; - if (b_addr >= alloc_addr - && b_addr < (alloc_addr + host_alloc.size)) { - return (uint8_t*)b_addr; +uint64_t* opae_sim::to_host_ptr(uint64_t ioaddr) { + for (auto& buffer : host_buffers_) { + if (ioaddr >= buffer.ioaddr + && ioaddr < (buffer.ioaddr + buffer.size)) { + return buffer.data + (ioaddr - buffer.ioaddr) * (CACHE_BLOCK_SIZE / 8); } } - assert(false); + printf("error: to_host_ptr(0x%lx) failed\n", ioaddr); + std::abort(); return nullptr; } \ No newline at end of file diff --git a/driver/opae/vlsim/opae_sim.h b/driver/opae/vlsim/opae_sim.h index 8467af42..1c03075f 100644 --- a/driver/opae/vlsim/opae_sim.h +++ b/driver/opae/vlsim/opae_sim.h @@ -15,13 +15,15 @@ #include #include +#define CACHE_BLOCK_SIZE 64 + class opae_sim { public: opae_sim(); virtual ~opae_sim(); - void prepare_buffer(uint64_t len, void **buf_addr, uint64_t *wsid, int flags); + int prepare_buffer(uint64_t len, void **buf_addr, uint64_t *wsid, int flags); void release_buffer(uint64_t wsid); @@ -31,30 +33,32 @@ public: void read_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t *value); + void flush(); + private: typedef struct { int cycles_left; - std::array block; + std::array block; unsigned tag; } dram_rd_req_t; typedef struct { int cycles_left; - std::array block; + std::array block; unsigned mdata; } cci_rd_req_t; typedef struct { int cycles_left; - std::array block; unsigned mdata; } cci_wr_req_t; typedef struct { - uint8_t* data; - size_t size; - } host_alloc_t; + uint64_t* data; + size_t size; + uint64_t ioaddr; + } host_buffer_t; void reset(); @@ -66,12 +70,12 @@ private: void sTxPort_bus(); void avs_bus(); - uint8_t* find_host_ptr(uint64_t addr); + uint64_t* to_host_ptr(uint64_t addr); std::future future_; bool stop_; - std::vector host_allocs_; + std::vector host_buffers_; std::vector dram_reads_; @@ -79,6 +83,8 @@ private: std::vector cci_writes_; + std::mutex mutex_; + RAM ram_; Vvortex_afu_shim *vortex_afu_; #ifdef VCD_OUTPUT diff --git a/driver/opae/vlsim/vortex_afu_shim.sv b/driver/opae/vlsim/vortex_afu_shim.sv index ce007856..4977979a 100644 --- a/driver/opae/vlsim/vortex_afu_shim.sv +++ b/driver/opae/vlsim/vortex_afu_shim.sv @@ -26,7 +26,12 @@ module vortex_afu_shim #( input t_ccip_clData vcp2af_sRxPort_c0_data, input logic vcp2af_sRxPort_c0_rspValid, input logic vcp2af_sRxPort_c0_mmioRdValid, - input logic vcp2af_sRxPort_c0_mmioWrValid, + input logic vcp2af_sRxPort_c0_mmioWrValid, + + input t_ccip_mmioAddr vcp2af_sRxPort_c0_ReqMmioHdr_address, + input logic [1:0] vcp2af_sRxPort_c0_ReqMmioHdr_length, + input logic vcp2af_sRxPort_c0_ReqMmioHdr_rsvd, + input t_ccip_tid vcp2af_sRxPort_c0_ReqMmioHdr_tid, input t_ccip_vc vcp2af_sRxPort_c1_hdr_vc_used, input logic vcp2af_sRxPort_c1_hdr_rsvd1, @@ -77,63 +82,16 @@ module vortex_afu_shim #( output logic [$clog2(NUM_LOCAL_MEM_BANKS)-1:0] mem_bank_select ); +t_if_ccip_Rx cp2af_sRxPort; +t_if_ccip_Tx af2cp_sTxPort; + vortex_afu #( .NUM_LOCAL_MEM_BANKS(NUM_LOCAL_MEM_BANKS) ) vortex_afu ( .clk(clk), - .SoftReset(reset), - .cp2af_sRxPort({ - vcp2af_sRxPort_c0_TxAlmFull, - vcp2af_sRxPort_c1_TxAlmFull, - - vcp2af_sRxPort_c0_hdr_vc_used, - vcp2af_sRxPort_c0_hdr_rsvd1, - vcp2af_sRxPort_c0_hdr_hit_miss, - vcp2af_sRxPort_c0_hdr_rsvd0, - vcp2af_sRxPort_c0_hdr_cl_num, - vcp2af_sRxPort_c0_hdr_resp_type, - vcp2af_sRxPort_c0_hdr_mdata, - vcp2af_sRxPort_c0_data, - vcp2af_sRxPort_c0_rspValid, - vcp2af_sRxPort_c0_mmioRdValid, - vcp2af_sRxPort_c0_mmioWrValid, - - vcp2af_sRxPort_c1_hdr_vc_used, - vcp2af_sRxPort_c1_hdr_rsvd1, - vcp2af_sRxPort_c1_hdr_hit_miss, - vcp2af_sRxPort_c1_hdr_format, - vcp2af_sRxPort_c1_hdr_rsvd0, - vcp2af_sRxPort_c1_hdr_cl_num, - vcp2af_sRxPort_c1_hdr_resp_type, - vcp2af_sRxPort_c1_hdr_mdata, - vcp2af_sRxPort_c1_rspValid} - ), - .af2cp_sTxPort({ - af2cp_sTxPort_c0_hdr_vc_sel, - af2cp_sTxPort_c0_hdr_rsvd1, - af2cp_sTxPort_c0_hdr_cl_len, - af2cp_sTxPort_c0_hdr_req_type, - af2cp_sTxPort_c0_hdr_rsvd0, - af2cp_sTxPort_c0_hdr_address, - af2cp_sTxPort_c0_hdr_mdata, - af2cp_sTxPort_c0_valid, - - af2cp_sTxPort_c1_hdr_rsvd2, - af2cp_sTxPort_c1_hdr_vc_sel, - af2cp_sTxPort_c1_hdr_sop, - af2cp_sTxPort_c1_hdr_rsvd1, - af2cp_sTxPort_c1_hdr_cl_len, - af2cp_sTxPort_c1_hdr_req_type, - af2cp_sTxPort_c1_hdr_rsvd0, - af2cp_sTxPort_c1_hdr_address, - af2cp_sTxPort_c1_hdr_mdata, - af2cp_sTxPort_c1_data, - af2cp_sTxPort_c1_valid, - - af2cp_sTxPort_c2_hdr_tid, - af2cp_sTxPort_c2_mmioRdValid, - af2cp_sTxPort_c2_data - }), + .reset(reset), + .cp2af_sRxPort(cp2af_sRxPort), + .af2cp_sTxPort(af2cp_sTxPort), .avs_writedata(avs_writedata), .avs_readdata(avs_readdata), .avs_address(avs_address), @@ -146,4 +104,67 @@ vortex_afu #( .mem_bank_select(mem_bank_select) ); +t_if_ccip_c0_RxHdr c0_RxHdr; +always @ (*) begin + c0_RxHdr = 'x; + if (vcp2af_sRxPort_c0_mmioWrValid || vcp2af_sRxPort_c0_mmioRdValid) begin + c0_RxHdr.reqMmioHdr.address = vcp2af_sRxPort_c0_ReqMmioHdr_address; + c0_RxHdr.reqMmioHdr.length = vcp2af_sRxPort_c0_ReqMmioHdr_length; + c0_RxHdr.reqMmioHdr.rsvd = vcp2af_sRxPort_c0_ReqMmioHdr_rsvd; + c0_RxHdr.reqMmioHdr.tid = vcp2af_sRxPort_c0_ReqMmioHdr_tid; + end else begin + c0_RxHdr.rspMemHdr.vc_used = vcp2af_sRxPort_c0_hdr_vc_used; + c0_RxHdr.rspMemHdr.rsvd1 = vcp2af_sRxPort_c0_hdr_rsvd1; + c0_RxHdr.rspMemHdr.hit_miss = vcp2af_sRxPort_c0_hdr_hit_miss; + c0_RxHdr.rspMemHdr.rsvd0 = vcp2af_sRxPort_c0_hdr_rsvd0; + c0_RxHdr.rspMemHdr.cl_num = vcp2af_sRxPort_c0_hdr_cl_num; + c0_RxHdr.rspMemHdr.resp_type = vcp2af_sRxPort_c0_hdr_resp_type; + c0_RxHdr.rspMemHdr.mdata = vcp2af_sRxPort_c0_hdr_mdata; + end +end + +assign cp2af_sRxPort.c0TxAlmFull = vcp2af_sRxPort_c0_TxAlmFull; +assign cp2af_sRxPort.c1TxAlmFull = vcp2af_sRxPort_c1_TxAlmFull; + +assign cp2af_sRxPort.c0.hdr = c0_RxHdr; +assign cp2af_sRxPort.c0.data = vcp2af_sRxPort_c0_data; +assign cp2af_sRxPort.c0.rspValid = vcp2af_sRxPort_c0_rspValid; +assign cp2af_sRxPort.c0.mmioRdValid = vcp2af_sRxPort_c0_mmioRdValid; +assign cp2af_sRxPort.c0.mmioWrValid = vcp2af_sRxPort_c0_mmioWrValid; + +assign cp2af_sRxPort.c1.hdr.vc_used = vcp2af_sRxPort_c1_hdr_vc_used; +assign cp2af_sRxPort.c1.hdr.rsvd1 = vcp2af_sRxPort_c1_hdr_rsvd1; +assign cp2af_sRxPort.c1.hdr.hit_miss = vcp2af_sRxPort_c1_hdr_hit_miss; +assign cp2af_sRxPort.c1.hdr.format = vcp2af_sRxPort_c1_hdr_format; +assign cp2af_sRxPort.c1.hdr.rsvd0 = vcp2af_sRxPort_c1_hdr_rsvd0; +assign cp2af_sRxPort.c1.hdr.cl_num = vcp2af_sRxPort_c1_hdr_cl_num; +assign cp2af_sRxPort.c1.hdr.resp_type = vcp2af_sRxPort_c1_hdr_resp_type; +assign cp2af_sRxPort.c1.hdr.mdata = vcp2af_sRxPort_c1_hdr_mdata; +assign cp2af_sRxPort.c1.rspValid = vcp2af_sRxPort_c1_rspValid; + +assign af2cp_sTxPort_c0_hdr_vc_sel = af2cp_sTxPort.c0.hdr.vc_sel; +assign af2cp_sTxPort_c0_hdr_rsvd1 = af2cp_sTxPort.c0.hdr.rsvd1; +assign af2cp_sTxPort_c0_hdr_cl_len = af2cp_sTxPort.c0.hdr.cl_len; +assign af2cp_sTxPort_c0_hdr_req_type = af2cp_sTxPort.c0.hdr.req_type; +assign af2cp_sTxPort_c0_hdr_rsvd0 = af2cp_sTxPort.c0.hdr.rsvd0; +assign af2cp_sTxPort_c0_hdr_address = af2cp_sTxPort.c0.hdr.address; +assign af2cp_sTxPort_c0_hdr_mdata = af2cp_sTxPort.c0.hdr.mdata; +assign af2cp_sTxPort_c0_valid = af2cp_sTxPort.c0.valid; + +assign af2cp_sTxPort_c1_hdr_rsvd2 = af2cp_sTxPort.c1.hdr.rsvd2; +assign af2cp_sTxPort_c1_hdr_vc_sel = af2cp_sTxPort.c1.hdr.vc_sel; +assign af2cp_sTxPort_c1_hdr_sop = af2cp_sTxPort.c1.hdr.sop; +assign af2cp_sTxPort_c1_hdr_rsvd1 = af2cp_sTxPort.c1.hdr.rsvd1; +assign af2cp_sTxPort_c1_hdr_cl_len = af2cp_sTxPort.c1.hdr.cl_len; +assign af2cp_sTxPort_c1_hdr_req_type = af2cp_sTxPort.c1.hdr.req_type; +assign af2cp_sTxPort_c1_hdr_rsvd0 = af2cp_sTxPort.c1.hdr.rsvd0; +assign af2cp_sTxPort_c1_hdr_address = af2cp_sTxPort.c1.hdr.address; +assign af2cp_sTxPort_c1_hdr_mdata = af2cp_sTxPort.c1.hdr.mdata; +assign af2cp_sTxPort_c1_data = af2cp_sTxPort.c1.data; +assign af2cp_sTxPort_c1_valid = af2cp_sTxPort.c1.valid; + +assign af2cp_sTxPort_c2_hdr_tid = af2cp_sTxPort.c2.hdr.tid; +assign af2cp_sTxPort_c2_mmioRdValid = af2cp_sTxPort.c2.mmioRdValid; +assign af2cp_sTxPort_c2_data = af2cp_sTxPort.c2.data; + endmodule \ No newline at end of file diff --git a/driver/opae/vortex.cpp b/driver/opae/vortex.cpp index 6ad759c9..41e69baa 100755 --- a/driver/opae/vortex.cpp +++ b/driver/opae/vortex.cpp @@ -22,9 +22,9 @@ #include "vx_scope.h" #endif -#define CACHE_LINESIZE 64 -#define ALLOC_BASE_ADDR 0x10000000 -#define LOCAL_MEM_SIZE 0xffffffff +#define CACHE_BLOCK_SIZE 64 +#define ALLOC_BASE_ADDR 0x10000000 +#define LOCAL_MEM_SIZE 0xffffffff #define CHECK_RES(_expr) \ do { \ @@ -68,7 +68,7 @@ typedef struct vx_device_ { typedef struct vx_buffer_ { uint64_t wsid; - volatile void* host_ptr; + void* host_ptr; uint64_t io_addr; vx_device_h hdevice; size_t size; @@ -106,7 +106,7 @@ extern int vx_dev_caps(vx_device_h hdevice, unsigned caps_id, unsigned *value) { *value = device->num_threads; break; case VX_CAPS_CACHE_LINESIZE: - *value = CACHE_LINESIZE; + *value = CACHE_BLOCK_SIZE; break; case VX_CAPS_LOCAL_MEM_SIZE: *value = LOCAL_MEM_SIZE; @@ -190,9 +190,9 @@ extern int vx_dev_open(vx_device_h* hdevice) { // Load device CAPS int ret = 0; ret |= vx_csr_get(device, 0, CSR_MIMPID, &device->implementation_id); - ret |= vx_csr_get(device, 0, CSR_NC, &device->num_cores); - ret |= vx_csr_get(device, 0, CSR_NW, &device->num_warps); - ret |= vx_csr_get(device, 0, CSR_NT, &device->num_threads); + ret |= vx_csr_get(device, 0, CSR_NC, &device->num_cores); + ret |= vx_csr_get(device, 0, CSR_NW, &device->num_warps); + ret |= vx_csr_get(device, 0, CSR_NT, &device->num_threads); if (ret != 0) { fpgaClose(accel_handle); return ret; @@ -253,8 +253,6 @@ extern int vx_dev_close(vx_device_h hdevice) { fpgaClose(device->fpga); - free(device); - return 0; } @@ -267,7 +265,7 @@ extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr) vx_device_t *device = ((vx_device_t*)hdevice); size_t dev_mem_size = LOCAL_MEM_SIZE; - size_t asize = align_size(size, CACHE_LINESIZE); + size_t asize = align_size(size, CACHE_BLOCK_SIZE); if (device->mem_allocation + asize > dev_mem_size) return -1; @@ -292,7 +290,7 @@ extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hb vx_device_t *device = ((vx_device_t*)hdevice); - size_t asize = align_size(size, CACHE_LINESIZE); + size_t asize = align_size(size, CACHE_BLOCK_SIZE); res = fpgaPrepareBuffer(device->fpga, asize, &host_ptr, &wsid, 0); if (FPGA_OK != res) { @@ -324,11 +322,15 @@ extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hb return 0; } -extern volatile void* vx_host_ptr(vx_buffer_h hbuffer) { +extern void* vx_host_ptr(vx_buffer_h hbuffer) { if (nullptr == hbuffer) return nullptr; vx_buffer_t* buffer = ((vx_buffer_t*)hbuffer); +#ifdef USE_VLSIM + vx_device_t *device = ((vx_device_t*)buffer->hdevice); + fpgaFlush(device); +#endif return buffer->host_ptr; } @@ -353,7 +355,6 @@ extern int vx_ready_wait(vx_device_h hdevice, long long timeout) { vx_device_t *device = ((vx_device_t*)hdevice); - uint64_t data = 0; struct timespec sleep_time; #if defined(USE_ASE) @@ -368,6 +369,7 @@ extern int vx_ready_wait(vx_device_h hdevice, long long timeout) { long long sleep_time_ms = (sleep_time.tv_sec * 1000) + (sleep_time.tv_nsec / 1000000); for (;;) { + uint64_t data; CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_STATUS, &data)); if (0 == data || 0 == timeout) { if (data != 0) { @@ -391,12 +393,12 @@ extern int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, si vx_device_t *device = ((vx_device_t*)buffer->hdevice); size_t dev_mem_size = LOCAL_MEM_SIZE; - size_t asize = align_size(size, CACHE_LINESIZE); + size_t asize = align_size(size, CACHE_BLOCK_SIZE); // check alignment - if (!is_aligned(dev_maddr, CACHE_LINESIZE)) + if (!is_aligned(dev_maddr, CACHE_BLOCK_SIZE)) return -1; - if (!is_aligned(buffer->io_addr + src_offset, CACHE_LINESIZE)) + if (!is_aligned(buffer->io_addr + src_offset, CACHE_BLOCK_SIZE)) return -1; // bound checking @@ -409,7 +411,7 @@ extern int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, si if (vx_ready_wait(buffer->hdevice, -1) != 0) return -1; - auto ls_shift = (int)std::log2(CACHE_LINESIZE); + auto ls_shift = (int)std::log2(CACHE_BLOCK_SIZE); CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_IO_ADDR, (buffer->io_addr + src_offset) >> ls_shift)); CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_MEM_ADDR, dev_maddr >> ls_shift)); @@ -432,12 +434,12 @@ extern int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, vx_device_t *device = ((vx_device_t*)buffer->hdevice); size_t dev_mem_size = LOCAL_MEM_SIZE; - size_t asize = align_size(size, CACHE_LINESIZE); + size_t asize = align_size(size, CACHE_BLOCK_SIZE); // check alignment - if (!is_aligned(dev_maddr, CACHE_LINESIZE)) + if (!is_aligned(dev_maddr, CACHE_BLOCK_SIZE)) return -1; - if (!is_aligned(buffer->io_addr + dest_offset, CACHE_LINESIZE)) + if (!is_aligned(buffer->io_addr + dest_offset, CACHE_BLOCK_SIZE)) return -1; // bound checking @@ -450,7 +452,7 @@ extern int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, if (vx_ready_wait(buffer->hdevice, -1) != 0) return -1; - auto ls_shift = (int)std::log2(CACHE_LINESIZE); + auto ls_shift = (int)std::log2(CACHE_BLOCK_SIZE); CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_IO_ADDR, (buffer->io_addr + dest_offset) >> ls_shift)); CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_MEM_ADDR, dev_maddr >> ls_shift)); @@ -471,17 +473,17 @@ extern int vx_flush_caches(vx_device_h hdevice, size_t dev_maddr, size_t size) { vx_device_t* device = ((vx_device_t*)hdevice); - size_t asize = align_size(size, CACHE_LINESIZE); + size_t asize = align_size(size, CACHE_BLOCK_SIZE); // check alignment - if (!is_aligned(dev_maddr, CACHE_LINESIZE)) + if (!is_aligned(dev_maddr, CACHE_BLOCK_SIZE)) return -1; // Ensure ready for new command if (vx_ready_wait(hdevice, -1) != 0) return -1; - auto ls_shift = (int)std::log2(CACHE_LINESIZE); + auto ls_shift = (int)std::log2(CACHE_BLOCK_SIZE); CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_MEM_ADDR, dev_maddr >> ls_shift)); CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_DATA_SIZE, asize >> ls_shift)); @@ -545,9 +547,8 @@ extern int vx_csr_get(vx_device_h hdevice, int core_id, int addr, unsigned* valu // Ensure ready for new command if (vx_ready_wait(hdevice, -1) != 0) - return -1; + return -1; - // write CSR value CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CORE, core_id)); CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_ADDR, addr)); diff --git a/driver/rtlsim/vortex.cpp b/driver/rtlsim/vortex.cpp index 788a5a9d..875f7d22 100644 --- a/driver/rtlsim/vortex.cpp +++ b/driver/rtlsim/vortex.cpp @@ -304,7 +304,7 @@ extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hb return 0; } -extern volatile void* vx_host_ptr(vx_buffer_h hbuffer) { +extern void* vx_host_ptr(vx_buffer_h hbuffer) { if (nullptr == hbuffer) return nullptr; diff --git a/driver/simx/vortex.cpp b/driver/simx/vortex.cpp index b499803d..9d96ea63 100644 --- a/driver/simx/vortex.cpp +++ b/driver/simx/vortex.cpp @@ -294,7 +294,7 @@ extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hb return 0; } -extern volatile void* vx_host_ptr(vx_buffer_h hbuffer) { +extern void* vx_host_ptr(vx_buffer_h hbuffer) { if (nullptr == hbuffer) return nullptr; diff --git a/driver/stub/vortex.cpp b/driver/stub/vortex.cpp index f4a101f0..eae722e9 100644 --- a/driver/stub/vortex.cpp +++ b/driver/stub/vortex.cpp @@ -24,7 +24,7 @@ extern int vx_alloc_shared_mem(vx_device_h /*hdevice*/, size_t /*size*/, vx_buff return -1; } -extern volatile void* vx_host_ptr(vx_buffer_h /*hbuffer*/) { +extern void* vx_host_ptr(vx_buffer_h /*hbuffer*/) { return nullptr; } diff --git a/driver/tests/basic/Makefile b/driver/tests/basic/Makefile index 2edd71cf..e5ab7800 100644 --- a/driver/tests/basic/Makefile +++ b/driver/tests/basic/Makefile @@ -45,6 +45,9 @@ run-fpga: $(PROJECT) run-ase: $(PROJECT) ASE_LOG=0 LD_LIBRARY_PATH=../../opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS) +run-vlsim: $(PROJECT) + ASE_LOG=0 LD_LIBRARY_PATH=../../opae/vlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS) + run-rtlsim: $(PROJECT) LD_LIBRARY_PATH=../../rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS) diff --git a/driver/tests/basic/basic.cpp b/driver/tests/basic/basic.cpp index 69ba7d12..d59f956b 100755 --- a/driver/tests/basic/basic.cpp +++ b/driver/tests/basic/basic.cpp @@ -68,18 +68,27 @@ uint64_t shuffle(int i, uint64_t value) { int run_memcopy_test(uint32_t dev_addr, uint64_t value, int num_blocks) { int errors = 0; + int num_blocks_8 = (64 * num_blocks) / 8; // update source buffer - for (int i = 0; i < (64 * num_blocks) / 8; ++i) { + for (int i = 0; i < num_blocks_8; ++i) { ((uint64_t*)vx_host_ptr(buffer))[i] = shuffle(i, value); } + /*for (int i = 0; i < num_blocks; ++i) { + std::cout << "data[" << i << "]=0x"; + for (int j = 7; j >= 0; --j) { + std::cout << std::hex << ((uint64_t*)vx_host_ptr(buffer))[i * 8 +j]; + } + std::cout << std::endl; + }*/ + // write buffer to local memory std::cout << "write buffer to local memory" << std::endl; RT_CHECK(vx_copy_to_dev(buffer, dev_addr, 64 * num_blocks, 0)); // clear destination buffer - for (int i = 0; i < (64 * num_blocks) / 8; ++i) { + for (int i = 0; i < num_blocks_8; ++i) { ((uint64_t*)vx_host_ptr(buffer))[i] = 0; } @@ -89,7 +98,7 @@ int run_memcopy_test(uint32_t dev_addr, uint64_t value, int num_blocks) { // verify result std::cout << "verify result" << std::endl; - for (int i = 0; i < (64 * num_blocks) / 8; ++i) { + for (int i = 0; i < num_blocks_8; ++i) { auto curr = ((uint64_t*)vx_host_ptr(buffer))[i]; auto ref = shuffle(i, value); if (curr != ref) { @@ -214,7 +223,8 @@ int main(int argc, char *argv[]) { if (0 == test || -1 == test) { std::cout << "run memcopy test" << std::endl; RT_CHECK(run_memcopy_test(kernel_arg.src_ptr, 0x0badf00d00ff00ff, 1)); - RT_CHECK(run_memcopy_test(kernel_arg.src_ptr, 0x0badf00d40ff40ff, num_blocks)); + if (num_blocks >= 4) RT_CHECK(run_memcopy_test(kernel_arg.src_ptr, 0x0badf00d00ff00ff, num_blocks/2)); + if (num_blocks >= 2) RT_CHECK(run_memcopy_test(kernel_arg.src_ptr, 0x0badf00d40ff40ff, num_blocks)); } if (1 == test || -1 == test) { diff --git a/driver/tests/demo/Makefile b/driver/tests/demo/Makefile index 348e5d11..d3261cc6 100644 --- a/driver/tests/demo/Makefile +++ b/driver/tests/demo/Makefile @@ -43,6 +43,9 @@ run-fpga: $(PROJECT) run-ase: $(PROJECT) ASE_LOG=0 LD_LIBRARY_PATH=../../opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS) +run-vlsim: $(PROJECT) + ASE_LOG=0 LD_LIBRARY_PATH=../../opae/vlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS) + run-rtlsim: $(PROJECT) LD_LIBRARY_PATH=../../rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS) diff --git a/driver/tests/dogfood/Makefile b/driver/tests/dogfood/Makefile index b1fcb076..ce77c2e4 100644 --- a/driver/tests/dogfood/Makefile +++ b/driver/tests/dogfood/Makefile @@ -44,6 +44,9 @@ run-fpga: $(PROJECT) run-ase: $(PROJECT) ASE_LOG=0 LD_LIBRARY_PATH=../../opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS) +run-vlsim: $(PROJECT) + ASE_LOG=0 LD_LIBRARY_PATH=../../opae/vlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS) + run-rtlsim: $(PROJECT) LD_LIBRARY_PATH=../../rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS) diff --git a/driver/tests/dogfood/kernel.bin b/driver/tests/dogfood/kernel.bin old mode 100644 new mode 100755 diff --git a/hw/opae/ccip/ccip_if_pkg.sv b/hw/opae/ccip/ccip_if_pkg.sv index d300d154..930eaecb 100644 --- a/hw/opae/ccip/ccip_if_pkg.sv +++ b/hw/opae/ccip/ccip_if_pkg.sv @@ -235,4 +235,10 @@ typedef struct packed { t_if_ccip_c1_Rx c1; } t_if_ccip_Rx; + +typedef union packed { + t_ccip_c0_RspMemHdr rspMemHdr; + t_ccip_c0_ReqMmioHdr reqMmioHdr; +} t_if_ccip_c0_RxHdr; + endpackage \ No newline at end of file diff --git a/hw/opae/vortex_afu.sv b/hw/opae/vortex_afu.sv index f4830e6c..9ba91fc1 100644 --- a/hw/opae/vortex_afu.sv +++ b/hw/opae/vortex_afu.sv @@ -156,7 +156,6 @@ logic avs_rdq_full; // CMD variables ////////////////////////////////////////////////////////////// -logic [2:0] cmd_type; t_ccip_clAddr cmd_io_addr; logic[DRAM_ADDR_WIDTH-1:0] cmd_mem_addr; logic[DRAM_ADDR_WIDTH-1:0] cmd_data_size; @@ -176,9 +175,10 @@ logic [31:0] cmd_csr_wdata; // MMIO controller //////////////////////////////////////////////////////////// `IGNORE_WARNINGS_BEGIN -t_ccip_c0_ReqMmioHdr mmio_hdr; +t_ccip_c0_ReqMmioHdr mmio_hdr = t_ccip_c0_ReqMmioHdr'(cp2af_sRxPort.c0.hdr); `IGNORE_WARNINGS_END -assign mmio_hdr = t_ccip_c0_ReqMmioHdr'(cp2af_sRxPort.c0.hdr); + +`STATIC_ASSERT(($bits(t_ccip_c0_ReqMmioHdr)-$bits(mmio_hdr.address)) == 12, "Oops!") t_if_ccip_c2_Tx mmio_tx; assign af2cp_sTxPort.c2 = mmio_tx; @@ -189,20 +189,29 @@ assign cmd_scope_read = cp2af_sRxPort.c0.mmioRdValid && (MMIO_SCOPE_READ == mmi assign cmd_scope_write = cp2af_sRxPort.c0.mmioWrValid && (MMIO_SCOPE_WRITE == mmio_hdr.address); `endif +`DEBUG_BEGIN +wire cp2af_sRxPort_c0_mmioWrValid = cp2af_sRxPort.c0.mmioWrValid; +wire cp2af_sRxPort_c0_mmioRdValid = cp2af_sRxPort.c0.mmioRdValid; +wire[$bits(mmio_hdr.address)-1:0] mmio_hdr_address = mmio_hdr.address; +wire[$bits(mmio_hdr.length)-1:0] mmio_hdr_length = mmio_hdr.length; +wire[$bits(mmio_hdr.tid)-1:0] mmio_hdr_tid = mmio_hdr.tid; +wire[$bits(cp2af_sRxPort.c0.hdr.mdata)-1:0] cp2af_sRxPort_c0_hdr_mdata = cp2af_sRxPort.c0.hdr.mdata; +`DEBUG_END + +wire [2:0] cmd_type = (cp2af_sRxPort.c0.mmioWrValid && (MMIO_CMD_TYPE == mmio_hdr.address)) ? 3'(cp2af_sRxPort.c0.data) : 3'h0; + always_ff @(posedge clk) begin if (reset) begin mmio_tx.hdr <= 0; mmio_tx.data <= 0; mmio_tx.mmioRdValid <= 0; - cmd_type <= 0; cmd_io_addr <= 0; cmd_mem_addr <= 0; cmd_data_size <= 0; end else begin - cmd_type <= 0; mmio_tx.mmioRdValid <= 0; // serve MMIO write request @@ -228,7 +237,6 @@ begin `endif end MMIO_CMD_TYPE: begin - cmd_type <= $bits(cmd_type)'(cp2af_sRxPort.c0.data); `ifdef DBG_PRINT_OPAE $display("%t: MMIO_CMD_TYPE: %0d", $time, $bits(cmd_type)'(cp2af_sRxPort.c0.data)); `endif @@ -258,6 +266,11 @@ begin $display("%t: MMIO_CSR_DATA: %0h", $time, $bits(cmd_csr_wdata)'(cp2af_sRxPort.c0.data)); `endif end + default: begin + `ifdef DBG_PRINT_OPAE + $display("%t: MMIO_WR: addr=%0h, data=%0h", $time, mmio_hdr.address, $bits(cmd_csr_wdata)'(cp2af_sRxPort.c0.data)); + `endif + end endcase end @@ -279,15 +292,21 @@ begin AFU_ID_L: mmio_tx.data <= afu_id[63:0]; // afu id low AFU_ID_H: mmio_tx.data <= afu_id[127:64]; // afu id hi 16'h0006: mmio_tx.data <= 64'h0; // next AFU - 16'h0008: mmio_tx.data <= 64'h0; // reserved + 16'h0008: mmio_tx.data <= 64'h0; // reserved MMIO_STATUS: begin + mmio_tx.data <= 64'(state); `ifdef DBG_PRINT_OPAE if (state != state_t'(mmio_tx.data)) begin $display("%t: MMIO_STATUS: state=%0d", $time, state); end - `endif - mmio_tx.data <= 64'(state); + `endif end + MMIO_CSR_READ: begin + mmio_tx.data <= 64'(cmd_csr_rdata); + `ifdef DBG_PRINT_OPAE + $display("%t: MMIO_CSR_READ: data=%0h", $time, cmd_csr_rdata); + `endif + end `ifdef SCOPE MMIO_SCOPE_READ: begin mmio_tx.data <= cmd_scope_rdata; @@ -296,13 +315,11 @@ begin `endif end `endif - MMIO_CSR_READ: begin - mmio_tx.data <= 64'(cmd_csr_rdata); + default: begin `ifdef DBG_PRINT_OPAE - $display("%t: MMIO_CSR_READ: data=%0h", $time, cmd_csr_rdata); + $display("%t: MMIO_RD: addr=%0h", $time, mmio_hdr.address); `endif end - default: mmio_tx.data <= 64'h0; endcase mmio_tx.mmioRdValid <= 1; // post response end @@ -376,40 +393,58 @@ begin STATE_READ: begin if (cmd_read_done) begin state <= STATE_IDLE; + `ifdef DBG_PRINT_OPAE + $display("%t: STATE IDLE", $time); + `endif end end STATE_WRITE: begin if (cmd_write_done) begin state <= STATE_IDLE; + `ifdef DBG_PRINT_OPAE + $display("%t: STATE IDLE", $time); + `endif end end STATE_START: begin // vortex reset cycle - state <= STATE_RUN; + state <= STATE_RUN; end STATE_RUN: begin if (cmd_run_done) begin state <= STATE_IDLE; + `ifdef DBG_PRINT_OPAE + $display("%t: STATE IDLE", $time); + `endif end end STATE_CLFLUSH: begin if (cmd_clflush_done) begin state <= STATE_IDLE; + `ifdef DBG_PRINT_OPAE + $display("%t: STATE IDLE", $time); + `endif end end STATE_CSR_READ: begin if (cmd_csr_done) begin state <= STATE_IDLE; + `ifdef DBG_PRINT_OPAE + $display("%t: STATE IDLE", $time); + `endif end end STATE_CSR_WRITE: begin if (cmd_csr_done) begin state <= STATE_IDLE; + `ifdef DBG_PRINT_OPAE + $display("%t: STATE IDLE", $time); + `endif end end diff --git a/hw/opae/vortex_afu.vh b/hw/opae/vortex_afu.vh index 910ea3af..15513137 100644 --- a/hw/opae/vortex_afu.vh +++ b/hw/opae/vortex_afu.vh @@ -4,7 +4,7 @@ `include "ccip_if_pkg.sv" `define PLATFORM_PROVIDES_LOCAL_MEMORY -`define PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH 27 +`define PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH 26 `define PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH 512 `define PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH 4