diff --git a/driver/common/opae.cpp b/driver/common/opae.cpp index e0f9ad09..1022639e 100755 --- a/driver/common/opae.cpp +++ b/driver/common/opae.cpp @@ -18,6 +18,7 @@ #endif #include "vx_utils.h" +#include "vx_malloc.h" #include #include #include "vortex_afu.h" @@ -51,14 +52,25 @@ /////////////////////////////////////////////////////////////////////////////// -typedef struct vx_device_ { +class vx_device { +public: + vx_device() + : mem_allocator( + ALLOC_BASE_ADDR, + ALLOC_BASE_ADDR + LOCAL_MEM_SIZE, + 4096, + CACHE_BLOCK_SIZE) + {} + + ~vx_device() {} + fpga_handle fpga; - uint64_t mem_allocation; + vortex::MemoryAllocator mem_allocator; unsigned version; unsigned num_cores; unsigned num_warps; unsigned num_threads; -} vx_device_t; +}; typedef struct vx_buffer_ { uint64_t wsid; @@ -102,7 +114,7 @@ extern int vx_dev_caps(vx_device_h hdevice, uint32_t caps_id, uint64_t *value) { if (nullptr == hdevice) return -1; - vx_device_t *device = ((vx_device_t*)hdevice); + vx_device *device = ((vx_device*)hdevice); switch (caps_id) { case VX_CAPS_VERSION: @@ -143,7 +155,7 @@ extern int vx_dev_open(vx_device_h* hdevice) { return -1; fpga_handle accel_handle; - vx_device_t* device; + vx_device* device; #ifndef USE_VLSIM fpga_result res; @@ -204,14 +216,13 @@ extern int vx_dev_open(vx_device_h* hdevice) { #endif // allocate device object - device = (vx_device_t*)malloc(sizeof(vx_device_t)); + device = new vx_device(); if (nullptr == device) { fpgaClose(accel_handle); return -1; } device->fpga = accel_handle; - device->mem_allocation = ALLOC_BASE_ADDR; { // Load device CAPS @@ -254,7 +265,7 @@ extern int vx_dev_close(vx_device_h hdevice) { if (nullptr == hdevice) return -1; - vx_device_t *device = ((vx_device_t*)hdevice); + vx_device *device = ((vx_device*)hdevice); #ifdef SCOPE vx_scope_stop(device->fpga); @@ -267,30 +278,30 @@ extern int vx_dev_close(vx_device_h hdevice) { fpgaClose(device->fpga); + delete device; + return 0; } -extern int vx_alloc_dev_mem(vx_device_h 
hdevice, uint64_t size, uint64_t* dev_maddr) { +extern int vx_mem_alloc(vx_device_h hdevice, uint64_t size, uint64_t* dev_maddr) { if (nullptr == hdevice || nullptr == dev_maddr || 0 >= size) return -1; - vx_device_t *device = ((vx_device_t*)hdevice); - - size_t dev_mem_size = LOCAL_MEM_SIZE; - size_t asize = aligned_size(size, CACHE_BLOCK_SIZE); - - if (device->mem_allocation + asize > dev_mem_size) - return -1; - - *dev_maddr = device->mem_allocation; - device->mem_allocation += asize; - - return 0; + vx_device *device = ((vx_device*)hdevice); + return device->mem_allocator.allocate(size, dev_maddr); } -extern int vx_alloc_shared_mem(vx_device_h hdevice, uint64_t size, vx_buffer_h* hbuffer) { +extern int vx_mem_free(vx_device_h hdevice, uint64_t dev_maddr) { + if (nullptr == hdevice) + return -1; + + vx_device *device = ((vx_device*)hdevice); + return device->mem_allocator.release(dev_maddr); +} + +extern int vx_buf_alloc(vx_device_h hdevice, uint64_t size, vx_buffer_h* hbuffer) { fpga_result res; void* host_ptr; uint64_t wsid; @@ -302,7 +313,7 @@ extern int vx_alloc_shared_mem(vx_device_h hdevice, uint64_t size, vx_buffer_h* || nullptr == hbuffer) return -1; - vx_device_t *device = ((vx_device_t*)hdevice); + vx_device *device = ((vx_device*)hdevice); size_t asize = aligned_size(size, CACHE_BLOCK_SIZE); @@ -344,12 +355,12 @@ extern void* vx_host_ptr(vx_buffer_h hbuffer) { return buffer->host_ptr; } -extern int vx_buf_release(vx_buffer_h hbuffer) { +extern int vx_buf_free(vx_buffer_h hbuffer) { if (nullptr == hbuffer) return -1; vx_buffer_t* buffer = ((vx_buffer_t*)hbuffer); - vx_device_t *device = ((vx_device_t*)buffer->hdevice); + vx_device *device = ((vx_device*)buffer->hdevice); fpgaReleaseBuffer(device->fpga, buffer->wsid); @@ -364,7 +375,7 @@ extern int vx_ready_wait(vx_device_h hdevice, uint64_t timeout) { std::unordered_map print_bufs; - vx_device_t *device = ((vx_device_t*)hdevice); + vx_device *device = ((vx_device*)hdevice); struct timespec sleep_time; 
@@ -427,7 +438,7 @@ extern int vx_copy_to_dev(vx_buffer_h hbuffer, uint64_t dev_maddr, uint64_t size return -1; vx_buffer_t *buffer = ((vx_buffer_t*)hbuffer); - vx_device_t *device = ((vx_device_t*)buffer->hdevice); + vx_device *device = ((vx_device*)buffer->hdevice); uint64_t dev_mem_size = LOCAL_MEM_SIZE; uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE); @@ -468,7 +479,7 @@ extern int vx_copy_from_dev(vx_buffer_h hbuffer, uint64_t dev_maddr, uint64_t si return -1; vx_buffer_t *buffer = ((vx_buffer_t*)hbuffer); - vx_device_t *device = ((vx_device_t*)buffer->hdevice); + vx_device *device = ((vx_device*)buffer->hdevice); uint64_t dev_mem_size = LOCAL_MEM_SIZE; uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE); @@ -507,7 +518,7 @@ extern int vx_start(vx_device_h hdevice) { if (nullptr == hdevice) return -1; - vx_device_t *device = ((vx_device_t*)hdevice); + vx_device *device = ((vx_device*)hdevice); // Ensure ready for new command if (vx_ready_wait(hdevice, MAX_TIMEOUT) != 0) diff --git a/driver/common/vx_malloc.h b/driver/common/vx_malloc.h new file mode 100644 index 00000000..4d1d1b45 --- /dev/null +++ b/driver/common/vx_malloc.h @@ -0,0 +1,399 @@ +#pragma once + +#include +#include + +namespace vortex { + +class MemoryAllocator { +public: + MemoryAllocator( + uint64_t minAddress, + uint64_t maxAddress, + uint32_t pageAlign, + uint32_t blockAlign) + : nextAddress_(minAddress) + , maxAddress_(maxAddress) + , pageAlign_(pageAlign) + , blockAlign_(blockAlign) + , pages_(nullptr) + {} + + ~MemoryAllocator() { + // Free allocated pages + page_t* pCurPage = pages_; + while (pCurPage) { + auto nextPage = pCurPage->next; + this->DeletePage(pCurPage); + pCurPage = nextPage; + } + } + + int allocate(uint64_t size, uint64_t* addr) { + if (size == 0 || addr == nullptr) + return -1; + + // Align allocation size + size = AlignSize(size, blockAlign_); + + // Walk thru all pages to find a free block + block_t* pFreeBlock = nullptr; + auto pCurPage = pages_; + while 
(pCurPage) { + auto pCurBlock = pCurPage->pFreeSList; + if (pCurBlock) { + // The free list is already sorted with biggest block on top, + // just check if the last block has enough space. + if (pCurBlock->size >= size) { + // Find the smallest matching block + while (pCurBlock->nextFreeS + && (pCurBlock->nextFreeS->size >= size)) { + pCurBlock = pCurBlock->nextFreeS; + } + // Return the free block + pFreeBlock = pCurBlock; + break; + } + } + pCurPage = pCurPage->next; + } + + if (nullptr == pFreeBlock) { + // Allocate a new page for this request + pCurPage = this->NewPage(size); + if (nullptr == pCurPage) + return -1; + pFreeBlock = pCurPage->pFreeSList; + } + + // Remove the block from the free lists + assert(pFreeBlock->size >= size); + pCurPage->RemoveFreeMBlock(pFreeBlock); + pCurPage->RemoveFreeSBlock(pFreeBlock); + + // If the free block we have found is larger than what we are looking for, + // we may be able to split our free block in two. + uint64_t extraBytes = pFreeBlock->size - size; + if (extraBytes >= blockAlign_) { + // Reduce the free block size to the requested value + pFreeBlock->size = size; + + // Allocate a new block to contain the extra buffer + auto nextAddr = pFreeBlock->addr + size; + auto pNewBlock = new block_t(nextAddr, extraBytes); + + // Add the new block to the free lists + pCurPage->InsertFreeMBlock(pNewBlock); + pCurPage->InsertFreeSBlock(pNewBlock); + } + + // Insert the free block into the used list + pCurPage->InsertUsedBlock(pFreeBlock); + + // Return the free block address + *addr = pFreeBlock->addr; + + return 0; + } + + int release(uint64_t addr) { + // Walk all pages to find the pointer + block_t* pUsedBlock = nullptr; + auto pCurPage = pages_; + while (pCurPage) { + if ((pCurPage->addr < addr) + && ((pCurPage->addr + pCurPage->size) > addr)) { + auto pCurBlock = pCurPage->pUsedList; + while (pCurBlock) { + if (pCurBlock->addr == addr) { + pUsedBlock = pCurBlock; + break; + } + pCurBlock = pCurBlock->nextUsed; + } + if 
(pUsedBlock) + break; + } + pCurPage = pCurPage->next; + } + + // found the corresponding block? + if (nullptr == pUsedBlock) + return -1; + + // Remove the block from the used list + pCurPage->RemoveUsedBlock(pUsedBlock); + + // Insert the block into the free M-list. + pCurPage->InsertFreeMBlock(pUsedBlock); + + // Check if we can merge adjacent free blocks from the left. + if (pUsedBlock->prevFreeM) { + // Calculate the previous address + auto prevAddr = pUsedBlock->prevFreeM->addr + pUsedBlock->prevFreeM->size; + if (pUsedBlock->addr == prevAddr) { + auto pMergedBlock = pUsedBlock->prevFreeM; + + // Detach left block from the free S-list + pCurPage->RemoveFreeSBlock(pMergedBlock); + + // Merge the blocks to the left + pMergedBlock->size += pUsedBlock->size; + pMergedBlock->nextFreeM = pUsedBlock->nextFreeM; + if (pMergedBlock->nextFreeM) { + pMergedBlock->nextFreeM->prevFreeM = pMergedBlock; + } + pUsedBlock = pMergedBlock; + } + } + + // Check if we can merge adjacent free blocks from the right. + if (pUsedBlock->nextFreeM) { + // Calculate the next allocation start address + auto nextMem = pUsedBlock->addr + pUsedBlock->size; + if (pUsedBlock->nextFreeM->addr == nextMem) { + auto nextBlock = pUsedBlock->nextFreeM; + + // Detach right block from the free S-list + pCurPage->RemoveFreeSBlock(nextBlock); + + // Merge the blocks to the right + pUsedBlock->size += nextBlock->size; + pUsedBlock->nextFreeM = nextBlock->nextFreeM; + if (pUsedBlock->nextFreeM) { + pUsedBlock->nextFreeM->prevFreeM = pUsedBlock; + } + } + } + + // Insert the block into the free S-list. 
+ pCurPage->InsertFreeSBlock(pUsedBlock); + + // Check if we can free empty pages + if (nullptr == pCurPage->pUsedList) { + // Try to delete the page + while (pCurPage && this->DeletePage(pCurPage)) { + pCurPage = this->NextEmptyPage(); + } + + } + + return 0; + } + +private: + + struct block_t { + block_t* nextFreeS; + block_t* prevFreeS; + + block_t* nextFreeM; + block_t* prevFreeM; + + block_t* nextUsed; + block_t* prevUsed; + + uint64_t addr; + uint64_t size; + + block_t(uint64_t addr, uint64_t size) + : nextFreeS(nullptr) + , prevFreeS(nullptr) + , nextFreeM(nullptr) + , prevFreeM(nullptr) + , nextUsed(nullptr) + , prevUsed(nullptr) + , addr(addr) + , size(size) + {} + }; + + struct page_t { + page_t* next; + + // List of used blocks + block_t* pUsedList; + + // List with blocks sorted by decreasing sizes + // Used for block lookup during memory allocation. + block_t* pFreeSList; + + // List with blocks sorted by increasing memory addresses + // Used for block merging during memory release. 
+ block_t* pFreeMList; + + uint64_t addr; + uint64_t size; + + page_t(uint64_t addr, uint64_t size) : + next(nullptr), + pUsedList(nullptr), + addr(addr), + size(size) { + pFreeSList = pFreeMList = new block_t(addr, size); + } + + void InsertUsedBlock(block_t* pBlock) { + pBlock->nextUsed = pUsedList; + if (pUsedList) { + pUsedList->prevUsed = pBlock; + } + pUsedList = pBlock; + } + + void RemoveUsedBlock(block_t* pBlock) { + if (pBlock->prevUsed) { + pBlock->prevUsed->nextUsed = pBlock->nextUsed; + } else { + pUsedList = pBlock->nextUsed; + } + if (pBlock->nextUsed) { + pBlock->nextUsed->prevUsed = pBlock->prevUsed; + } + pBlock->nextUsed = nullptr; + pBlock->prevUsed = nullptr; + } + + void InsertFreeMBlock(block_t* pBlock) { + block_t* pCurBlock = pFreeMList; + block_t* prevBlock = nullptr; + while (pCurBlock && (pCurBlock->addr < pBlock->addr)) { + prevBlock = pCurBlock; + pCurBlock = pCurBlock->nextFreeM; + } + pBlock->nextFreeM = pCurBlock; + pBlock->prevFreeM = prevBlock; + if (prevBlock) { + prevBlock->nextFreeM = pBlock; + } else { + pFreeMList = pBlock; + } + if (pCurBlock) { + pCurBlock->prevFreeM = pBlock; + } + } + + void RemoveFreeMBlock(block_t* pBlock) { + if (pBlock->prevFreeM) { + pBlock->prevFreeM->nextFreeM = pBlock->nextFreeM; + } else { + pFreeMList = pBlock->nextFreeM; + } + if (pBlock->nextFreeM) { + pBlock->nextFreeM->prevFreeM = pBlock->prevFreeM; + } + pBlock->nextFreeM = nullptr; + pBlock->prevFreeM = nullptr; + } + + void InsertFreeSBlock(block_t* pBlock) { + block_t* pCurBlock = this->pFreeSList; + block_t* prevBlock = nullptr; + while (pCurBlock && (pCurBlock->size > pBlock->size)) { + prevBlock = pCurBlock; + pCurBlock = pCurBlock->nextFreeS; + } + pBlock->nextFreeS = pCurBlock; + pBlock->prevFreeS = prevBlock; + if (prevBlock) { + prevBlock->nextFreeS = pBlock; + } else { + this->pFreeSList = pBlock; + } + if (pCurBlock) { + pCurBlock->prevFreeS = pBlock; + } + } + + void RemoveFreeSBlock(block_t* pBlock) { + if (pBlock->prevFreeS) 
{ + pBlock->prevFreeS->nextFreeS = pBlock->nextFreeS; + } else { + pFreeSList = pBlock->nextFreeS; + } + if (pBlock->nextFreeS) { + pBlock->nextFreeS->prevFreeS = pBlock->prevFreeS; + } + pBlock->nextFreeS = nullptr; + pBlock->prevFreeS = nullptr; + } + }; + + page_t* NewPage(uint64_t size) { + // Increase buffer size to include the page and first block size + // also add padding to ensure page alignment + size = AlignSize(size, pageAlign_); + + // Allocate page memory + auto addr = nextAddress_; + nextAddress_ += size; + + // Overflow check + if (nextAddress_ > maxAddress_) + return nullptr; + + // Allocate the page + auto pNewPage = new page_t(addr, size); + + // Insert the new page into the list + pNewPage->next = pages_; + pages_ = pNewPage; + + return pNewPage; + } + + bool DeletePage(page_t* pPage) { + // The page should be empty + assert(nullptr == pPage->pUsedList); + assert(pPage->pFreeMList && (nullptr == pPage->pFreeMList->nextFreeM)); + + // Only delete top-level pages + auto nextAddr = pPage->addr + pPage->size; + if (nextAddr != nextAddress_) + return false; + + // Remove the page from the list + page_t* prevPage = nullptr; + auto pCurPage = pages_; + while (pCurPage) { + if (pCurPage == pPage) { + if (prevPage) { + prevPage->next = pCurPage->next; + } else { + pages_ = pCurPage->next; + } + break; + } + prevPage = pCurPage; + pCurPage = pCurPage->next; + } + + // Update next allocation address + nextAddress_ = pPage->addr; + + return true; + } + + page_t* NextEmptyPage() { + auto pCurPage = pages_; + while (pCurPage) { + if (nullptr == pCurPage->pUsedList) + return pCurPage; + pCurPage = pCurPage->next; + } + return nullptr; + } + + static uint64_t AlignSize(uint64_t size, uint64_t alignment) { + assert(0 == (alignment & (alignment - 1))); + return (size + alignment - 1) & ~(alignment - 1); + } + + uint64_t nextAddress_; + uint64_t maxAddress_; + uint32_t pageAlign_; + uint32_t blockAlign_; + page_t* pages_; +}; + +} // namespace vortex \ No newline 
at end of file diff --git a/driver/common/vx_utils.cpp b/driver/common/vx_utils.cpp index a69df27c..4ca5377d 100644 --- a/driver/common/vx_utils.cpp +++ b/driver/common/vx_utils.cpp @@ -22,7 +22,7 @@ extern int vx_upload_kernel_bytes(vx_device_h device, const void* content, uint6 if (NULL == content || 0 == size) return -1; - uint32_t buffer_transfer_size = 65536; + uint32_t buffer_transfer_size = 65536; // 64 KB uint64_t kernel_base_addr; err = vx_dev_caps(device, VX_CAPS_KERNEL_BASE_ADDR, &kernel_base_addr); if (err != 0) @@ -30,7 +30,7 @@ extern int vx_upload_kernel_bytes(vx_device_h device, const void* content, uint6 // allocate device buffer vx_buffer_h buffer; - err = vx_alloc_shared_mem(device, buffer_transfer_size, &buffer); + err = vx_buf_alloc(device, buffer_transfer_size, &buffer); if (err != 0) return -1; @@ -54,13 +54,13 @@ extern int vx_upload_kernel_bytes(vx_device_h device, const void* content, uint6 err = vx_copy_to_dev(buffer, kernel_base_addr + offset, chunk_size, 0); if (err != 0) { - vx_buf_release(buffer); + vx_buf_free(buffer); return err; } offset += chunk_size; } - vx_buf_release(buffer); + vx_buf_free(buffer); return 0; } @@ -149,7 +149,7 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) { return ret; vx_buffer_h staging_buf; - ret = vx_alloc_shared_mem(device, 64 * sizeof(uint32_t), &staging_buf); + ret = vx_buf_alloc(device, 64 * sizeof(uint32_t), &staging_buf); if (ret != 0) return ret; @@ -158,7 +158,7 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) { for (unsigned core_id = 0; core_id < num_cores; ++core_id) { ret = vx_copy_from_dev(staging_buf, IO_CSR_ADDR + 64 * sizeof(uint32_t) * core_id, 64 * sizeof(uint32_t), 0); if (ret != 0) { - vx_buf_release(staging_buf); + vx_buf_free(staging_buf); return ret; } @@ -336,7 +336,21 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) { #endif // release allocated resources - vx_buf_release(staging_buf); + vx_buf_free(staging_buf); return ret; +} + +// Deprecated 
API functions + +extern int vx_alloc_shared_mem(vx_device_h hdevice, uint64_t size, vx_buffer_h* hbuffer) { + return vx_buf_alloc(hdevice, size, hbuffer); +} + +extern int vx_buf_release(vx_buffer_h hbuffer) { + return vx_buf_free(hbuffer); +} + +extern int vx_alloc_dev_mem(vx_device_h hdevice, uint64_t size, uint64_t* dev_maddr) { + return vx_mem_alloc(hdevice, size, dev_maddr); } \ No newline at end of file diff --git a/driver/fpga/Makefile b/driver/fpga/Makefile index bdc12d60..e5a5417f 100644 --- a/driver/fpga/Makefile +++ b/driver/fpga/Makefile @@ -6,8 +6,7 @@ SCRIPT_DIR=../../hw/scripts OPAE_SYN_DIR=../../hw/syn/opae -CXXFLAGS += -std=c++11 -O2 -DNDEBUG -Wall -Wextra -pedantic -Wfatal-errors -#CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors +CXXFLAGS += -std=c++11 -Wall -Wextra -pedantic -Wfatal-errors CXXFLAGS += -I. -I../include -I../../hw -I$(OPAE_HOME)/include -I$(OPAE_SYN_DIR) @@ -39,6 +38,13 @@ PROJECT = libvortex.so SRCS = ../common/opae.cpp ../common/vx_utils.cpp +# Debugging +ifdef DEBUG + CXXFLAGS += -g -O0 +else + CXXFLAGS += -O2 -DNDEBUG +endif + # Enable scope analyzer ifdef SCOPE CXXFLAGS += -DSCOPE diff --git a/driver/include/vortex.h b/driver/include/vortex.h index 0fc9c5ce..693e4fac 100644 --- a/driver/include/vortex.h +++ b/driver/include/vortex.h @@ -35,16 +35,19 @@ int vx_dev_close(vx_device_h hdevice); int vx_dev_caps(vx_device_h hdevice, uint32_t caps_id, uint64_t *value); // Allocate shared buffer with device -int vx_alloc_shared_mem(vx_device_h hdevice, uint64_t size, vx_buffer_h* hbuffer); +int vx_buf_alloc(vx_device_h hdevice, uint64_t size, vx_buffer_h* hbuffer); + +// release buffer +int vx_buf_free(vx_buffer_h hbuffer); // Get host pointer address void* vx_host_ptr(vx_buffer_h hbuffer); -// release buffer -int vx_buf_release(vx_buffer_h hbuffer); - // allocate device memory and return address -int vx_alloc_dev_mem(vx_device_h hdevice, uint64_t size, uint64_t* dev_maddr); +int vx_mem_alloc(vx_device_h hdevice, 
uint64_t size, uint64_t* dev_maddr); + +// release device memory +int vx_mem_free(vx_device_h hdevice, uint64_t dev_maddr); // Copy bytes from buffer to device local memory int vx_copy_to_dev(vx_buffer_h hbuffer, uint64_t dev_maddr, uint64_t size, uint64_t src_offset); @@ -69,6 +72,11 @@ int vx_upload_kernel_file(vx_device_h device, const char* filename); // dump performance counters int vx_dump_perf(vx_device_h device, FILE* stream); +//////////////////////////// DEPRECATED FUNCTIONS ///////////////////////////// +int vx_alloc_dev_mem(vx_device_h hdevice, uint64_t size, uint64_t* dev_maddr); +int vx_alloc_shared_mem(vx_device_h hdevice, uint64_t size, vx_buffer_h* hbuffer); +int vx_buf_release(vx_buffer_h hbuffer); + #ifdef __cplusplus } #endif diff --git a/driver/rtlsim/Makefile b/driver/rtlsim/Makefile index 72d3a07a..1b81076b 100644 --- a/driver/rtlsim/Makefile +++ b/driver/rtlsim/Makefile @@ -1,7 +1,6 @@ RTLSIM_DIR = ../../sim/rtlsim -CXXFLAGS += -std=c++11 -O2 -DNDEBUG -Wall -Wextra -pedantic -Wfatal-errors -#CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors +CXXFLAGS += -std=c++11 -Wall -Wextra -pedantic -Wfatal-errors CXXFLAGS += -I../include -I../common -I../../hw -I$(RTLSIM_DIR) -I$(RTLSIM_DIR)/../common @@ -19,6 +18,13 @@ LDFLAGS += -L. 
-lrtlsim SRCS = vortex.cpp ../common/vx_utils.cpp +# Debugging +ifdef DEBUG + CXXFLAGS += -g -O0 +else + CXXFLAGS += -O2 -DNDEBUG +endif + # Enable perf counters ifdef PERF CXXFLAGS += -DPERF_ENABLE diff --git a/driver/rtlsim/vortex.cpp b/driver/rtlsim/vortex.cpp index 85f7054c..8e180339 100644 --- a/driver/rtlsim/vortex.cpp +++ b/driver/rtlsim/vortex.cpp @@ -8,6 +8,7 @@ #include #include +#include #include #include #include @@ -60,7 +61,11 @@ class vx_device { public: vx_device() : ram_(RAM_PAGE_SIZE) - , mem_allocation_(ALLOC_BASE_ADDR) + , mem_allocator_( + ALLOC_BASE_ADDR, + ALLOC_BASE_ADDR + LOCAL_MEM_SIZE, + RAM_PAGE_SIZE, + CACHE_BLOCK_SIZE) { processor_.attach_ram(&ram_); } @@ -72,13 +77,11 @@ public: } int alloc_local_mem(uint64_t size, uint64_t* dev_maddr) { - uint64_t dev_mem_size = LOCAL_MEM_SIZE; - uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE); - if (mem_allocation_ + asize > dev_mem_size) - return -1; - *dev_maddr = mem_allocation_; - mem_allocation_ += asize; - return 0; + return mem_allocator_.allocate(size, dev_maddr); + } + + int free_local_mem(uint64_t dev_maddr) { + return mem_allocator_.release(dev_maddr); } int upload(const void* src, uint64_t dest_addr, uint64_t size, uint64_t src_offset) { @@ -149,7 +152,7 @@ private: RAM ram_; Processor processor_; - uint64_t mem_allocation_; + MemoryAllocator mem_allocator_; std::future future_; }; @@ -250,7 +253,7 @@ extern int vx_dev_close(vx_device_h hdevice) { return 0; } -extern int vx_alloc_dev_mem(vx_device_h hdevice, uint64_t size, uint64_t* dev_maddr) { +extern int vx_mem_alloc(vx_device_h hdevice, uint64_t size, uint64_t* dev_maddr) { if (nullptr == hdevice || nullptr == dev_maddr || 0 >= size) @@ -260,8 +263,15 @@ extern int vx_alloc_dev_mem(vx_device_h hdevice, uint64_t size, uint64_t* dev_ma return device->alloc_local_mem(size, dev_maddr); } +extern int vx_mem_free(vx_device_h hdevice, uint64_t dev_maddr) { + if (nullptr == hdevice) + return -1; -extern int 
vx_alloc_shared_mem(vx_device_h hdevice, uint64_t size, vx_buffer_h* hbuffer) { + vx_device *device = ((vx_device*)hdevice); + return device->free_local_mem(dev_maddr); +} + +extern int vx_buf_alloc(vx_device_h hdevice, uint64_t size, vx_buffer_h* hbuffer) { if (nullptr == hdevice || 0 >= size || nullptr == hbuffer) @@ -289,7 +299,7 @@ extern void* vx_host_ptr(vx_buffer_h hbuffer) { return buffer->data(); } -extern int vx_buf_release(vx_buffer_h hbuffer) { +extern int vx_buf_free(vx_buffer_h hbuffer) { if (nullptr == hbuffer) return -1; diff --git a/driver/simx/Makefile b/driver/simx/Makefile index b5723972..d563637c 100644 --- a/driver/simx/Makefile +++ b/driver/simx/Makefile @@ -1,7 +1,6 @@ SIMX_DIR = ../../sim/simx -CXXFLAGS += -std=c++11 -O2 -Wall -Wextra -Wfatal-errors -#CXXFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors +CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors CXXFLAGS += -fPIC -Wno-maybe-uninitialized CXXFLAGS += -I../include -I../common -I../../hw -I$(SIMX_DIR) -I$(SIMX_DIR)/../common @@ -11,7 +10,14 @@ CXXFLAGS += -DDUMP_PERF_STATS LDFLAGS += -shared -pthread LDFLAGS += -L. 
-lsimx -SRCS = vortex.cpp ../common/vx_utils.cpp +SRCS = vortex.cpp ../common/vx_utils.cpp + +# Debugging +ifdef DEBUG + CXXFLAGS += -g -O0 +else + CXXFLAGS += -O2 -DNDEBUG +endif PROJECT = libvortex.so diff --git a/driver/simx/vortex.cpp b/driver/simx/vortex.cpp index e1897139..b02cbbce 100644 --- a/driver/simx/vortex.cpp +++ b/driver/simx/vortex.cpp @@ -8,6 +8,7 @@ #include #include +#include #include @@ -66,7 +67,11 @@ public: : arch_("rv32i", NUM_CORES * NUM_CLUSTERS, NUM_WARPS, NUM_THREADS) , ram_(RAM_PAGE_SIZE) , processor_(arch_) - , mem_allocation_(ALLOC_BASE_ADDR) + , mem_allocator_( + ALLOC_BASE_ADDR, + ALLOC_BASE_ADDR + LOCAL_MEM_SIZE, + RAM_PAGE_SIZE, + CACHE_BLOCK_SIZE) { // attach memory module processor_.attach_ram(&ram_); @@ -76,16 +81,14 @@ public: if (future_.valid()) { future_.wait(); } - } + } int alloc_local_mem(uint64_t size, uint64_t* dev_maddr) { - uint64_t dev_mem_size = LOCAL_MEM_SIZE; - uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE); - if (mem_allocation_ + asize > dev_mem_size) - return -1; - *dev_maddr = mem_allocation_; - mem_allocation_ += asize; - return 0; + return mem_allocator_.allocate(size, dev_maddr); + } + + int free_local_mem(uint64_t dev_maddr) { + return mem_allocator_.release(dev_maddr); } int upload(const void* src, uint64_t dest_addr, uint64_t size, uint64_t src_offset) { @@ -151,7 +154,7 @@ private: ArchDef arch_; RAM ram_; Processor processor_; - uint64_t mem_allocation_; + MemoryAllocator mem_allocator_; std::future future_; }; @@ -252,7 +255,7 @@ extern int vx_dev_caps(vx_device_h hdevice, uint32_t caps_id, uint64_t *value) { return 0; } -extern int vx_alloc_dev_mem(vx_device_h hdevice, uint64_t size, uint64_t* dev_maddr) { +extern int vx_mem_alloc(vx_device_h hdevice, uint64_t size, uint64_t* dev_maddr) { if (nullptr == hdevice || nullptr == dev_maddr || 0 >= size) @@ -262,7 +265,15 @@ extern int vx_alloc_dev_mem(vx_device_h hdevice, uint64_t size, uint64_t* dev_ma return device->alloc_local_mem(size, 
dev_maddr); } -extern int vx_alloc_shared_mem(vx_device_h hdevice, uint64_t size, vx_buffer_h* hbuffer) { +extern int vx_mem_free(vx_device_h hdevice, uint64_t dev_maddr) { + if (nullptr == hdevice) + return -1; + + vx_device *device = ((vx_device*)hdevice); + return device->free_local_mem(dev_maddr); +} + +extern int vx_buf_alloc(vx_device_h hdevice, uint64_t size, vx_buffer_h* hbuffer) { if (nullptr == hdevice || 0 >= size || nullptr == hbuffer) @@ -290,7 +301,7 @@ extern void* vx_host_ptr(vx_buffer_h hbuffer) { return buffer->data(); } -extern int vx_buf_release(vx_buffer_h hbuffer) { +extern int vx_buf_free(vx_buffer_h hbuffer) { if (nullptr == hbuffer) return -1; diff --git a/driver/stub/Makefile b/driver/stub/Makefile index 985ed30e..aebac2e2 100644 --- a/driver/stub/Makefile +++ b/driver/stub/Makefile @@ -1,5 +1,4 @@ -CXXFLAGS += -std=c++11 -O3 -Wall -Wextra -pedantic -Wfatal-errors -#CXXFLAGS += -std=c++11 -g -O0 -Wall -Wextra -pedantic -Wfatal-errors +CXXFLAGS += -std=c++11 -O2 -Wall -Wextra -pedantic -Wfatal-errors CXXFLAGS += -I../include -I../../runtime -I../../hw diff --git a/driver/stub/vortex.cpp b/driver/stub/vortex.cpp index 95777257..1fa86796 100644 --- a/driver/stub/vortex.cpp +++ b/driver/stub/vortex.cpp @@ -12,11 +12,15 @@ extern int vx_dev_caps(vx_device_h /*hdevice*/, uint32_t /*caps_id*/, uint64_t* return -1; } -extern int vx_alloc_dev_mem(vx_device_h /*hdevice*/, uint64_t /*size*/, uint64_t* /*dev_maddr*/) { +extern int vx_mem_alloc(vx_device_h /*hdevice*/, uint64_t /*size*/, uint64_t* /*dev_maddr*/) { return -1; } -extern int vx_alloc_shared_mem(vx_device_h /*hdevice*/, uint64_t /*size*/, vx_buffer_h* /*hbuffer*/) { +int vx_mem_free(vx_device_h /*hdevice*/, uint64_t /*dev_maddr*/) { + return -1; +} + +extern int vx_buf_alloc(vx_device_h /*hdevice*/, uint64_t /*size*/, vx_buffer_h* /*hbuffer*/) { return -1; } @@ -24,7 +28,7 @@ extern void* vx_host_ptr(vx_buffer_h /*hbuffer*/) { return nullptr; } -extern int vx_buf_release(vx_buffer_h 
/*hbuffer*/) { +extern int vx_buf_free(vx_buffer_h /*hbuffer*/) { return -1; } diff --git a/sim/simx/Makefile b/sim/simx/Makefile index 1d081c9e..29761c1a 100644 --- a/sim/simx/Makefile +++ b/sim/simx/Makefile @@ -13,8 +13,6 @@ LDFLAGS += $(THIRD_PARTY_DIR)/softfloat/build/Linux-x86_64-GCC/softfloat.a LDFLAGS += -L$(THIRD_PARTY_DIR)/cocogfx -lcocogfx LDFLAGS += -L$(THIRD_PARTY_DIR)/ramulator -lramulator -TOP = vx_cache_sim - SRCS = ../common/util.cpp ../common/mem.cpp ../common/rvfloats.cpp SRCS += args.cpp cache.cpp memsim.cpp warp.cpp core.cpp decode.cpp execute.cpp exeunit.cpp tex_unit.cpp processor.cpp diff --git a/tests/regression/basic/common.h b/tests/regression/basic/common.h index e496cf34..83116503 100644 --- a/tests/regression/basic/common.h +++ b/tests/regression/basic/common.h @@ -5,8 +5,8 @@ typedef struct { uint32_t count; - uint32_t src_ptr; - uint32_t dst_ptr; + uint32_t src_addr; + uint32_t dst_addr; } kernel_arg_t; #endif \ No newline at end of file diff --git a/tests/regression/basic/kernel.c b/tests/regression/basic/kernel.c index bc5ec076..cc2c0fd1 100644 --- a/tests/regression/basic/kernel.c +++ b/tests/regression/basic/kernel.c @@ -5,8 +5,8 @@ void main() { kernel_arg_t* arg = (kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR; uint32_t count = arg->count; - int32_t* src_ptr = (int32_t*)arg->src_ptr; - int32_t* dst_ptr = (int32_t*)arg->dst_ptr; + int32_t* src_ptr = (int32_t*)arg->src_addr; + int32_t* dst_ptr = (int32_t*)arg->dst_addr; uint32_t offset = vx_core_id() * count; diff --git a/tests/regression/basic/main.cpp b/tests/regression/basic/main.cpp index fcea1fda..5183b04c 100755 --- a/tests/regression/basic/main.cpp +++ b/tests/regression/basic/main.cpp @@ -24,6 +24,7 @@ uint32_t count = 0; vx_device_h device = nullptr; vx_buffer_h staging_buf = nullptr; +kernel_arg_t kernel_arg; static void show_usage() { std::cout << "Vortex Test." 
<< std::endl; @@ -57,9 +58,11 @@ static void parse_args(int argc, char **argv) { void cleanup() { if (staging_buf) { - vx_buf_release(staging_buf); + vx_buf_free(staging_buf); } if (device) { + vx_mem_free(device, kernel_arg.src_addr); + vx_mem_free(device, kernel_arg.dst_addr); vx_dev_close(device); } } @@ -152,7 +155,7 @@ int run_kernel_test(const kernel_arg_t& kernel_arg, } std::cout << "upload source buffer" << std::endl; auto t0 = std::chrono::high_resolution_clock::now(); - RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src_ptr, buf_size, 0)); + RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src_addr, buf_size, 0)); auto t1 = std::chrono::high_resolution_clock::now(); // clear destination buffer @@ -163,7 +166,7 @@ int run_kernel_test(const kernel_arg_t& kernel_arg, } } std::cout << "clear destination buffer" << std::endl; - RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_ptr, buf_size, 0)); + RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_addr, buf_size, 0)); // start device std::cout << "start execution" << std::endl; @@ -175,7 +178,7 @@ int run_kernel_test(const kernel_arg_t& kernel_arg, // read destination buffer from local memory std::cout << "read destination buffer from local memory" << std::endl; auto t4 = std::chrono::high_resolution_clock::now(); - RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_ptr, buf_size, 0)); + RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_addr, buf_size, 0)); auto t5 = std::chrono::high_resolution_clock::now(); @@ -215,8 +218,7 @@ int run_kernel_test(const kernel_arg_t& kernel_arg, int main(int argc, char *argv[]) { size_t value; - kernel_arg_t kernel_arg; - + // parse command arguments parse_args(argc, argv); @@ -238,25 +240,25 @@ int main(int argc, char *argv[]) { std::cout << "buffer size: " << buf_size << " bytes" << std::endl; // allocate device memory - RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value)); - kernel_arg.src_ptr = value; - RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value)); - 
kernel_arg.dst_ptr = value; + RT_CHECK(vx_mem_alloc(device, buf_size, &value)); + kernel_arg.src_addr = value; + RT_CHECK(vx_mem_alloc(device, buf_size, &value)); + kernel_arg.dst_addr = value; kernel_arg.count = num_points; - std::cout << "dev_src=" << std::hex << kernel_arg.src_ptr << std::endl; - std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::endl; + std::cout << "dev_src=" << std::hex << kernel_arg.src_addr << std::endl; + std::cout << "dev_dst=" << std::hex << kernel_arg.dst_addr << std::endl; // allocate shared memory std::cout << "allocate shared memory" << std::endl; uint32_t alloc_size = std::max(buf_size, sizeof(kernel_arg_t)); - RT_CHECK(vx_alloc_shared_mem(device, alloc_size, &staging_buf)); + RT_CHECK(vx_buf_alloc(device, alloc_size, &staging_buf)); // run tests if (0 == test || -1 == test) { std::cout << "run memcopy test" << std::endl; - RT_CHECK(run_memcopy_test(kernel_arg.src_ptr, 0x0badf00d40ff40ff, num_blocks)); + RT_CHECK(run_memcopy_test(kernel_arg.src_addr, 0x0badf00d40ff40ff, num_blocks)); } if (1 == test || -1 == test) { diff --git a/tests/regression/demo/common.h b/tests/regression/demo/common.h index 3a38ae43..29e9bcf5 100644 --- a/tests/regression/demo/common.h +++ b/tests/regression/demo/common.h @@ -6,9 +6,9 @@ typedef struct { uint32_t num_tasks; uint32_t task_size; - uint32_t src0_ptr; - uint32_t src1_ptr; - uint32_t dst_ptr; + uint32_t src0_addr; + uint32_t src1_addr; + uint32_t dst_addr; } kernel_arg_t; #endif \ No newline at end of file diff --git a/tests/regression/demo/kernel.c b/tests/regression/demo/kernel.c index 40fe4273..3f80cb41 100644 --- a/tests/regression/demo/kernel.c +++ b/tests/regression/demo/kernel.c @@ -5,9 +5,9 @@ void kernel_body(int task_id, kernel_arg_t* arg) { uint32_t count = arg->task_size; - int32_t* src0_ptr = (int32_t*)arg->src0_ptr; - int32_t* src1_ptr = (int32_t*)arg->src1_ptr; - int32_t* dst_ptr = (int32_t*)arg->dst_ptr; + int32_t* src0_ptr = (int32_t*)arg->src0_addr; + int32_t* 
src1_ptr = (int32_t*)arg->src1_addr; + int32_t* dst_ptr = (int32_t*)arg->dst_addr; uint32_t offset = task_id * count; diff --git a/tests/regression/demo/main.cpp b/tests/regression/demo/main.cpp index 29cc7d85..67a637f1 100644 --- a/tests/regression/demo/main.cpp +++ b/tests/regression/demo/main.cpp @@ -21,6 +21,7 @@ uint32_t count = 0; vx_device_h device = nullptr; vx_buffer_h staging_buf = nullptr; +kernel_arg_t kernel_arg; static void show_usage() { std::cout << "Vortex Test." << std::endl; @@ -51,9 +52,12 @@ static void parse_args(int argc, char **argv) { void cleanup() { if (staging_buf) { - vx_buf_release(staging_buf); + vx_buf_free(staging_buf); } - if (device) { + if (device) { + vx_mem_free(device, kernel_arg.src0_addr); + vx_mem_free(device, kernel_arg.src1_addr); + vx_mem_free(device, kernel_arg.dst_addr); vx_dev_close(device); } } @@ -71,7 +75,7 @@ int run_test(const kernel_arg_t& kernel_arg, // download destination buffer std::cout << "download destination buffer" << std::endl; - RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_ptr, buf_size, 0)); + RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_addr, buf_size, 0)); // verify result std::cout << "verify result" << std::endl; @@ -99,7 +103,6 @@ int run_test(const kernel_arg_t& kernel_arg, int main(int argc, char *argv[]) { size_t value; - kernel_arg_t kernel_arg; // parse command arguments parse_args(argc, argv); @@ -131,24 +134,24 @@ int main(int argc, char *argv[]) { // allocate device memory std::cout << "allocate device memory" << std::endl; - RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value)); - kernel_arg.src0_ptr = value; - RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value)); - kernel_arg.src1_ptr = value; - RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value)); - kernel_arg.dst_ptr = value; + RT_CHECK(vx_mem_alloc(device, buf_size, &value)); + kernel_arg.src0_addr = value; + RT_CHECK(vx_mem_alloc(device, buf_size, &value)); + kernel_arg.src1_addr = value; + 
RT_CHECK(vx_mem_alloc(device, buf_size, &value)); + kernel_arg.dst_addr = value; kernel_arg.num_tasks = num_tasks; kernel_arg.task_size = count; - std::cout << "dev_src0=" << std::hex << kernel_arg.src0_ptr << std::endl; - std::cout << "dev_src1=" << std::hex << kernel_arg.src1_ptr << std::endl; - std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::endl; + std::cout << "dev_src0=" << std::hex << kernel_arg.src0_addr << std::endl; + std::cout << "dev_src1=" << std::hex << kernel_arg.src1_addr << std::endl; + std::cout << "dev_dst=" << std::hex << kernel_arg.dst_addr << std::endl; // allocate shared memory std::cout << "allocate shared memory" << std::endl; uint32_t alloc_size = std::max(buf_size, sizeof(kernel_arg_t)); - RT_CHECK(vx_alloc_shared_mem(device, alloc_size, &staging_buf)); + RT_CHECK(vx_buf_alloc(device, alloc_size, &staging_buf)); // upload kernel argument std::cout << "upload kernel argument" << std::endl; @@ -166,7 +169,7 @@ int main(int argc, char *argv[]) { } } std::cout << "upload source buffer0" << std::endl; - RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src0_ptr, buf_size, 0)); + RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src0_addr, buf_size, 0)); // upload source buffer1 { @@ -176,7 +179,7 @@ int main(int argc, char *argv[]) { } } std::cout << "upload source buffer1" << std::endl; - RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src1_ptr, buf_size, 0)); + RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src1_addr, buf_size, 0)); // clear destination buffer { @@ -186,7 +189,7 @@ int main(int argc, char *argv[]) { } } std::cout << "clear destination buffer" << std::endl; - RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_ptr, buf_size, 0)); + RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_addr, buf_size, 0)); // run tests std::cout << "run tests" << std::endl; diff --git a/tests/regression/diverge/common.h b/tests/regression/diverge/common.h index 6346c58e..ccb28bb9 100644 --- a/tests/regression/diverge/common.h +++ 
b/tests/regression/diverge/common.h @@ -5,8 +5,8 @@ typedef struct { uint32_t num_points; - uint32_t src_ptr; - uint32_t dst_ptr; + uint32_t src_addr; + uint32_t dst_addr; } kernel_arg_t; #endif \ No newline at end of file diff --git a/tests/regression/diverge/kernel.c b/tests/regression/diverge/kernel.c index 98fd2b0e..d30516b8 100644 --- a/tests/regression/diverge/kernel.c +++ b/tests/regression/diverge/kernel.c @@ -6,8 +6,8 @@ // Parallel Selection sort void kernel_body(int task_id, kernel_arg_t* arg) { - int32_t* src_ptr = (int32_t*)arg->src_ptr; - int32_t* dst_ptr = (int32_t*)arg->dst_ptr; + int32_t* src_ptr = (int32_t*)arg->src_addr; + int32_t* dst_ptr = (int32_t*)arg->dst_addr; int value = src_ptr[task_id]; diff --git a/tests/regression/diverge/main.cpp b/tests/regression/diverge/main.cpp index 778d118f..9288aac3 100644 --- a/tests/regression/diverge/main.cpp +++ b/tests/regression/diverge/main.cpp @@ -25,6 +25,7 @@ std::vector ref_data; vx_device_h device = nullptr; vx_buffer_h staging_buf = nullptr; +kernel_arg_t kernel_arg; static void show_usage() { std::cout << "Vortex Test." 
<< std::endl; @@ -55,9 +56,11 @@ static void parse_args(int argc, char **argv) { void cleanup() { if (staging_buf) { - vx_buf_release(staging_buf); + vx_buf_free(staging_buf); } if (device) { + vx_mem_free(device, kernel_arg.src_addr); + vx_mem_free(device, kernel_arg.dst_addr); vx_dev_close(device); } } @@ -125,7 +128,7 @@ int run_test(const kernel_arg_t& kernel_arg, // download destination buffer std::cout << "download destination buffer" << std::endl; - RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_ptr, buf_size, 0)); + RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_addr, buf_size, 0)); // verify result std::cout << "verify result" << std::endl; @@ -153,7 +156,6 @@ int run_test(const kernel_arg_t& kernel_arg, int main(int argc, char *argv[]) { size_t value; - kernel_arg_t kernel_arg; // parse command arguments parse_args(argc, argv); @@ -189,22 +191,22 @@ int main(int argc, char *argv[]) { // allocate device memory std::cout << "allocate device memory" << std::endl; - RT_CHECK(vx_alloc_dev_mem(device, src_buf_size, &value)); - kernel_arg.src_ptr = value; - RT_CHECK(vx_alloc_dev_mem(device, dst_buf_size, &value)); - kernel_arg.dst_ptr = value; + RT_CHECK(vx_mem_alloc(device, src_buf_size, &value)); + kernel_arg.src_addr = value; + RT_CHECK(vx_mem_alloc(device, dst_buf_size, &value)); + kernel_arg.dst_addr = value; kernel_arg.num_points = num_points; - std::cout << "dev_src=" << std::hex << kernel_arg.src_ptr << std::endl; - std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::endl; + std::cout << "dev_src=" << std::hex << kernel_arg.src_addr << std::endl; + std::cout << "dev_dst=" << std::hex << kernel_arg.dst_addr << std::endl; // allocate shared memory std::cout << "allocate shared memory" << std::endl; uint32_t staging_buf_size = std::max(src_buf_size, std::max(dst_buf_size, sizeof(kernel_arg_t))); - RT_CHECK(vx_alloc_shared_mem(device, staging_buf_size, &staging_buf)); + RT_CHECK(vx_buf_alloc(device, staging_buf_size, 
&staging_buf)); // upload kernel argument std::cout << "upload kernel argument" << std::endl; @@ -222,7 +224,7 @@ int main(int argc, char *argv[]) { } } std::cout << "upload source buffer" << std::endl; - RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src_ptr, src_buf_size, 0)); + RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src_addr, src_buf_size, 0)); // clear destination buffer { @@ -232,7 +234,7 @@ int main(int argc, char *argv[]) { } } std::cout << "clear destination buffer" << std::endl; - RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_ptr, dst_buf_size, 0)); + RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_addr, dst_buf_size, 0)); // run tests std::cout << "run tests" << std::endl; diff --git a/tests/regression/dogfood/common.h b/tests/regression/dogfood/common.h index 7e0f0b3d..588d3ed5 100644 --- a/tests/regression/dogfood/common.h +++ b/tests/regression/dogfood/common.h @@ -7,9 +7,9 @@ typedef struct { uint32_t testid; uint32_t num_tasks; uint32_t task_size; - uint32_t src0_ptr; - uint32_t src1_ptr; - uint32_t dst_ptr; + uint32_t src0_addr; + uint32_t src1_addr; + uint32_t dst_addr; } kernel_arg_t; #endif \ No newline at end of file diff --git a/tests/regression/dogfood/kernel.c b/tests/regression/dogfood/kernel.c index ce65f0d8..781545fa 100644 --- a/tests/regression/dogfood/kernel.c +++ b/tests/regression/dogfood/kernel.c @@ -13,9 +13,9 @@ inline float __ieee754_sqrtf (float x) { void kernel_iadd(int task_id, kernel_arg_t* arg) { uint32_t count = arg->task_size; - int32_t* src0_ptr = (int32_t*)arg->src0_ptr; - int32_t* src1_ptr = (int32_t*)arg->src1_ptr; - int32_t* dst_ptr = (int32_t*)arg->dst_ptr; + int32_t* src0_ptr = (int32_t*)arg->src0_addr; + int32_t* src1_ptr = (int32_t*)arg->src1_addr; + int32_t* dst_ptr = (int32_t*)arg->dst_addr; uint32_t offset = task_id * count; for (uint32_t i = 0; i < count; ++i) { @@ -28,9 +28,9 @@ void kernel_iadd(int task_id, kernel_arg_t* arg) { void kernel_imul(int task_id, kernel_arg_t* arg) { uint32_t 
count = arg->task_size; - int32_t* src0_ptr = (int32_t*)arg->src0_ptr; - int32_t* src1_ptr = (int32_t*)arg->src1_ptr; - int32_t* dst_ptr = (int32_t*)arg->dst_ptr; + int32_t* src0_ptr = (int32_t*)arg->src0_addr; + int32_t* src1_ptr = (int32_t*)arg->src1_addr; + int32_t* dst_ptr = (int32_t*)arg->dst_addr; uint32_t offset = task_id * count; for (uint32_t i = 0; i < count; ++i) { @@ -43,9 +43,9 @@ void kernel_imul(int task_id, kernel_arg_t* arg) { void kernel_idiv(int task_id, kernel_arg_t* arg) { uint32_t count = arg->task_size; - int32_t* src0_ptr = (int32_t*)arg->src0_ptr; - int32_t* src1_ptr = (int32_t*)arg->src1_ptr; - int32_t* dst_ptr = (int32_t*)arg->dst_ptr; + int32_t* src0_ptr = (int32_t*)arg->src0_addr; + int32_t* src1_ptr = (int32_t*)arg->src1_addr; + int32_t* dst_ptr = (int32_t*)arg->dst_addr; uint32_t offset = task_id * count; for (uint32_t i = 0; i < count; ++i) { @@ -58,9 +58,9 @@ void kernel_idiv(int task_id, kernel_arg_t* arg) { void kernel_idiv_mul(int task_id, kernel_arg_t* arg) { uint32_t count = arg->task_size; - int32_t* src0_ptr = (int32_t*)arg->src0_ptr; - int32_t* src1_ptr = (int32_t*)arg->src1_ptr; - int32_t* dst_ptr = (int32_t*)arg->dst_ptr; + int32_t* src0_ptr = (int32_t*)arg->src0_addr; + int32_t* src1_ptr = (int32_t*)arg->src1_addr; + int32_t* dst_ptr = (int32_t*)arg->dst_addr; uint32_t offset = task_id * count; for (uint32_t i = 0; i < count; ++i) { @@ -75,9 +75,9 @@ void kernel_idiv_mul(int task_id, kernel_arg_t* arg) { void kernel_fadd(int task_id, kernel_arg_t* arg) { uint32_t count = arg->task_size; - float* src0_ptr = (float*)arg->src0_ptr; - float* src1_ptr = (float*)arg->src1_ptr; - float* dst_ptr = (float*)arg->dst_ptr; + float* src0_ptr = (float*)arg->src0_addr; + float* src1_ptr = (float*)arg->src1_addr; + float* dst_ptr = (float*)arg->dst_addr; uint32_t offset = task_id * count; for (uint32_t i = 0; i < count; ++i) { @@ -90,9 +90,9 @@ void kernel_fadd(int task_id, kernel_arg_t* arg) { void kernel_fsub(int task_id, kernel_arg_t* 
arg) { uint32_t count = arg->task_size; - float* src0_ptr = (float*)arg->src0_ptr; - float* src1_ptr = (float*)arg->src1_ptr; - float* dst_ptr = (float*)arg->dst_ptr; + float* src0_ptr = (float*)arg->src0_addr; + float* src1_ptr = (float*)arg->src1_addr; + float* dst_ptr = (float*)arg->dst_addr; uint32_t offset = task_id * count; for (uint32_t i = 0; i < count; ++i) { @@ -105,9 +105,9 @@ void kernel_fsub(int task_id, kernel_arg_t* arg) { void kernel_fmul(int task_id, kernel_arg_t* arg) { uint32_t count = arg->task_size; - float* src0_ptr = (float*)arg->src0_ptr; - float* src1_ptr = (float*)arg->src1_ptr; - float* dst_ptr = (float*)arg->dst_ptr; + float* src0_ptr = (float*)arg->src0_addr; + float* src1_ptr = (float*)arg->src1_addr; + float* dst_ptr = (float*)arg->dst_addr; uint32_t offset = task_id * count; for (uint32_t i = 0; i < count; ++i) { @@ -120,9 +120,9 @@ void kernel_fmul(int task_id, kernel_arg_t* arg) { void kernel_fmadd(int task_id, kernel_arg_t* arg) { uint32_t count = arg->task_size; - float* src0_ptr = (float*)arg->src0_ptr; - float* src1_ptr = (float*)arg->src1_ptr; - float* dst_ptr = (float*)arg->dst_ptr; + float* src0_ptr = (float*)arg->src0_addr; + float* src1_ptr = (float*)arg->src1_addr; + float* dst_ptr = (float*)arg->dst_addr; uint32_t offset = task_id * count; for (uint32_t i = 0; i < count; ++i) { @@ -135,9 +135,9 @@ void kernel_fmadd(int task_id, kernel_arg_t* arg) { void kernel_fmsub(int task_id, kernel_arg_t* arg) { uint32_t count = arg->task_size; - float* src0_ptr = (float*)arg->src0_ptr; - float* src1_ptr = (float*)arg->src1_ptr; - float* dst_ptr = (float*)arg->dst_ptr; + float* src0_ptr = (float*)arg->src0_addr; + float* src1_ptr = (float*)arg->src1_addr; + float* dst_ptr = (float*)arg->dst_addr; uint32_t offset = task_id * count; for (uint32_t i = 0; i < count; ++i) { @@ -150,9 +150,9 @@ void kernel_fmsub(int task_id, kernel_arg_t* arg) { void kernel_fnmadd(int task_id, kernel_arg_t* arg) { uint32_t count = arg->task_size; - float* 
src0_ptr = (float*)arg->src0_ptr; - float* src1_ptr = (float*)arg->src1_ptr; - float* dst_ptr = (float*)arg->dst_ptr; + float* src0_ptr = (float*)arg->src0_addr; + float* src1_ptr = (float*)arg->src1_addr; + float* dst_ptr = (float*)arg->dst_addr; uint32_t offset = task_id * count; for (uint32_t i = 0; i < count; ++i) { @@ -165,9 +165,9 @@ void kernel_fnmadd(int task_id, kernel_arg_t* arg) { void kernel_fnmsub(int task_id, kernel_arg_t* arg) { uint32_t count = arg->task_size; - float* src0_ptr = (float*)arg->src0_ptr; - float* src1_ptr = (float*)arg->src1_ptr; - float* dst_ptr = (float*)arg->dst_ptr; + float* src0_ptr = (float*)arg->src0_addr; + float* src1_ptr = (float*)arg->src1_addr; + float* dst_ptr = (float*)arg->dst_addr; uint32_t offset = task_id * count; for (uint32_t i = 0; i < count; ++i) { @@ -180,9 +180,9 @@ void kernel_fnmsub(int task_id, kernel_arg_t* arg) { void kernel_fnmadd_madd(int task_id, kernel_arg_t* arg) { uint32_t count = arg->task_size; - float* src0_ptr = (float*)arg->src0_ptr; - float* src1_ptr = (float*)arg->src1_ptr; - float* dst_ptr = (float*)arg->dst_ptr; + float* src0_ptr = (float*)arg->src0_addr; + float* src1_ptr = (float*)arg->src1_addr; + float* dst_ptr = (float*)arg->dst_addr; uint32_t offset = task_id * count; for (uint32_t i = 0; i < count; ++i) { @@ -197,9 +197,9 @@ void kernel_fnmadd_madd(int task_id, kernel_arg_t* arg) { void kernel_fdiv(int task_id, kernel_arg_t* arg) { uint32_t count = arg->task_size; - float* src0_ptr = (float*)arg->src0_ptr; - float* src1_ptr = (float*)arg->src1_ptr; - float* dst_ptr = (float*)arg->dst_ptr; + float* src0_ptr = (float*)arg->src0_addr; + float* src1_ptr = (float*)arg->src1_addr; + float* dst_ptr = (float*)arg->dst_addr; uint32_t offset = task_id * count; for (uint32_t i = 0; i < count; ++i) { @@ -212,9 +212,9 @@ void kernel_fdiv(int task_id, kernel_arg_t* arg) { void kernel_fdiv2(int task_id, kernel_arg_t* arg) { uint32_t count = arg->task_size; - float* src0_ptr = (float*)arg->src0_ptr; 
- float* src1_ptr = (float*)arg->src1_ptr; - float* dst_ptr = (float*)arg->dst_ptr; + float* src0_ptr = (float*)arg->src0_addr; + float* src1_ptr = (float*)arg->src1_addr; + float* dst_ptr = (float*)arg->dst_addr; uint32_t offset = task_id * count; for (uint32_t i = 0; i < count; ++i) { @@ -229,9 +229,9 @@ void kernel_fdiv2(int task_id, kernel_arg_t* arg) { void kernel_fsqrt(int task_id, kernel_arg_t* arg) { uint32_t count = arg->task_size; - float* src0_ptr = (float*)arg->src0_ptr; - float* src1_ptr = (float*)arg->src1_ptr; - float* dst_ptr = (float*)arg->dst_ptr; + float* src0_ptr = (float*)arg->src0_addr; + float* src1_ptr = (float*)arg->src1_addr; + float* dst_ptr = (float*)arg->dst_addr; uint32_t offset = task_id * count; for (uint32_t i = 0; i < count; ++i) { @@ -244,9 +244,9 @@ void kernel_fsqrt(int task_id, kernel_arg_t* arg) { void kernel_ftoi(int task_id, kernel_arg_t* arg) { uint32_t count = arg->task_size; - float* src0_ptr = (float*)arg->src0_ptr; - float* src1_ptr = (float*)arg->src1_ptr; - int32_t* dst_ptr = (int32_t*)arg->dst_ptr; + float* src0_ptr = (float*)arg->src0_addr; + float* src1_ptr = (float*)arg->src1_addr; + int32_t* dst_ptr = (int32_t*)arg->dst_addr; uint32_t offset = task_id * count; for (uint32_t i = 0; i < count; ++i) { @@ -260,9 +260,9 @@ void kernel_ftoi(int task_id, kernel_arg_t* arg) { void kernel_ftou(int task_id, kernel_arg_t* arg) { uint32_t count = arg->task_size; - float* src0_ptr = (float*)arg->src0_ptr; - float* src1_ptr = (float*)arg->src1_ptr; - uint32_t* dst_ptr = (uint32_t*)arg->dst_ptr; + float* src0_ptr = (float*)arg->src0_addr; + float* src1_ptr = (float*)arg->src1_addr; + uint32_t* dst_ptr = (uint32_t*)arg->dst_addr; uint32_t offset = task_id * count; for (uint32_t i = 0; i < count; ++i) { @@ -276,9 +276,9 @@ void kernel_ftou(int task_id, kernel_arg_t* arg) { void kernel_itof(int task_id, kernel_arg_t* arg) { uint32_t count = arg->task_size; - int32_t* src0_ptr = (int32_t*)arg->src0_ptr; - int32_t* src1_ptr = 
(int32_t*)arg->src1_ptr; - float* dst_ptr = (float*)arg->dst_ptr; + int32_t* src0_ptr = (int32_t*)arg->src0_addr; + int32_t* src1_ptr = (int32_t*)arg->src1_addr; + float* dst_ptr = (float*)arg->dst_addr; uint32_t offset = task_id * count; for (uint32_t i = 0; i < count; ++i) { @@ -292,9 +292,9 @@ void kernel_itof(int task_id, kernel_arg_t* arg) { void kernel_utof(int task_id, kernel_arg_t* arg) { uint32_t count = arg->task_size; - int32_t* src0_ptr = (int32_t*)arg->src0_ptr; - int32_t* src1_ptr = (int32_t*)arg->src1_ptr; - float* dst_ptr = (float*)arg->dst_ptr; + int32_t* src0_ptr = (int32_t*)arg->src0_addr; + int32_t* src1_ptr = (int32_t*)arg->src1_addr; + float* dst_ptr = (float*)arg->dst_addr; uint32_t offset = task_id * count; for (uint32_t i = 0; i < count; ++i) { diff --git a/tests/regression/dogfood/main.cpp b/tests/regression/dogfood/main.cpp index 71ae6624..48e7d458 100644 --- a/tests/regression/dogfood/main.cpp +++ b/tests/regression/dogfood/main.cpp @@ -87,6 +87,7 @@ vx_buffer_h arg_buf = nullptr; vx_buffer_h src1_buf = nullptr; vx_buffer_h src2_buf = nullptr; vx_buffer_h dst_buf = nullptr; +kernel_arg_t kernel_arg; static void show_usage() { std::cout << "Vortex Test." 
<< std::endl; @@ -130,26 +131,28 @@ static void parse_args(int argc, char **argv) { void cleanup() { if (arg_buf) { - vx_buf_release(arg_buf); + vx_buf_free(arg_buf); } if (src1_buf) { - vx_buf_release(src1_buf); + vx_buf_free(src1_buf); } if (src2_buf) { - vx_buf_release(src2_buf); + vx_buf_free(src2_buf); } if (dst_buf) { - vx_buf_release(dst_buf); + vx_buf_free(dst_buf); } if (device) { + vx_mem_free(device, kernel_arg.src0_addr); + vx_mem_free(device, kernel_arg.src1_addr); + vx_mem_free(device, kernel_arg.dst_addr); vx_dev_close(device); } } int main(int argc, char *argv[]) { int exitcode = 0; - size_t value; - kernel_arg_t kernel_arg; + size_t value; // parse command arguments parse_args(argc, argv); @@ -187,26 +190,26 @@ int main(int argc, char *argv[]) { // allocate device memory std::cout << "allocate device memory" << std::endl; - RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value)); - kernel_arg.src0_ptr = value; - RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value)); - kernel_arg.src1_ptr = value; - RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value)); - kernel_arg.dst_ptr = value; + RT_CHECK(vx_mem_alloc(device, buf_size, &value)); + kernel_arg.src0_addr = value; + RT_CHECK(vx_mem_alloc(device, buf_size, &value)); + kernel_arg.src1_addr = value; + RT_CHECK(vx_mem_alloc(device, buf_size, &value)); + kernel_arg.dst_addr = value; kernel_arg.num_tasks = num_tasks; kernel_arg.task_size = count; - std::cout << "dev_src0=" << std::hex << kernel_arg.src0_ptr << std::dec << std::endl; - std::cout << "dev_src1=" << std::hex << kernel_arg.src1_ptr << std::dec << std::endl; - std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::dec << std::endl; + std::cout << "dev_src0=" << std::hex << kernel_arg.src0_addr << std::dec << std::endl; + std::cout << "dev_src1=" << std::hex << kernel_arg.src1_addr << std::dec << std::endl; + std::cout << "dev_dst=" << std::hex << kernel_arg.dst_addr << std::dec << std::endl; // allocate shared memory std::cout << 
"allocate shared memory" << std::endl; - RT_CHECK(vx_alloc_shared_mem(device, sizeof(kernel_arg_t), &arg_buf)); - RT_CHECK(vx_alloc_shared_mem(device, buf_size, &src1_buf)); - RT_CHECK(vx_alloc_shared_mem(device, buf_size, &src2_buf)); - RT_CHECK(vx_alloc_shared_mem(device, buf_size, &dst_buf)); + RT_CHECK(vx_buf_alloc(device, sizeof(kernel_arg_t), &arg_buf)); + RT_CHECK(vx_buf_alloc(device, buf_size, &src1_buf)); + RT_CHECK(vx_buf_alloc(device, buf_size, &src2_buf)); + RT_CHECK(vx_buf_alloc(device, buf_size, &dst_buf)); for (int t = testid_s; t <= testid_e; ++t) { auto name = testMngr.get_name(t); @@ -226,18 +229,18 @@ int main(int argc, char *argv[]) { // upload source buffer0 std::cout << "upload source buffer0" << std::endl; - RT_CHECK(vx_copy_to_dev(src1_buf, kernel_arg.src0_ptr, buf_size, 0)); + RT_CHECK(vx_copy_to_dev(src1_buf, kernel_arg.src0_addr, buf_size, 0)); // upload source buffer1 std::cout << "upload source buffer1" << std::endl; - RT_CHECK(vx_copy_to_dev(src2_buf, kernel_arg.src1_ptr, buf_size, 0)); + RT_CHECK(vx_copy_to_dev(src2_buf, kernel_arg.src1_addr, buf_size, 0)); // clear destination buffer std::cout << "clear destination buffer" << std::endl; for (int i = 0; i < num_points; ++i) { ((uint32_t*)vx_host_ptr(dst_buf))[i] = 0xdeadbeef; } - RT_CHECK(vx_copy_to_dev(dst_buf, kernel_arg.dst_ptr, buf_size, 0)); + RT_CHECK(vx_copy_to_dev(dst_buf, kernel_arg.dst_addr, buf_size, 0)); // start device std::cout << "start device" << std::endl; @@ -249,7 +252,7 @@ int main(int argc, char *argv[]) { // download destination buffer std::cout << "download destination buffer" << std::endl; - RT_CHECK(vx_copy_from_dev(dst_buf, kernel_arg.dst_ptr, buf_size, 0)); + RT_CHECK(vx_copy_from_dev(dst_buf, kernel_arg.dst_addr, buf_size, 0)); // verify destination std::cout << "verify test result" << std::endl; diff --git a/tests/regression/fence/common.h b/tests/regression/fence/common.h index 3a38ae43..f10f8002 100644 --- a/tests/regression/fence/common.h +++ 
b/tests/regression/fence/common.h @@ -6,9 +6,9 @@ typedef struct { uint32_t num_tasks; uint32_t task_size; - uint32_t src0_ptr; - uint32_t src1_ptr; - uint32_t dst_ptr; + uint32_t src0_addr; + uint32_t src1_addr; + uint32_t dst_addr; } kernel_arg_t; #endif \ No newline at end of file diff --git a/tests/regression/fence/kernel.c b/tests/regression/fence/kernel.c index d3c1c431..c7963143 100644 --- a/tests/regression/fence/kernel.c +++ b/tests/regression/fence/kernel.c @@ -5,9 +5,9 @@ void kernel_body(int task_id, kernel_arg_t* arg) { uint32_t count = arg->task_size; - int32_t* src0_ptr = (int32_t*)arg->src0_ptr; - int32_t* src1_ptr = (int32_t*)arg->src1_ptr; - int32_t* dst_ptr = (int32_t*)arg->dst_ptr; + int32_t* src0_ptr = (int32_t*)arg->src0_addr; + int32_t* src1_ptr = (int32_t*)arg->src1_addr; + int32_t* dst_ptr = (int32_t*)arg->dst_addr; uint32_t offset = task_id * count; diff --git a/tests/regression/fence/main.cpp b/tests/regression/fence/main.cpp index 29cc7d85..1c0765fa 100644 --- a/tests/regression/fence/main.cpp +++ b/tests/regression/fence/main.cpp @@ -21,6 +21,7 @@ uint32_t count = 0; vx_device_h device = nullptr; vx_buffer_h staging_buf = nullptr; +kernel_arg_t kernel_arg; static void show_usage() { std::cout << "Vortex Test." 
<< std::endl; @@ -51,9 +52,12 @@ static void parse_args(int argc, char **argv) { void cleanup() { if (staging_buf) { - vx_buf_release(staging_buf); + vx_buf_free(staging_buf); } if (device) { + vx_mem_free(device, kernel_arg.src0_addr); + vx_mem_free(device, kernel_arg.src1_addr); + vx_mem_free(device, kernel_arg.dst_addr); vx_dev_close(device); } } @@ -71,7 +75,7 @@ int run_test(const kernel_arg_t& kernel_arg, // download destination buffer std::cout << "download destination buffer" << std::endl; - RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_ptr, buf_size, 0)); + RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_addr, buf_size, 0)); // verify result std::cout << "verify result" << std::endl; @@ -98,8 +102,7 @@ int run_test(const kernel_arg_t& kernel_arg, } int main(int argc, char *argv[]) { - size_t value; - kernel_arg_t kernel_arg; + size_t value; // parse command arguments parse_args(argc, argv); @@ -131,24 +134,24 @@ int main(int argc, char *argv[]) { // allocate device memory std::cout << "allocate device memory" << std::endl; - RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value)); - kernel_arg.src0_ptr = value; - RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value)); - kernel_arg.src1_ptr = value; - RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value)); - kernel_arg.dst_ptr = value; + RT_CHECK(vx_mem_alloc(device, buf_size, &value)); + kernel_arg.src0_addr = value; + RT_CHECK(vx_mem_alloc(device, buf_size, &value)); + kernel_arg.src1_addr = value; + RT_CHECK(vx_mem_alloc(device, buf_size, &value)); + kernel_arg.dst_addr = value; kernel_arg.num_tasks = num_tasks; kernel_arg.task_size = count; - std::cout << "dev_src0=" << std::hex << kernel_arg.src0_ptr << std::endl; - std::cout << "dev_src1=" << std::hex << kernel_arg.src1_ptr << std::endl; - std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::endl; + std::cout << "dev_src0=" << std::hex << kernel_arg.src0_addr << std::endl; + std::cout << "dev_src1=" << std::hex << 
kernel_arg.src1_addr << std::endl; + std::cout << "dev_dst=" << std::hex << kernel_arg.dst_addr << std::endl; // allocate shared memory std::cout << "allocate shared memory" << std::endl; uint32_t alloc_size = std::max(buf_size, sizeof(kernel_arg_t)); - RT_CHECK(vx_alloc_shared_mem(device, alloc_size, &staging_buf)); + RT_CHECK(vx_buf_alloc(device, alloc_size, &staging_buf)); // upload kernel argument std::cout << "upload kernel argument" << std::endl; @@ -166,7 +169,7 @@ int main(int argc, char *argv[]) { } } std::cout << "upload source buffer0" << std::endl; - RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src0_ptr, buf_size, 0)); + RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src0_addr, buf_size, 0)); // upload source buffer1 { @@ -176,7 +179,7 @@ int main(int argc, char *argv[]) { } } std::cout << "upload source buffer1" << std::endl; - RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src1_ptr, buf_size, 0)); + RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src1_addr, buf_size, 0)); // clear destination buffer { @@ -186,7 +189,7 @@ int main(int argc, char *argv[]) { } } std::cout << "clear destination buffer" << std::endl; - RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_ptr, buf_size, 0)); + RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_addr, buf_size, 0)); // run tests std::cout << "run tests" << std::endl; diff --git a/tests/regression/io_addr/common.h b/tests/regression/io_addr/common.h index 6346c58e..ccb28bb9 100644 --- a/tests/regression/io_addr/common.h +++ b/tests/regression/io_addr/common.h @@ -5,8 +5,8 @@ typedef struct { uint32_t num_points; - uint32_t src_ptr; - uint32_t dst_ptr; + uint32_t src_addr; + uint32_t dst_addr; } kernel_arg_t; #endif \ No newline at end of file diff --git a/tests/regression/io_addr/kernel.c b/tests/regression/io_addr/kernel.c index 2f8483d8..c36bde7e 100644 --- a/tests/regression/io_addr/kernel.c +++ b/tests/regression/io_addr/kernel.c @@ -4,8 +4,8 @@ #include "common.h" void kernel_body(int task_id, 
kernel_arg_t* arg) { - uint32_t* src_ptr = (uint32_t*)arg->src_ptr; - uint32_t* dst_ptr = (uint32_t*)arg->dst_ptr; + uint32_t* src_ptr = (uint32_t*)arg->src_addr; + uint32_t* dst_ptr = (uint32_t*)arg->dst_addr; int32_t* addr_ptr = (int32_t*)(src_ptr[task_id]); diff --git a/tests/regression/io_addr/main.cpp b/tests/regression/io_addr/main.cpp index 42dcd7c0..0892efbc 100644 --- a/tests/regression/io_addr/main.cpp +++ b/tests/regression/io_addr/main.cpp @@ -30,6 +30,7 @@ std::vector ref_data; vx_device_h device = nullptr; vx_buffer_h staging_buf = nullptr; +kernel_arg_t kernel_arg; static void show_usage() { std::cout << "Vortex Test." << std::endl; @@ -60,9 +61,11 @@ static void parse_args(int argc, char **argv) { void cleanup() { if (staging_buf) { - vx_buf_release(staging_buf); + vx_buf_free(staging_buf); } if (device) { + vx_mem_free(device, kernel_arg.src_addr); + vx_mem_free(device, kernel_arg.dst_addr); vx_dev_close(device); } } @@ -105,7 +108,7 @@ int run_test(const kernel_arg_t& kernel_arg, // download destination buffer std::cout << "download destination buffer" << std::endl; - RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_ptr, buf_size, 0)); + RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_addr, buf_size, 0)); // verify result std::cout << "verify result" << std::endl; @@ -132,8 +135,7 @@ int run_test(const kernel_arg_t& kernel_arg, } int main(int argc, char *argv[]) { - size_t value; - kernel_arg_t kernel_arg; + size_t value; // parse command arguments parse_args(argc, argv); @@ -150,7 +152,7 @@ int main(int argc, char *argv[]) { uint32_t num_points = count; - RT_CHECK(vx_alloc_dev_mem(device, NUM_ADDRS * sizeof(uint32_t), &usr_test_mem)); + RT_CHECK(vx_mem_alloc(device, NUM_ADDRS * sizeof(uint32_t), &usr_test_mem)); // generate input data gen_input_data(num_points); @@ -171,15 +173,15 @@ int main(int argc, char *argv[]) { // allocate device memory std::cout << "allocate device memory" << std::endl; - RT_CHECK(vx_alloc_dev_mem(device, 
src_buf_size, &value)); - kernel_arg.src_ptr = value; - RT_CHECK(vx_alloc_dev_mem(device, dst_buf_size, &value)); - kernel_arg.dst_ptr = value; + RT_CHECK(vx_mem_alloc(device, src_buf_size, &value)); + kernel_arg.src_addr = value; + RT_CHECK(vx_mem_alloc(device, dst_buf_size, &value)); + kernel_arg.dst_addr = value; kernel_arg.num_points = num_points; - std::cout << "dev_src=" << std::hex << kernel_arg.src_ptr << std::endl; - std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::endl; + std::cout << "dev_src=" << std::hex << kernel_arg.src_addr << std::endl; + std::cout << "dev_dst=" << std::hex << kernel_arg.dst_addr << std::endl; // allocate shared memory std::cout << "allocate shared memory" << std::endl; @@ -187,7 +189,7 @@ int main(int argc, char *argv[]) { std::max(src_buf_size, std::max(dst_buf_size, sizeof(kernel_arg_t)))); - RT_CHECK(vx_alloc_shared_mem(device, staging_buf_size, &staging_buf)); + RT_CHECK(vx_buf_alloc(device, staging_buf_size, &staging_buf)); // upload kernel argument std::cout << "upload kernel argument" << std::endl; @@ -215,7 +217,7 @@ int main(int argc, char *argv[]) { } } std::cout << "upload source buffer" << std::endl; - RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src_ptr, src_buf_size, 0)); + RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src_addr, src_buf_size, 0)); // clear destination buffer { @@ -225,7 +227,7 @@ int main(int argc, char *argv[]) { } } std::cout << "clear destination buffer" << std::endl; - RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_ptr, dst_buf_size, 0)); + RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_addr, dst_buf_size, 0)); // run tests std::cout << "run tests" << std::endl; diff --git a/tests/regression/mstress/common.h b/tests/regression/mstress/common.h index 12ed6290..6cd5ee67 100644 --- a/tests/regression/mstress/common.h +++ b/tests/regression/mstress/common.h @@ -9,9 +9,9 @@ typedef struct { uint32_t num_tasks; uint32_t size; uint32_t stride; - uint32_t addr_ptr; - 
uint32_t src_ptr; - uint32_t dst_ptr; + uint32_t src0_addr; + uint32_t src1_addr; + uint32_t dst_addr; } kernel_arg_t; #endif \ No newline at end of file diff --git a/tests/regression/mstress/kernel.c b/tests/regression/mstress/kernel.c index 91d9a455..0ff17d2b 100644 --- a/tests/regression/mstress/kernel.c +++ b/tests/regression/mstress/kernel.c @@ -5,9 +5,9 @@ void kernel_body(int task_id, kernel_arg_t* arg) { uint32_t stride = arg->stride; - uint32_t* addr_ptr = (uint32_t*)arg->addr_ptr; - float* src_ptr = (float*)arg->src_ptr; - float* dst_ptr = (float*)arg->dst_ptr; + uint32_t* addr_ptr = (uint32_t*)arg->src0_addr; + float* src_ptr = (float*)arg->src1_addr; + float* dst_ptr = (float*)arg->dst_addr; uint32_t offset = task_id * stride; diff --git a/tests/regression/mstress/main.cpp b/tests/regression/mstress/main.cpp index c2354edc..ab47dff3 100644 --- a/tests/regression/mstress/main.cpp +++ b/tests/regression/mstress/main.cpp @@ -73,6 +73,7 @@ std::vector addr_table; vx_device_h device = nullptr; vx_buffer_h staging_buf = nullptr; +kernel_arg_t kernel_arg; static void show_usage() { std::cout << "Vortex Test." 
<< std::endl; @@ -103,9 +104,12 @@ static void parse_args(int argc, char **argv) { void cleanup() { if (staging_buf) { - vx_buf_release(staging_buf); + vx_buf_free(staging_buf); } if (device) { + vx_mem_free(device, kernel_arg.src0_addr); + vx_mem_free(device, kernel_arg.src1_addr); + vx_mem_free(device, kernel_arg.dst_addr); vx_dev_close(device); } } @@ -140,7 +144,7 @@ int run_test(const kernel_arg_t& kernel_arg, // download destination buffer std::cout << "download destination buffer" << std::endl; - RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_ptr, dst_buf_size, 0)); + RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_addr, dst_buf_size, 0)); // verify result std::cout << "verify result" << std::endl; @@ -178,8 +182,7 @@ int run_test(const kernel_arg_t& kernel_arg, } int main(int argc, char *argv[]) { - size_t value; - kernel_arg_t kernel_arg; + size_t value; // parse command arguments parse_args(argc, argv); @@ -219,19 +222,19 @@ int main(int argc, char *argv[]) { // allocate device memory std::cout << "allocate device memory" << std::endl; - RT_CHECK(vx_alloc_dev_mem(device, addr_buf_size, &value)); - kernel_arg.addr_ptr = value; - RT_CHECK(vx_alloc_dev_mem(device, src_buf_size, &value)); - kernel_arg.src_ptr = value; - RT_CHECK(vx_alloc_dev_mem(device, dst_buf_size, &value)); - kernel_arg.dst_ptr = value; + RT_CHECK(vx_mem_alloc(device, addr_buf_size, &value)); + kernel_arg.src0_addr = value; + RT_CHECK(vx_mem_alloc(device, src_buf_size, &value)); + kernel_arg.src1_addr = value; + RT_CHECK(vx_mem_alloc(device, dst_buf_size, &value)); + kernel_arg.dst_addr = value; kernel_arg.num_tasks = num_tasks; kernel_arg.stride = count; - std::cout << "dev_addr=" << std::hex << kernel_arg.addr_ptr << std::endl; - std::cout << "dev_src=" << std::hex << kernel_arg.src_ptr << std::endl; - std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::endl; + std::cout << "dev_addr=" << std::hex << kernel_arg.src0_addr << std::endl; + std::cout << "dev_src=" 
<< std::hex << kernel_arg.src1_addr << std::endl; + std::cout << "dev_dst=" << std::hex << kernel_arg.dst_addr << std::endl; // allocate shared memory std::cout << "allocate shared memory" << std::endl; @@ -239,7 +242,7 @@ int main(int argc, char *argv[]) { std::max(addr_buf_size, std::max(dst_buf_size, sizeof(kernel_arg_t)))); - RT_CHECK(vx_alloc_shared_mem(device, staging_buf_size, &staging_buf)); + RT_CHECK(vx_buf_alloc(device, staging_buf_size, &staging_buf)); // upload kernel argument std::cout << "upload kernel argument" << std::endl; @@ -257,7 +260,7 @@ int main(int argc, char *argv[]) { } } std::cout << "upload address buffer" << std::endl; - RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.addr_ptr, addr_buf_size, 0)); + RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src0_addr, addr_buf_size, 0)); // upload source buffer1 { @@ -267,7 +270,7 @@ int main(int argc, char *argv[]) { } } std::cout << "upload source buffer" << std::endl; - RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src_ptr, src_buf_size, 0)); + RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src1_addr, src_buf_size, 0)); // clear destination buffer { @@ -277,7 +280,7 @@ int main(int argc, char *argv[]) { } } std::cout << "clear destination buffer" << std::endl; - RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_ptr, dst_buf_size, 0)); + RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_addr, dst_buf_size, 0)); // run tests std::cout << "run tests" << std::endl; diff --git a/tests/regression/no_mf_ext/common.h b/tests/regression/no_mf_ext/common.h index f2638122..e59cab39 100644 --- a/tests/regression/no_mf_ext/common.h +++ b/tests/regression/no_mf_ext/common.h @@ -5,8 +5,8 @@ typedef struct { uint32_t size; - uint32_t src_ptr; - uint32_t dst_ptr; + uint32_t src_addr; + uint32_t dst_addr; } kernel_arg_t; #endif \ No newline at end of file diff --git a/tests/regression/no_mf_ext/kernel.c b/tests/regression/no_mf_ext/kernel.c index a0c19e53..72b66bdd 100644 --- 
a/tests/regression/no_mf_ext/kernel.c +++ b/tests/regression/no_mf_ext/kernel.c @@ -7,8 +7,8 @@ void main() { kernel_arg_t* arg = (kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR; uint32_t size = arg->size; - int32_t* src_ptr = (int32_t*)arg->src_ptr; - int32_t* dst_ptr = (int32_t*)arg->dst_ptr; + int32_t* src_ptr = (int32_t*)arg->src_addr; + int32_t* dst_ptr = (int32_t*)arg->dst_addr; for (uint32_t i = 0; i < size; ++i) { dst_ptr[i] = src_ptr[i]; diff --git a/tests/regression/no_mf_ext/main.cpp b/tests/regression/no_mf_ext/main.cpp index 01ae744c..3780508d 100644 --- a/tests/regression/no_mf_ext/main.cpp +++ b/tests/regression/no_mf_ext/main.cpp @@ -21,6 +21,7 @@ uint32_t count = 0; vx_device_h device = nullptr; vx_buffer_h staging_buf = nullptr; +kernel_arg_t kernel_arg; static void show_usage() { std::cout << "Vortex Test." << std::endl; @@ -51,9 +52,11 @@ static void parse_args(int argc, char **argv) { void cleanup() { if (staging_buf) { - vx_buf_release(staging_buf); + vx_buf_free(staging_buf); } if (device) { + vx_mem_free(device, kernel_arg.src_addr); + vx_mem_free(device, kernel_arg.dst_addr); vx_dev_close(device); } } @@ -71,7 +74,7 @@ int run_test(const kernel_arg_t& kernel_arg, // download destination buffer std::cout << "download destination buffer" << std::endl; - RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_ptr, buf_size, 0)); + RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_addr, buf_size, 0)); // verify result std::cout << "verify result" << std::endl; @@ -98,8 +101,7 @@ int run_test(const kernel_arg_t& kernel_arg, } int main(int argc, char *argv[]) { - size_t value; - kernel_arg_t kernel_arg; + size_t value; // parse command arguments parse_args(argc, argv); @@ -125,21 +127,21 @@ int main(int argc, char *argv[]) { // allocate device memory std::cout << "allocate device memory" << std::endl; - RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value)); - kernel_arg.src_ptr = value; + RT_CHECK(vx_mem_alloc(device, buf_size, &value)); + 
kernel_arg.src_addr = value; - RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value)); - kernel_arg.dst_ptr = value; + RT_CHECK(vx_mem_alloc(device, buf_size, &value)); + kernel_arg.dst_addr = value; kernel_arg.size = num_points; - std::cout << "dev_src=" << std::hex << kernel_arg.src_ptr << std::endl; - std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::endl; + std::cout << "dev_src=" << std::hex << kernel_arg.src_addr << std::endl; + std::cout << "dev_dst=" << std::hex << kernel_arg.dst_addr << std::endl; // allocate shared memory std::cout << "allocate shared memory" << std::endl; uint32_t alloc_size = std::max(buf_size, sizeof(kernel_arg_t)); - RT_CHECK(vx_alloc_shared_mem(device, alloc_size, &staging_buf)); + RT_CHECK(vx_buf_alloc(device, alloc_size, &staging_buf)); // upload kernel argument std::cout << "upload kernel argument" << std::endl; @@ -157,7 +159,7 @@ int main(int argc, char *argv[]) { } } std::cout << "upload source buffer" << std::endl; - RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src_ptr, buf_size, 0)); + RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src_addr, buf_size, 0)); // clear destination buffer { @@ -167,7 +169,7 @@ int main(int argc, char *argv[]) { } } std::cout << "clear destination buffer" << std::endl; - RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_ptr, buf_size, 0)); + RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_addr, buf_size, 0)); // run tests std::cout << "run tests" << std::endl; diff --git a/tests/regression/no_smem/common.h b/tests/regression/no_smem/common.h index f2638122..e59cab39 100644 --- a/tests/regression/no_smem/common.h +++ b/tests/regression/no_smem/common.h @@ -5,8 +5,8 @@ typedef struct { uint32_t size; - uint32_t src_ptr; - uint32_t dst_ptr; + uint32_t src_addr; + uint32_t dst_addr; } kernel_arg_t; #endif \ No newline at end of file diff --git a/tests/regression/no_smem/kernel.c b/tests/regression/no_smem/kernel.c index a0c19e53..72b66bdd 100644 --- 
a/tests/regression/no_smem/kernel.c +++ b/tests/regression/no_smem/kernel.c @@ -7,8 +7,8 @@ void main() { kernel_arg_t* arg = (kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR; uint32_t size = arg->size; - int32_t* src_ptr = (int32_t*)arg->src_ptr; - int32_t* dst_ptr = (int32_t*)arg->dst_ptr; + int32_t* src_ptr = (int32_t*)arg->src_addr; + int32_t* dst_ptr = (int32_t*)arg->dst_addr; for (uint32_t i = 0; i < size; ++i) { dst_ptr[i] = src_ptr[i]; diff --git a/tests/regression/no_smem/main.cpp b/tests/regression/no_smem/main.cpp index 01ae744c..3780508d 100644 --- a/tests/regression/no_smem/main.cpp +++ b/tests/regression/no_smem/main.cpp @@ -21,6 +21,7 @@ uint32_t count = 0; vx_device_h device = nullptr; vx_buffer_h staging_buf = nullptr; +kernel_arg_t kernel_arg; static void show_usage() { std::cout << "Vortex Test." << std::endl; @@ -51,9 +52,11 @@ static void parse_args(int argc, char **argv) { void cleanup() { if (staging_buf) { - vx_buf_release(staging_buf); + vx_buf_free(staging_buf); } if (device) { + vx_mem_free(device, kernel_arg.src_addr); + vx_mem_free(device, kernel_arg.dst_addr); vx_dev_close(device); } } @@ -71,7 +74,7 @@ int run_test(const kernel_arg_t& kernel_arg, // download destination buffer std::cout << "download destination buffer" << std::endl; - RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_ptr, buf_size, 0)); + RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_addr, buf_size, 0)); // verify result std::cout << "verify result" << std::endl; @@ -98,8 +101,7 @@ int run_test(const kernel_arg_t& kernel_arg, } int main(int argc, char *argv[]) { - size_t value; - kernel_arg_t kernel_arg; + size_t value; // parse command arguments parse_args(argc, argv); @@ -125,21 +127,21 @@ int main(int argc, char *argv[]) { // allocate device memory std::cout << "allocate device memory" << std::endl; - RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value)); - kernel_arg.src_ptr = value; + RT_CHECK(vx_mem_alloc(device, buf_size, &value)); + kernel_arg.src_addr = 
value; - RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value)); - kernel_arg.dst_ptr = value; + RT_CHECK(vx_mem_alloc(device, buf_size, &value)); + kernel_arg.dst_addr = value; kernel_arg.size = num_points; - std::cout << "dev_src=" << std::hex << kernel_arg.src_ptr << std::endl; - std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::endl; + std::cout << "dev_src=" << std::hex << kernel_arg.src_addr << std::endl; + std::cout << "dev_dst=" << std::hex << kernel_arg.dst_addr << std::endl; // allocate shared memory std::cout << "allocate shared memory" << std::endl; uint32_t alloc_size = std::max(buf_size, sizeof(kernel_arg_t)); - RT_CHECK(vx_alloc_shared_mem(device, alloc_size, &staging_buf)); + RT_CHECK(vx_buf_alloc(device, alloc_size, &staging_buf)); // upload kernel argument std::cout << "upload kernel argument" << std::endl; @@ -157,7 +159,7 @@ int main(int argc, char *argv[]) { } } std::cout << "upload source buffer" << std::endl; - RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src_ptr, buf_size, 0)); + RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src_addr, buf_size, 0)); // clear destination buffer { @@ -167,7 +169,7 @@ int main(int argc, char *argv[]) { } } std::cout << "clear destination buffer" << std::endl; - RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_ptr, buf_size, 0)); + RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_addr, buf_size, 0)); // run tests std::cout << "run tests" << std::endl; diff --git a/tests/regression/prefetch/common.h b/tests/regression/prefetch/common.h index 3a38ae43..29e9bcf5 100644 --- a/tests/regression/prefetch/common.h +++ b/tests/regression/prefetch/common.h @@ -6,9 +6,9 @@ typedef struct { uint32_t num_tasks; uint32_t task_size; - uint32_t src0_ptr; - uint32_t src1_ptr; - uint32_t dst_ptr; + uint32_t src0_addr; + uint32_t src1_addr; + uint32_t dst_addr; } kernel_arg_t; #endif \ No newline at end of file diff --git a/tests/regression/prefetch/kernel.c b/tests/regression/prefetch/kernel.c index 
b852f582..30c86746 100644 --- a/tests/regression/prefetch/kernel.c +++ b/tests/regression/prefetch/kernel.c @@ -11,9 +11,9 @@ void kernel_body(int task_id, kernel_arg_t* arg) { uint32_t offset = task_id * count; uint32_t num_blocks = (count * 4 + BLOCK_SIZE-1) / BLOCK_SIZE; - int32_t* src0_ptr = (int32_t*)arg->src0_ptr + offset; - int32_t* src1_ptr = (int32_t*)arg->src1_ptr + offset; - int32_t* dst_ptr = (int32_t*)arg->dst_ptr + offset; + int32_t* src0_ptr = (int32_t*)arg->src0_addr + offset; + int32_t* src1_ptr = (int32_t*)arg->src1_addr + offset; + int32_t* dst_ptr = (int32_t*)arg->dst_addr + offset; uint32_t src0_end = (uint32_t)(src0_ptr + count); uint32_t src1_end = (uint32_t)(src1_ptr + count); diff --git a/tests/regression/prefetch/main.cpp b/tests/regression/prefetch/main.cpp index 8be0d2a4..02964df3 100644 --- a/tests/regression/prefetch/main.cpp +++ b/tests/regression/prefetch/main.cpp @@ -21,6 +21,7 @@ uint32_t count = 0; vx_device_h device = nullptr; vx_buffer_h staging_buf = nullptr; +kernel_arg_t kernel_arg; static void show_usage() { std::cout << "Vortex Test." 
<< std::endl; @@ -51,9 +52,12 @@ static void parse_args(int argc, char **argv) { void cleanup() { if (staging_buf) { - vx_buf_release(staging_buf); + vx_buf_free(staging_buf); } if (device) { + vx_mem_free(device, kernel_arg.src0_addr); + vx_mem_free(device, kernel_arg.src1_addr); + vx_mem_free(device, kernel_arg.dst_addr); vx_dev_close(device); } } @@ -71,7 +75,7 @@ int run_test(const kernel_arg_t& kernel_arg, // download destination buffer std::cout << "download destination buffer" << std::endl; - RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_ptr, buf_size, 0)); + RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_addr, buf_size, 0)); // verify result std::cout << "verify result" << std::endl; @@ -98,8 +102,7 @@ int run_test(const kernel_arg_t& kernel_arg, } int main(int argc, char *argv[]) { - size_t value; - kernel_arg_t kernel_arg; + size_t value; // parse command arguments parse_args(argc, argv); @@ -131,24 +134,24 @@ int main(int argc, char *argv[]) { // allocate device memory std::cout << "allocate device memory" << std::endl; - RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value)); - kernel_arg.src0_ptr = value; - RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value)); - kernel_arg.src1_ptr = value; - RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value)); - kernel_arg.dst_ptr = value; + RT_CHECK(vx_mem_alloc(device, buf_size, &value)); + kernel_arg.src0_addr = value; + RT_CHECK(vx_mem_alloc(device, buf_size, &value)); + kernel_arg.src1_addr = value; + RT_CHECK(vx_mem_alloc(device, buf_size, &value)); + kernel_arg.dst_addr = value; kernel_arg.num_tasks = num_tasks; kernel_arg.task_size = count; - std::cout << "dev_src0=" << std::hex << kernel_arg.src0_ptr << std::endl; - std::cout << "dev_src1=" << std::hex << kernel_arg.src1_ptr << std::endl; - std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::endl; + std::cout << "dev_src0=" << std::hex << kernel_arg.src0_addr << std::endl; + std::cout << "dev_src1=" << std::hex << 
kernel_arg.src1_addr << std::endl; + std::cout << "dev_dst=" << std::hex << kernel_arg.dst_addr << std::endl; // allocate shared memory std::cout << "allocate shared memory" << std::endl; uint32_t alloc_size = std::max(buf_size, sizeof(kernel_arg_t)); - RT_CHECK(vx_alloc_shared_mem(device, alloc_size, &staging_buf)); + RT_CHECK(vx_buf_alloc(device, alloc_size, &staging_buf)); // upload kernel argument std::cout << "upload kernel argument" << std::endl; @@ -166,7 +169,7 @@ int main(int argc, char *argv[]) { } } std::cout << "upload source buffer0" << std::endl; - RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src0_ptr, buf_size, 0)); + RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src0_addr, buf_size, 0)); // upload source buffer1 { @@ -176,7 +179,7 @@ int main(int argc, char *argv[]) { } } std::cout << "upload source buffer1" << std::endl; - RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src1_ptr, buf_size, 0)); + RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src1_addr, buf_size, 0)); // clear destination buffer { @@ -186,7 +189,7 @@ int main(int argc, char *argv[]) { } } std::cout << "clear destination buffer" << std::endl; - RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_ptr, buf_size, 0)); + RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_addr, buf_size, 0)); // run tests std::cout << "run tests" << std::endl; diff --git a/tests/regression/printf/common.h b/tests/regression/printf/common.h index be3af59d..70206d58 100644 --- a/tests/regression/printf/common.h +++ b/tests/regression/printf/common.h @@ -5,7 +5,7 @@ typedef struct { uint32_t num_points; - uint32_t src_ptr; + uint32_t src_addr; } kernel_arg_t; #endif \ No newline at end of file diff --git a/tests/regression/printf/kernel.c b/tests/regression/printf/kernel.c index 340b4d97..ac2037c4 100644 --- a/tests/regression/printf/kernel.c +++ b/tests/regression/printf/kernel.c @@ -5,7 +5,7 @@ #include "common.h" void kernel_body(int task_id, kernel_arg_t* arg) { - int* src_ptr = 
(int*)arg->src_ptr; + int* src_ptr = (int*)arg->src_addr; vx_printf("task=%d, value=%d\n", task_id, src_ptr[task_id]); } diff --git a/tests/regression/printf/main.cpp b/tests/regression/printf/main.cpp index b9d4db38..9eb13b4e 100644 --- a/tests/regression/printf/main.cpp +++ b/tests/regression/printf/main.cpp @@ -21,6 +21,7 @@ uint32_t count = 4; vx_device_h device = nullptr; vx_buffer_h staging_buf = nullptr; +kernel_arg_t kernel_arg; static void show_usage() { std::cout << "Vortex Test." << std::endl; @@ -51,9 +52,10 @@ static void parse_args(int argc, char **argv) { void cleanup() { if (staging_buf) { - vx_buf_release(staging_buf); + vx_buf_free(staging_buf); } if (device) { + vx_mem_free(device, kernel_arg.src_addr); vx_dev_close(device); } } @@ -71,8 +73,7 @@ int run_test() { } int main(int argc, char *argv[]) { - size_t value; - kernel_arg_t kernel_arg; + size_t value; // parse command arguments parse_args(argc, argv); @@ -103,17 +104,17 @@ int main(int argc, char *argv[]) { // allocate device memory std::cout << "allocate device memory" << std::endl; - RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value)); - kernel_arg.src_ptr = value; + RT_CHECK(vx_mem_alloc(device, buf_size, &value)); + kernel_arg.src_addr = value; kernel_arg.num_points = num_points; - std::cout << "dev_src=" << std::hex << kernel_arg.src_ptr << std::endl; + std::cout << "dev_src=" << std::hex << kernel_arg.src_addr << std::endl; // allocate shared memory std::cout << "allocate shared memory" << std::endl; uint32_t alloc_size = std::max(buf_size, sizeof(kernel_arg_t)); - RT_CHECK(vx_alloc_shared_mem(device, alloc_size, &staging_buf)); + RT_CHECK(vx_buf_alloc(device, alloc_size, &staging_buf)); // upload kernel argument std::cout << "upload kernel argument" << std::endl; @@ -131,7 +132,7 @@ int main(int argc, char *argv[]) { } } std::cout << "upload source buffer" << std::endl; - RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src_ptr, buf_size, 0)); + RT_CHECK(vx_copy_to_dev(staging_buf, 
kernel_arg.src_addr, buf_size, 0)); // run tests std::cout << "run tests" << std::endl; diff --git a/tests/regression/sort/common.h b/tests/regression/sort/common.h index 6346c58e..ccb28bb9 100644 --- a/tests/regression/sort/common.h +++ b/tests/regression/sort/common.h @@ -5,8 +5,8 @@ typedef struct { uint32_t num_points; - uint32_t src_ptr; - uint32_t dst_ptr; + uint32_t src_addr; + uint32_t dst_addr; } kernel_arg_t; #endif \ No newline at end of file diff --git a/tests/regression/sort/kernel.c b/tests/regression/sort/kernel.c index 5798aafa..4fabbe40 100644 --- a/tests/regression/sort/kernel.c +++ b/tests/regression/sort/kernel.c @@ -5,8 +5,8 @@ void kernel_body(int __DIVERGENT__ task_id, kernel_arg_t* arg) { uint32_t num_points = arg->num_points; - int32_t* src_ptr = (int32_t*)arg->src_ptr; - int32_t* dst_ptr = (int32_t*)arg->dst_ptr; + int32_t* src_ptr = (int32_t*)arg->src_addr; + int32_t* dst_ptr = (int32_t*)arg->dst_addr; int32_t ref_value = src_ptr[task_id]; diff --git a/tests/regression/sort/main.cpp b/tests/regression/sort/main.cpp index 96032a91..66c8549f 100644 --- a/tests/regression/sort/main.cpp +++ b/tests/regression/sort/main.cpp @@ -25,6 +25,7 @@ std::vector ref_data; vx_device_h device = nullptr; vx_buffer_h staging_buf = nullptr; +kernel_arg_t kernel_arg; static void show_usage() { std::cout << "Vortex Test." 
<< std::endl; @@ -55,9 +56,11 @@ static void parse_args(int argc, char **argv) { void cleanup() { if (staging_buf) { - vx_buf_release(staging_buf); + vx_buf_free(staging_buf); } if (device) { + vx_mem_free(device, kernel_arg.src_addr); + vx_mem_free(device, kernel_arg.dst_addr); vx_dev_close(device); } } @@ -102,7 +105,7 @@ int run_test(const kernel_arg_t& kernel_arg, // download destination buffer std::cout << "download destination buffer" << std::endl; - RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_ptr, buf_size, 0)); + RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_addr, buf_size, 0)); // verify result std::cout << "verify result" << std::endl; @@ -129,8 +132,7 @@ int run_test(const kernel_arg_t& kernel_arg, } int main(int argc, char *argv[]) { - size_t value; - kernel_arg_t kernel_arg; + size_t value; // parse command arguments parse_args(argc, argv); @@ -166,22 +168,22 @@ int main(int argc, char *argv[]) { // allocate device memory std::cout << "allocate device memory" << std::endl; - RT_CHECK(vx_alloc_dev_mem(device, src_buf_size, &value)); - kernel_arg.src_ptr = value; - RT_CHECK(vx_alloc_dev_mem(device, dst_buf_size, &value)); - kernel_arg.dst_ptr = value; + RT_CHECK(vx_mem_alloc(device, src_buf_size, &value)); + kernel_arg.src_addr = value; + RT_CHECK(vx_mem_alloc(device, dst_buf_size, &value)); + kernel_arg.dst_addr = value; kernel_arg.num_points = num_points; - std::cout << "dev_src=" << std::hex << kernel_arg.src_ptr << std::endl; - std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::endl; + std::cout << "dev_src=" << std::hex << kernel_arg.src_addr << std::endl; + std::cout << "dev_dst=" << std::hex << kernel_arg.dst_addr << std::endl; // allocate shared memory std::cout << "allocate shared memory" << std::endl; uint32_t staging_buf_size = std::max(src_buf_size, std::max(dst_buf_size, sizeof(kernel_arg_t))); - RT_CHECK(vx_alloc_shared_mem(device, staging_buf_size, &staging_buf)); + RT_CHECK(vx_buf_alloc(device, 
staging_buf_size, &staging_buf)); // upload kernel argument std::cout << "upload kernel argument" << std::endl; @@ -199,7 +201,7 @@ int main(int argc, char *argv[]) { } } std::cout << "upload source buffer" << std::endl; - RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src_ptr, src_buf_size, 0)); + RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src_addr, src_buf_size, 0)); // clear destination buffer { @@ -209,7 +211,7 @@ int main(int argc, char *argv[]) { } } std::cout << "clear destination buffer" << std::endl; - RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_ptr, dst_buf_size, 0)); + RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_addr, dst_buf_size, 0)); // run tests std::cout << "run tests" << std::endl; diff --git a/tests/regression/tex/main.cpp b/tests/regression/tex/main.cpp index 5ea47cc0..515ff2fd 100644 --- a/tests/regression/tex/main.cpp +++ b/tests/regression/tex/main.cpp @@ -35,6 +35,7 @@ ePixelFormat eformat = FORMAT_A8R8G8B8; vx_device_h device = nullptr; vx_buffer_h buffer = nullptr; +kernel_arg_t kernel_arg; static void show_usage() { std::cout << "Vortex Texture Test." 
<< std::endl; @@ -95,9 +96,11 @@ static void parse_args(int argc, char **argv) { void cleanup() { if (buffer) { - vx_buf_release(buffer); + vx_buf_free(buffer); } if (device) { + vx_mem_free(device, kernel_arg.src_addr); + vx_mem_free(device, kernel_arg.dst_addr); vx_dev_close(device); } } @@ -141,7 +144,6 @@ int run_test(const kernel_arg_t& kernel_arg, } int main(int argc, char *argv[]) { - kernel_arg_t kernel_arg; std::vector src_pixels; std::vector mip_offsets; uint32_t src_width; @@ -196,8 +198,8 @@ int main(int argc, char *argv[]) { // allocate device memory std::cout << "allocate device memory" << std::endl; uint64_t src_addr, dst_addr; - RT_CHECK(vx_alloc_dev_mem(device, src_bufsize, &src_addr)); - RT_CHECK(vx_alloc_dev_mem(device, dst_bufsize, &dst_addr)); + RT_CHECK(vx_mem_alloc(device, src_bufsize, &src_addr)); + RT_CHECK(vx_mem_alloc(device, dst_bufsize, &dst_addr)); std::cout << "src_addr=0x" << std::hex << src_addr << std::endl; std::cout << "dst_addr=0x" << std::hex << dst_addr << std::endl; @@ -206,7 +208,7 @@ int main(int argc, char *argv[]) { std::cout << "allocate shared memory" << std::endl; uint32_t alloc_size = std::max(sizeof(kernel_arg_t), std::max(src_bufsize, dst_bufsize)); - RT_CHECK(vx_alloc_shared_mem(device, alloc_size, &buffer)); + RT_CHECK(vx_buf_alloc(device, alloc_size, &buffer)); // upload kernel argument std::cout << "upload kernel argument" << std::endl;