refactoring device memory allocation and cleanup

This commit is contained in:
Blaise Tine
2022-01-28 21:57:16 -05:00
parent 29df0da8b5
commit f7887d8720
49 changed files with 875 additions and 373 deletions

View File

@@ -18,6 +18,7 @@
#endif
#include "vx_utils.h"
#include "vx_malloc.h"
#include <vortex.h>
#include <VX_config.h>
#include "vortex_afu.h"
@@ -51,14 +52,25 @@
///////////////////////////////////////////////////////////////////////////////
typedef struct vx_device_ {
class vx_device {
public:
vx_device()
: mem_allocator(
ALLOC_BASE_ADDR,
ALLOC_BASE_ADDR + LOCAL_MEM_SIZE,
4096,
CACHE_BLOCK_SIZE)
{}
~vx_device() {}
fpga_handle fpga;
uint64_t mem_allocation;
vortex::MemoryAllocator mem_allocator;
unsigned version;
unsigned num_cores;
unsigned num_warps;
unsigned num_threads;
} vx_device_t;
};
typedef struct vx_buffer_ {
uint64_t wsid;
@@ -102,7 +114,7 @@ extern int vx_dev_caps(vx_device_h hdevice, uint32_t caps_id, uint64_t *value) {
if (nullptr == hdevice)
return -1;
vx_device_t *device = ((vx_device_t*)hdevice);
vx_device *device = ((vx_device*)hdevice);
switch (caps_id) {
case VX_CAPS_VERSION:
@@ -143,7 +155,7 @@ extern int vx_dev_open(vx_device_h* hdevice) {
return -1;
fpga_handle accel_handle;
vx_device_t* device;
vx_device* device;
#ifndef USE_VLSIM
fpga_result res;
@@ -204,14 +216,13 @@ extern int vx_dev_open(vx_device_h* hdevice) {
#endif
// allocate device object
device = (vx_device_t*)malloc(sizeof(vx_device_t));
device = new vx_device();
if (nullptr == device) {
fpgaClose(accel_handle);
return -1;
}
device->fpga = accel_handle;
device->mem_allocation = ALLOC_BASE_ADDR;
{
// Load device CAPS
@@ -254,7 +265,7 @@ extern int vx_dev_close(vx_device_h hdevice) {
if (nullptr == hdevice)
return -1;
vx_device_t *device = ((vx_device_t*)hdevice);
vx_device *device = ((vx_device*)hdevice);
#ifdef SCOPE
vx_scope_stop(device->fpga);
@@ -267,30 +278,30 @@ extern int vx_dev_close(vx_device_h hdevice) {
fpgaClose(device->fpga);
delete device;
return 0;
}
extern int vx_alloc_dev_mem(vx_device_h hdevice, uint64_t size, uint64_t* dev_maddr) {
extern int vx_mem_alloc(vx_device_h hdevice, uint64_t size, uint64_t* dev_maddr) {
if (nullptr == hdevice
|| nullptr == dev_maddr
|| 0 >= size)
return -1;
vx_device_t *device = ((vx_device_t*)hdevice);
size_t dev_mem_size = LOCAL_MEM_SIZE;
size_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
if (device->mem_allocation + asize > dev_mem_size)
return -1;
*dev_maddr = device->mem_allocation;
device->mem_allocation += asize;
return 0;
vx_device *device = ((vx_device*)hdevice);
return device->mem_allocator.allocate(size, dev_maddr);
}
extern int vx_alloc_shared_mem(vx_device_h hdevice, uint64_t size, vx_buffer_h* hbuffer) {
// Hands a device-memory address back to the device's memory allocator.
// Returns -1 when the device handle is null; otherwise returns whatever
// status the allocator's release() reports.
extern int vx_mem_free(vx_device_h hdevice, uint64_t dev_maddr) {
  if (hdevice == nullptr) {
    return -1;
  }

  auto* dev = (vx_device*)hdevice;
  return dev->mem_allocator.release(dev_maddr);
}
extern int vx_buf_alloc(vx_device_h hdevice, uint64_t size, vx_buffer_h* hbuffer) {
fpga_result res;
void* host_ptr;
uint64_t wsid;
@@ -302,7 +313,7 @@ extern int vx_alloc_shared_mem(vx_device_h hdevice, uint64_t size, vx_buffer_h*
|| nullptr == hbuffer)
return -1;
vx_device_t *device = ((vx_device_t*)hdevice);
vx_device *device = ((vx_device*)hdevice);
size_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
@@ -344,12 +355,12 @@ extern void* vx_host_ptr(vx_buffer_h hbuffer) {
return buffer->host_ptr;
}
extern int vx_buf_release(vx_buffer_h hbuffer) {
extern int vx_buf_free(vx_buffer_h hbuffer) {
if (nullptr == hbuffer)
return -1;
vx_buffer_t* buffer = ((vx_buffer_t*)hbuffer);
vx_device_t *device = ((vx_device_t*)buffer->hdevice);
vx_device *device = ((vx_device*)buffer->hdevice);
fpgaReleaseBuffer(device->fpga, buffer->wsid);
@@ -364,7 +375,7 @@ extern int vx_ready_wait(vx_device_h hdevice, uint64_t timeout) {
std::unordered_map<int, std::stringstream> print_bufs;
vx_device_t *device = ((vx_device_t*)hdevice);
vx_device *device = ((vx_device*)hdevice);
struct timespec sleep_time;
@@ -427,7 +438,7 @@ extern int vx_copy_to_dev(vx_buffer_h hbuffer, uint64_t dev_maddr, uint64_t size
return -1;
vx_buffer_t *buffer = ((vx_buffer_t*)hbuffer);
vx_device_t *device = ((vx_device_t*)buffer->hdevice);
vx_device *device = ((vx_device*)buffer->hdevice);
uint64_t dev_mem_size = LOCAL_MEM_SIZE;
uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
@@ -468,7 +479,7 @@ extern int vx_copy_from_dev(vx_buffer_h hbuffer, uint64_t dev_maddr, uint64_t si
return -1;
vx_buffer_t *buffer = ((vx_buffer_t*)hbuffer);
vx_device_t *device = ((vx_device_t*)buffer->hdevice);
vx_device *device = ((vx_device*)buffer->hdevice);
uint64_t dev_mem_size = LOCAL_MEM_SIZE;
uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
@@ -507,7 +518,7 @@ extern int vx_start(vx_device_h hdevice) {
if (nullptr == hdevice)
return -1;
vx_device_t *device = ((vx_device_t*)hdevice);
vx_device *device = ((vx_device*)hdevice);
// Ensure ready for new command
if (vx_ready_wait(hdevice, MAX_TIMEOUT) != 0)

399
driver/common/vx_malloc.h Normal file
View File

@@ -0,0 +1,399 @@
#pragma once
#include <cstdint>
#include <assert.h>
namespace vortex {
// Allocator for a linear address range [minAddress, maxAddress).
//
// The range is consumed from the bottom up in "pages" (sized in multiples of
// pageAlign). Each page is subdivided into blocks (sized in multiples of
// blockAlign). Only bookkeeping lives in host memory; the managed addresses
// themselves are never dereferenced by this class.
//
// Every page tracks its blocks in three intrusive doubly-linked lists:
//   - used list   : blocks currently handed out by allocate()
//   - free S-list : free blocks sorted by decreasing size (allocation lookup)
//   - free M-list : free blocks sorted by increasing address (coalescing)
//
// The class owns raw heap nodes, so it is non-copyable.
class MemoryAllocator {
public:
    // minAddress  : first address that may be handed out.
    // maxAddress  : one past the last usable address.
    // pageAlign   : page size granularity; must be a power of two.
    // blockAlign  : allocation size granularity; must be a power of two.
    MemoryAllocator(
        uint64_t minAddress,
        uint64_t maxAddress,
        uint32_t pageAlign,
        uint32_t blockAlign)
        : nextAddress_(minAddress)
        , maxAddress_(maxAddress)
        , pageAlign_(pageAlign)
        , blockAlign_(blockAlign)
        , pages_(nullptr)
    {}

    // Copying would make two allocators share (and double-delete) the same nodes.
    MemoryAllocator(const MemoryAllocator&) = delete;
    MemoryAllocator& operator=(const MemoryAllocator&) = delete;

    // Frees all bookkeeping, including pages that still have outstanding
    // allocations (the owner is going away, so those addresses die with it).
    ~MemoryAllocator() {
        page_t* pCurPage = pages_;
        while (pCurPage) {
            page_t* nextPage = pCurPage->next;
            delete pCurPage; // page_t's destructor frees its blocks
            pCurPage = nextPage;
        }
        pages_ = nullptr;
    }

    // Reserves `size` bytes (rounded up to blockAlign) and stores the start
    // address in *addr.
    // Returns 0 on success, -1 if size is zero, addr is null, the rounded size
    // overflows, or the address range is exhausted.
    int allocate(uint64_t size, uint64_t* addr) {
        if (size == 0 || addr == nullptr)
            return -1;

        // Align allocation size; a result smaller than the request means the
        // rounding wrapped around 64 bits.
        const uint64_t alignedSize = AlignSize(size, blockAlign_);
        if (alignedSize < size)
            return -1;
        size = alignedSize;

        // Walk thru all pages to find a free block
        block_t* pFreeBlock = nullptr;
        page_t* pCurPage = pages_;
        while (pCurPage) {
            block_t* pCurBlock = pCurPage->pFreeSList;
            // The S-list is sorted with the biggest block first, so checking
            // the head tells us whether this page can satisfy the request.
            if (pCurBlock && (pCurBlock->size >= size)) {
                // Advance to the smallest block that still fits (best fit)
                while (pCurBlock->nextFreeS
                    && (pCurBlock->nextFreeS->size >= size)) {
                    pCurBlock = pCurBlock->nextFreeS;
                }
                pFreeBlock = pCurBlock;
                break;
            }
            pCurPage = pCurPage->next;
        }

        if (nullptr == pFreeBlock) {
            // Allocate a new page for this request
            pCurPage = this->NewPage(size);
            if (nullptr == pCurPage)
                return -1;
            pFreeBlock = pCurPage->pFreeSList;
        }

        // Remove the block from the free lists
        assert(pFreeBlock->size >= size);
        pCurPage->RemoveFreeMBlock(pFreeBlock);
        pCurPage->RemoveFreeSBlock(pFreeBlock);

        // If the free block we have found is larger than what we are looking for,
        // split it and keep the remainder on the free lists.
        const uint64_t extraBytes = pFreeBlock->size - size;
        if (extraBytes >= blockAlign_) {
            // Reduce the free block size to the requested value
            pFreeBlock->size = size;

            // Allocate a new block to describe the remainder
            block_t* pNewBlock = new block_t(pFreeBlock->addr + size, extraBytes);

            // Add the new block to the free lists
            pCurPage->InsertFreeMBlock(pNewBlock);
            pCurPage->InsertFreeSBlock(pNewBlock);
        }

        // Insert the block into the used list
        pCurPage->InsertUsedBlock(pFreeBlock);

        // Return the block address
        *addr = pFreeBlock->addr;
        return 0;
    }

    // Releases the allocation that starts exactly at `addr`.
    // Returns 0 on success, -1 if no outstanding allocation starts there.
    int release(uint64_t addr) {
        // Walk all pages to find the block
        block_t* pUsedBlock = nullptr;
        page_t* pCurPage = pages_;
        while (pCurPage) {
            // Inclusive lower bound: the first block of a page starts at the
            // page's own base address.
            if ((pCurPage->addr <= addr)
             && ((pCurPage->addr + pCurPage->size) > addr)) {
                block_t* pCurBlock = pCurPage->pUsedList;
                while (pCurBlock) {
                    if (pCurBlock->addr == addr) {
                        pUsedBlock = pCurBlock;
                        break;
                    }
                    pCurBlock = pCurBlock->nextUsed;
                }
                if (pUsedBlock)
                    break;
            }
            pCurPage = pCurPage->next;
        }

        // found the corresponding block?
        if (nullptr == pUsedBlock)
            return -1;

        // Remove the block from the used list
        pCurPage->RemoveUsedBlock(pUsedBlock);

        // Insert the block into the free M-list.
        pCurPage->InsertFreeMBlock(pUsedBlock);

        // Check if we can merge with the adjacent free block on the left.
        if (pUsedBlock->prevFreeM) {
            block_t* pLeft = pUsedBlock->prevFreeM;
            if (pUsedBlock->addr == (pLeft->addr + pLeft->size)) {
                // Detach left block from the free S-list (its size changes)
                pCurPage->RemoveFreeSBlock(pLeft);

                // Grow the left block over the released one and unlink the latter
                pLeft->size += pUsedBlock->size;
                pLeft->nextFreeM = pUsedBlock->nextFreeM;
                if (pLeft->nextFreeM) {
                    pLeft->nextFreeM->prevFreeM = pLeft;
                }

                // The released node is now unreachable: free it
                delete pUsedBlock;
                pUsedBlock = pLeft;
            }
        }

        // Check if we can merge with the adjacent free block on the right.
        if (pUsedBlock->nextFreeM) {
            block_t* pRight = pUsedBlock->nextFreeM;
            if (pRight->addr == (pUsedBlock->addr + pUsedBlock->size)) {
                // Detach right block from the free S-list
                pCurPage->RemoveFreeSBlock(pRight);

                // Grow over the right block and unlink it from the M-list
                pUsedBlock->size += pRight->size;
                pUsedBlock->nextFreeM = pRight->nextFreeM;
                if (pUsedBlock->nextFreeM) {
                    pUsedBlock->nextFreeM->prevFreeM = pUsedBlock;
                }

                // The absorbed node is now unreachable: free it
                delete pRight;
            }
        }

        // Insert the (possibly merged) block into the free S-list.
        pCurPage->InsertFreeSBlock(pUsedBlock);

        // Check if we can give empty pages back to the address range
        if (nullptr == pCurPage->pUsedList) {
            // DeletePage() frees the page on success, so pCurPage is only
            // reused after being reassigned.
            while (pCurPage && this->DeletePage(pCurPage)) {
                pCurPage = this->NextEmptyPage();
            }
        }

        return 0;
    }

private:

    // Bookkeeping node describing one contiguous address span inside a page.
    struct block_t {
        block_t* nextFreeS;
        block_t* prevFreeS;

        block_t* nextFreeM;
        block_t* prevFreeM;

        block_t* nextUsed;
        block_t* prevUsed;

        uint64_t addr;
        uint64_t size;

        block_t(uint64_t addr, uint64_t size)
            : nextFreeS(nullptr)
            , prevFreeS(nullptr)
            , nextFreeM(nullptr)
            , prevFreeM(nullptr)
            , nextUsed(nullptr)
            , prevUsed(nullptr)
            , addr(addr)
            , size(size)
        {}
    };

    // A contiguous slice of the managed range, owning all of its block nodes.
    struct page_t {
        page_t* next;

        // List of used blocks
        block_t* pUsedList;

        // List with blocks sorted by decreasing sizes
        // Used for block lookup during memory allocation.
        block_t* pFreeSList;

        // List with blocks sorted by increasing memory addresses
        // Used for block merging during memory release.
        block_t* pFreeMList;

        uint64_t addr;
        uint64_t size;

        // A new page starts as a single free block covering the whole page.
        page_t(uint64_t addr, uint64_t size)
            : next(nullptr)
            , pUsedList(nullptr)
            , pFreeSList(nullptr)
            , pFreeMList(nullptr)
            , addr(addr)
            , size(size) {
            pFreeSList = pFreeMList = new block_t(addr, size);
        }

        page_t(const page_t&) = delete;
        page_t& operator=(const page_t&) = delete;

        // Every block is on exactly one of the used list or the free M-list
        // (free blocks are additionally on the S-list), so walking those two
        // lists frees each node once.
        ~page_t() {
            block_t* pBlock = pUsedList;
            while (pBlock) {
                block_t* pNext = pBlock->nextUsed;
                delete pBlock;
                pBlock = pNext;
            }
            pBlock = pFreeMList;
            while (pBlock) {
                block_t* pNext = pBlock->nextFreeM;
                delete pBlock;
                pBlock = pNext;
            }
        }

        // Pushes the block at the head of the used list.
        void InsertUsedBlock(block_t* pBlock) {
            pBlock->prevUsed = nullptr;
            pBlock->nextUsed = pUsedList;
            if (pUsedList) {
                pUsedList->prevUsed = pBlock;
            }
            pUsedList = pBlock;
        }

        void RemoveUsedBlock(block_t* pBlock) {
            if (pBlock->prevUsed) {
                pBlock->prevUsed->nextUsed = pBlock->nextUsed;
            } else {
                pUsedList = pBlock->nextUsed;
            }
            if (pBlock->nextUsed) {
                pBlock->nextUsed->prevUsed = pBlock->prevUsed;
            }
            pBlock->nextUsed = nullptr;
            pBlock->prevUsed = nullptr;
        }

        // Inserts the block keeping the M-list sorted by increasing address.
        void InsertFreeMBlock(block_t* pBlock) {
            block_t* pCurBlock = pFreeMList;
            block_t* prevBlock = nullptr;
            while (pCurBlock && (pCurBlock->addr < pBlock->addr)) {
                prevBlock = pCurBlock;
                pCurBlock = pCurBlock->nextFreeM;
            }
            pBlock->nextFreeM = pCurBlock;
            pBlock->prevFreeM = prevBlock;
            if (prevBlock) {
                prevBlock->nextFreeM = pBlock;
            } else {
                pFreeMList = pBlock;
            }
            if (pCurBlock) {
                pCurBlock->prevFreeM = pBlock;
            }
        }

        void RemoveFreeMBlock(block_t* pBlock) {
            if (pBlock->prevFreeM) {
                pBlock->prevFreeM->nextFreeM = pBlock->nextFreeM;
            } else {
                pFreeMList = pBlock->nextFreeM;
            }
            if (pBlock->nextFreeM) {
                pBlock->nextFreeM->prevFreeM = pBlock->prevFreeM;
            }
            pBlock->nextFreeM = nullptr;
            pBlock->prevFreeM = nullptr;
        }

        // Inserts the block keeping the S-list sorted by decreasing size.
        void InsertFreeSBlock(block_t* pBlock) {
            block_t* pCurBlock = this->pFreeSList;
            block_t* prevBlock = nullptr;
            while (pCurBlock && (pCurBlock->size > pBlock->size)) {
                prevBlock = pCurBlock;
                pCurBlock = pCurBlock->nextFreeS;
            }
            pBlock->nextFreeS = pCurBlock;
            pBlock->prevFreeS = prevBlock;
            if (prevBlock) {
                prevBlock->nextFreeS = pBlock;
            } else {
                this->pFreeSList = pBlock;
            }
            if (pCurBlock) {
                pCurBlock->prevFreeS = pBlock;
            }
        }

        void RemoveFreeSBlock(block_t* pBlock) {
            if (pBlock->prevFreeS) {
                pBlock->prevFreeS->nextFreeS = pBlock->nextFreeS;
            } else {
                pFreeSList = pBlock->nextFreeS;
            }
            if (pBlock->nextFreeS) {
                pBlock->nextFreeS->prevFreeS = pBlock->prevFreeS;
            }
            pBlock->nextFreeS = nullptr;
            pBlock->prevFreeS = nullptr;
        }
    };

    // Carves a new page able to hold `size` bytes from the top of the range.
    // Returns nullptr (leaving the allocator state untouched) if the range
    // cannot accommodate it.
    page_t* NewPage(uint64_t size) {
        // Pad the request up to the page alignment; detect 64-bit wrap-around
        const uint64_t pageSize = AlignSize(size, pageAlign_);
        if (pageSize < size)
            return nullptr;

        // Bounds check BEFORE committing, written so it cannot overflow
        if ((nextAddress_ > maxAddress_)
         || (pageSize > (maxAddress_ - nextAddress_)))
            return nullptr;

        // Allocate the page and commit the address range
        const uint64_t addr = nextAddress_;
        page_t* pNewPage = new page_t(addr, pageSize);
        nextAddress_ = addr + pageSize;

        // Insert the new page at the head of the list
        pNewPage->next = pages_;
        pages_ = pNewPage;

        return pNewPage;
    }

    // Returns an empty page to the address range and frees it.
    // Only the page that ends at nextAddress_ can be returned; for any other
    // page this is a no-op returning false. On true, pPage has been deleted.
    bool DeletePage(page_t* pPage) {
        // The page should be empty
        assert(nullptr == pPage->pUsedList);
        assert(pPage->pFreeMList && (nullptr == pPage->pFreeMList->nextFreeM));

        // Only delete top-level pages
        if ((pPage->addr + pPage->size) != nextAddress_)
            return false;

        // Remove the page from the list
        page_t* prevPage = nullptr;
        page_t* pCurPage = pages_;
        while (pCurPage) {
            if (pCurPage == pPage) {
                if (prevPage) {
                    prevPage->next = pCurPage->next;
                } else {
                    pages_ = pCurPage->next;
                }
                break;
            }
            prevPage = pCurPage;
            pCurPage = pCurPage->next;
        }

        // Update next allocation address
        nextAddress_ = pPage->addr;

        // Free the page and its remaining free block
        delete pPage;

        return true;
    }

    // Returns the first page in list order with no used blocks, or nullptr.
    page_t* NextEmptyPage() {
        page_t* pCurPage = pages_;
        while (pCurPage) {
            if (nullptr == pCurPage->pUsedList)
                return pCurPage;
            pCurPage = pCurPage->next;
        }
        return nullptr;
    }

    // Rounds size up to a multiple of alignment (which must be a power of two).
    // The result wraps around if size is within `alignment` of UINT64_MAX;
    // callers detect that by checking result < size.
    static uint64_t AlignSize(uint64_t size, uint64_t alignment) {
        assert(0 == (alignment & (alignment - 1)));
        return (size + alignment - 1) & ~(alignment - 1);
    }

    uint64_t nextAddress_;
    uint64_t maxAddress_;
    uint32_t pageAlign_;
    uint32_t blockAlign_;
    page_t*  pages_;
};
} // namespace vortex

View File

@@ -22,7 +22,7 @@ extern int vx_upload_kernel_bytes(vx_device_h device, const void* content, uint6
if (NULL == content || 0 == size)
return -1;
uint32_t buffer_transfer_size = 65536;
uint32_t buffer_transfer_size = 65536; // 64 KB
uint64_t kernel_base_addr;
err = vx_dev_caps(device, VX_CAPS_KERNEL_BASE_ADDR, &kernel_base_addr);
if (err != 0)
@@ -30,7 +30,7 @@ extern int vx_upload_kernel_bytes(vx_device_h device, const void* content, uint6
// allocate device buffer
vx_buffer_h buffer;
err = vx_alloc_shared_mem(device, buffer_transfer_size, &buffer);
err = vx_buf_alloc(device, buffer_transfer_size, &buffer);
if (err != 0)
return -1;
@@ -54,13 +54,13 @@ extern int vx_upload_kernel_bytes(vx_device_h device, const void* content, uint6
err = vx_copy_to_dev(buffer, kernel_base_addr + offset, chunk_size, 0);
if (err != 0) {
vx_buf_release(buffer);
vx_buf_free(buffer);
return err;
}
offset += chunk_size;
}
vx_buf_release(buffer);
vx_buf_free(buffer);
return 0;
}
@@ -149,7 +149,7 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
return ret;
vx_buffer_h staging_buf;
ret = vx_alloc_shared_mem(device, 64 * sizeof(uint32_t), &staging_buf);
ret = vx_buf_alloc(device, 64 * sizeof(uint32_t), &staging_buf);
if (ret != 0)
return ret;
@@ -158,7 +158,7 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
for (unsigned core_id = 0; core_id < num_cores; ++core_id) {
ret = vx_copy_from_dev(staging_buf, IO_CSR_ADDR + 64 * sizeof(uint32_t) * core_id, 64 * sizeof(uint32_t), 0);
if (ret != 0) {
vx_buf_release(staging_buf);
vx_buf_free(staging_buf);
return ret;
}
@@ -336,7 +336,21 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
#endif
// release allocated resources
vx_buf_release(staging_buf);
vx_buf_free(staging_buf);
return ret;
}
// Deprecated API functions
// Deprecated entry point: forwards unchanged to vx_buf_alloc and returns its status.
extern int vx_alloc_shared_mem(vx_device_h hdevice, uint64_t size, vx_buffer_h* hbuffer) {
  const int status = vx_buf_alloc(hdevice, size, hbuffer);
  return status;
}
// Deprecated entry point: forwards unchanged to vx_buf_free and returns its status.
extern int vx_buf_release(vx_buffer_h hbuffer) {
  const int status = vx_buf_free(hbuffer);
  return status;
}
// Deprecated entry point: forwards unchanged to vx_mem_alloc and returns its status.
extern int vx_alloc_dev_mem(vx_device_h hdevice, uint64_t size, uint64_t* dev_maddr) {
  const int status = vx_mem_alloc(hdevice, size, dev_maddr);
  return status;
}

View File

@@ -6,8 +6,7 @@ SCRIPT_DIR=../../hw/scripts
OPAE_SYN_DIR=../../hw/syn/opae
CXXFLAGS += -std=c++11 -O2 -DNDEBUG -Wall -Wextra -pedantic -Wfatal-errors
#CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors
CXXFLAGS += -std=c++11 -Wall -Wextra -pedantic -Wfatal-errors
CXXFLAGS += -I. -I../include -I../../hw -I$(OPAE_HOME)/include -I$(OPAE_SYN_DIR)
@@ -39,6 +38,13 @@ PROJECT = libvortex.so
SRCS = ../common/opae.cpp ../common/vx_utils.cpp
# Debugging
ifdef DEBUG
CXXFLAGS += -g -O0
else
CXXFLAGS += -O2 -DNDEBUG
endif
# Enable scope analyzer
ifdef SCOPE
CXXFLAGS += -DSCOPE

View File

@@ -35,16 +35,19 @@ int vx_dev_close(vx_device_h hdevice);
int vx_dev_caps(vx_device_h hdevice, uint32_t caps_id, uint64_t *value);
// Allocate shared buffer with device
int vx_alloc_shared_mem(vx_device_h hdevice, uint64_t size, vx_buffer_h* hbuffer);
int vx_buf_alloc(vx_device_h hdevice, uint64_t size, vx_buffer_h* hbuffer);
// release buffer
int vx_buf_free(vx_buffer_h hbuffer);
// Get host pointer address
void* vx_host_ptr(vx_buffer_h hbuffer);
// release buffer
int vx_buf_release(vx_buffer_h hbuffer);
// allocate device memory and return address
int vx_alloc_dev_mem(vx_device_h hdevice, uint64_t size, uint64_t* dev_maddr);
int vx_mem_alloc(vx_device_h hdevice, uint64_t size, uint64_t* dev_maddr);
// release device memory
int vx_mem_free(vx_device_h hdevice, uint64_t dev_maddr);
// Copy bytes from buffer to device local memory
int vx_copy_to_dev(vx_buffer_h hbuffer, uint64_t dev_maddr, uint64_t size, uint64_t src_offset);
@@ -69,6 +72,11 @@ int vx_upload_kernel_file(vx_device_h device, const char* filename);
// dump performance counters
int vx_dump_perf(vx_device_h device, FILE* stream);
//////////////////////////// DEPRECATED FUNCTIONS /////////////////////////////
int vx_alloc_dev_mem(vx_device_h hdevice, uint64_t size, uint64_t* dev_maddr);
int vx_alloc_shared_mem(vx_device_h hdevice, uint64_t size, vx_buffer_h* hbuffer);
int vx_buf_release(vx_buffer_h hbuffer);
#ifdef __cplusplus
}
#endif

View File

@@ -1,7 +1,6 @@
RTLSIM_DIR = ../../sim/rtlsim
CXXFLAGS += -std=c++11 -O2 -DNDEBUG -Wall -Wextra -pedantic -Wfatal-errors
#CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors
CXXFLAGS += -std=c++11 -Wall -Wextra -pedantic -Wfatal-errors
CXXFLAGS += -I../include -I../common -I../../hw -I$(RTLSIM_DIR) -I$(RTLSIM_DIR)/../common
@@ -19,6 +18,13 @@ LDFLAGS += -L. -lrtlsim
SRCS = vortex.cpp ../common/vx_utils.cpp
# Debugging
ifdef DEBUG
CXXFLAGS += -g -O0
else
CXXFLAGS += -O2 -DNDEBUG
endif
# Enable perf counters
ifdef PERF
CXXFLAGS += -DPERF_ENABLE

View File

@@ -8,6 +8,7 @@
#include <chrono>
#include <vortex.h>
#include <vx_malloc.h>
#include <vx_utils.h>
#include <VX_config.h>
#include <mem.h>
@@ -60,7 +61,11 @@ class vx_device {
public:
vx_device()
: ram_(RAM_PAGE_SIZE)
, mem_allocation_(ALLOC_BASE_ADDR)
, mem_allocator_(
ALLOC_BASE_ADDR,
ALLOC_BASE_ADDR + LOCAL_MEM_SIZE,
RAM_PAGE_SIZE,
CACHE_BLOCK_SIZE)
{
processor_.attach_ram(&ram_);
}
@@ -72,13 +77,11 @@ public:
}
int alloc_local_mem(uint64_t size, uint64_t* dev_maddr) {
uint64_t dev_mem_size = LOCAL_MEM_SIZE;
uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
if (mem_allocation_ + asize > dev_mem_size)
return -1;
*dev_maddr = mem_allocation_;
mem_allocation_ += asize;
return 0;
return mem_allocator_.allocate(size, dev_maddr);
}
// Gives the device address back to mem_allocator_ and returns the
// allocator's status code as-is.
int free_local_mem(uint64_t dev_maddr) {
  const int status = mem_allocator_.release(dev_maddr);
  return status;
}
int upload(const void* src, uint64_t dest_addr, uint64_t size, uint64_t src_offset) {
@@ -149,7 +152,7 @@ private:
RAM ram_;
Processor processor_;
uint64_t mem_allocation_;
MemoryAllocator mem_allocator_;
std::future<void> future_;
};
@@ -250,7 +253,7 @@ extern int vx_dev_close(vx_device_h hdevice) {
return 0;
}
extern int vx_alloc_dev_mem(vx_device_h hdevice, uint64_t size, uint64_t* dev_maddr) {
extern int vx_mem_alloc(vx_device_h hdevice, uint64_t size, uint64_t* dev_maddr) {
if (nullptr == hdevice
|| nullptr == dev_maddr
|| 0 >= size)
@@ -260,8 +263,15 @@ extern int vx_alloc_dev_mem(vx_device_h hdevice, uint64_t size, uint64_t* dev_ma
return device->alloc_local_mem(size, dev_maddr);
}
extern int vx_mem_free(vx_device_h hdevice, uint64_t dev_maddr) {
if (nullptr == hdevice)
return -1;
extern int vx_alloc_shared_mem(vx_device_h hdevice, uint64_t size, vx_buffer_h* hbuffer) {
vx_device *device = ((vx_device*)hdevice);
return device->free_local_mem(dev_maddr);
}
extern int vx_buf_alloc(vx_device_h hdevice, uint64_t size, vx_buffer_h* hbuffer) {
if (nullptr == hdevice
|| 0 >= size
|| nullptr == hbuffer)
@@ -289,7 +299,7 @@ extern void* vx_host_ptr(vx_buffer_h hbuffer) {
return buffer->data();
}
extern int vx_buf_release(vx_buffer_h hbuffer) {
extern int vx_buf_free(vx_buffer_h hbuffer) {
if (nullptr == hbuffer)
return -1;

View File

@@ -1,7 +1,6 @@
SIMX_DIR = ../../sim/simx
CXXFLAGS += -std=c++11 -O2 -Wall -Wextra -Wfatal-errors
#CXXFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors
CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors
CXXFLAGS += -fPIC -Wno-maybe-uninitialized
CXXFLAGS += -I../include -I../common -I../../hw -I$(SIMX_DIR) -I$(SIMX_DIR)/../common
@@ -11,7 +10,14 @@ CXXFLAGS += -DDUMP_PERF_STATS
LDFLAGS += -shared -pthread
LDFLAGS += -L. -lsimx
SRCS = vortex.cpp ../common/vx_utils.cpp
SRCS = vortex.cpp ../common/vx_utils.cpp
# Debugging
ifdef DEBUG
CXXFLAGS += -g -O0
else
CXXFLAGS += -O2 -DNDEBUG
endif
PROJECT = libvortex.so

View File

@@ -8,6 +8,7 @@
#include <vortex.h>
#include <vx_utils.h>
#include <vx_malloc.h>
#include <VX_config.h>
@@ -66,7 +67,11 @@ public:
: arch_("rv32i", NUM_CORES * NUM_CLUSTERS, NUM_WARPS, NUM_THREADS)
, ram_(RAM_PAGE_SIZE)
, processor_(arch_)
, mem_allocation_(ALLOC_BASE_ADDR)
, mem_allocator_(
ALLOC_BASE_ADDR,
ALLOC_BASE_ADDR + LOCAL_MEM_SIZE,
RAM_PAGE_SIZE,
CACHE_BLOCK_SIZE)
{
// attach memory module
processor_.attach_ram(&ram_);
@@ -76,16 +81,14 @@ public:
if (future_.valid()) {
future_.wait();
}
}
}
int alloc_local_mem(uint64_t size, uint64_t* dev_maddr) {
uint64_t dev_mem_size = LOCAL_MEM_SIZE;
uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
if (mem_allocation_ + asize > dev_mem_size)
return -1;
*dev_maddr = mem_allocation_;
mem_allocation_ += asize;
return 0;
return mem_allocator_.allocate(size, dev_maddr);
}
// Gives the device address back to mem_allocator_ and returns the
// allocator's status code as-is.
int free_local_mem(uint64_t dev_maddr) {
  const int status = mem_allocator_.release(dev_maddr);
  return status;
}
int upload(const void* src, uint64_t dest_addr, uint64_t size, uint64_t src_offset) {
@@ -151,7 +154,7 @@ private:
ArchDef arch_;
RAM ram_;
Processor processor_;
uint64_t mem_allocation_;
MemoryAllocator mem_allocator_;
std::future<void> future_;
};
@@ -252,7 +255,7 @@ extern int vx_dev_caps(vx_device_h hdevice, uint32_t caps_id, uint64_t *value) {
return 0;
}
extern int vx_alloc_dev_mem(vx_device_h hdevice, uint64_t size, uint64_t* dev_maddr) {
extern int vx_mem_alloc(vx_device_h hdevice, uint64_t size, uint64_t* dev_maddr) {
if (nullptr == hdevice
|| nullptr == dev_maddr
|| 0 >= size)
@@ -262,7 +265,15 @@ extern int vx_alloc_dev_mem(vx_device_h hdevice, uint64_t size, uint64_t* dev_ma
return device->alloc_local_mem(size, dev_maddr);
}
extern int vx_alloc_shared_mem(vx_device_h hdevice, uint64_t size, vx_buffer_h* hbuffer) {
// Frees device-local memory through the device object's free_local_mem().
// Returns -1 when the device handle is null; otherwise returns the
// status reported by free_local_mem().
extern int vx_mem_free(vx_device_h hdevice, uint64_t dev_maddr) {
  if (hdevice == nullptr) {
    return -1;
  }

  auto* dev = (vx_device*)hdevice;
  return dev->free_local_mem(dev_maddr);
}
extern int vx_buf_alloc(vx_device_h hdevice, uint64_t size, vx_buffer_h* hbuffer) {
if (nullptr == hdevice
|| 0 >= size
|| nullptr == hbuffer)
@@ -290,7 +301,7 @@ extern void* vx_host_ptr(vx_buffer_h hbuffer) {
return buffer->data();
}
extern int vx_buf_release(vx_buffer_h hbuffer) {
extern int vx_buf_free(vx_buffer_h hbuffer) {
if (nullptr == hbuffer)
return -1;

View File

@@ -1,5 +1,4 @@
CXXFLAGS += -std=c++11 -O3 -Wall -Wextra -pedantic -Wfatal-errors
#CXXFLAGS += -std=c++11 -g -O0 -Wall -Wextra -pedantic -Wfatal-errors
CXXFLAGS += -std=c++11 -O2 -Wall -Wextra -pedantic -Wfatal-errors
CXXFLAGS += -I../include -I../../runtime -I../../hw

View File

@@ -12,11 +12,15 @@ extern int vx_dev_caps(vx_device_h /*hdevice*/, uint32_t /*caps_id*/, uint64_t*
return -1;
}
extern int vx_alloc_dev_mem(vx_device_h /*hdevice*/, uint64_t /*size*/, uint64_t* /*dev_maddr*/) {
extern int vx_mem_alloc(vx_device_h /*hdevice*/, uint64_t /*size*/, uint64_t* /*dev_maddr*/) {
return -1;
}
extern int vx_alloc_shared_mem(vx_device_h /*hdevice*/, uint64_t /*size*/, vx_buffer_h* /*hbuffer*/) {
// Stub implementation: device memory cannot be released here, so this
// always reports failure (-1). Declared `extern` like the other stub
// entry points in this file.
extern int vx_mem_free(vx_device_h /*hdevice*/, uint64_t /*dev_maddr*/) {
  return -1;
}
extern int vx_buf_alloc(vx_device_h /*hdevice*/, uint64_t /*size*/, vx_buffer_h* /*hbuffer*/) {
return -1;
}
@@ -24,7 +28,7 @@ extern void* vx_host_ptr(vx_buffer_h /*hbuffer*/) {
return nullptr;
}
extern int vx_buf_release(vx_buffer_h /*hbuffer*/) {
extern int vx_buf_free(vx_buffer_h /*hbuffer*/) {
return -1;
}