runtime static library

This commit is contained in:
Blaise Tine
2020-06-27 14:13:13 -04:00
parent b7d7e69f47
commit 8a306de02d
73 changed files with 360 additions and 94341 deletions

View File

@@ -1,5 +1,3 @@
all: stub rtlsim simx
stub:

View File

@@ -1,37 +0,0 @@
# Bare-metal demo build: compiles main.c together with the Vortex runtime
# sources into one RV32IM ELF, then derives a full disassembly (.dump) and an
# Intel HEX image (.hex) from it.

# Remove a half-written target (e.g. a truncated .dump) when its recipe fails.
.DELETE_ON_ERROR:

# Toolchain and runtime locations; both can be overridden from the command line.
RISCV_TOOL_PATH ?= $(wildcard ~/dev/riscv-gnu-toolchain/drops)
VX_RT_PATH ?= $(wildcard ../../runtime)

CC = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc
CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++
DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump
HEX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy
GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb

# Runtime sources that are compiled into every kernel image.
NEWLIB = $(VX_RT_PATH)/newlib/newlib.c
VX_STR = $(VX_RT_PATH)/startup/vx_start.s
VX_INT = $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
VX_IO = $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
VX_API = $(VX_RT_PATH)/vx_api/vx_api.c
VX_FIO = $(VX_RT_PATH)/fileio/fileio.s

# Same order as they are handed to the compiler driver.
RT_SRCS = $(VX_STR) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API)
LINK_SCRIPT = $(VX_RT_PATH)/mains/vortex_link.ld

CFLAGS = -march=rv32im -mabi=ilp32 -O0 -Wl,-Bstatic,-T,$(LINK_SCRIPT) -ffreestanding -nostdlib
LIBS = $(RISCV_TOOL_PATH)/riscv32-unknown-elf/lib/libc.a $(RISCV_TOOL_PATH)/riscv32-unknown-elf/lib/libstdc++.a -static-libgcc -lgcc

PROJECT = demo
SRCS = main.c

# all/clean are commands, not files.
.PHONY: all clean

all: $(PROJECT).dump $(PROJECT).hex

$(PROJECT).dump: $(PROJECT).elf
	$(DMP) -D $< > $@

$(PROJECT).hex: $(PROJECT).elf
	$(HEX) -O ihex $< $@

# The ELF embeds the runtime sources and is laid out by the linker script, so
# a change to any of them must trigger a relink, not only a change to main.c.
$(PROJECT).elf: $(SRCS) $(RT_SRCS) $(LINK_SCRIPT)
	$(CC) $(CFLAGS) $(RT_SRCS) $(SRCS) $(LIBS) -I$(VX_RT_PATH) -o $@

clean:
	rm -rf *.o *.elf *.dump *.hex *.qemu *.log *.debug

View File

@@ -1,65 +0,0 @@
#include "intrinsics/vx_intrinsics.h"
#include "io/vx_io.h"
#include "vx_api/vx_api.h"
// Argument bundle handed to mat_add_kernel through its void* parameter.
typedef struct
{
// First input matrix, indexed as (row * numColums + column).
unsigned * x;
// Second input matrix, same indexing as x.
unsigned * y;
// Output matrix; the kernel stores x[i] + y[i] into z[i].
unsigned * z;
// Number of columns; the kernel compares the thread id against it.
unsigned numColums;
// Number of rows; the kernel compares the warp id against it.
unsigned numRows;
} mat_add_args_t;
// 4x4 first operand (16 elements, one row per source line).
unsigned x[] = {5, 5, 5, 5,
6, 6, 6, 6,
7, 7, 7, 7,
8, 8, 8, 8};
// 4x4 second operand: all ones.
unsigned y[] = {1, 1, 1, 1,
1, 1, 1, 1,
1, 1, 1, 1,
1, 1, 1, 1};
// 4x4 result buffer, zero-initialised and filled in by mat_add_kernel.
unsigned z[] = {0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0};
// Element-wise matrix addition kernel: z[index] = x[index] + y[index].
// The warp id selects the row and the thread id selects the column.
// void_arguments must point to a mat_add_args_t.
void mat_add_kernel(void * void_arguments)
{
mat_add_args_t * arguments = (mat_add_args_t *) void_arguments;
unsigned wid = vx_warpID();
unsigned tid = vx_threadID();
// Only (warp, thread) pairs that fall inside the matrix do any work.
// NOTE(review): `bool` needs <stdbool.h> in C; presumably one of the runtime
// headers provides it - confirm.
bool valid = (wid < arguments->numRows) && (tid < arguments->numColums);
// NOTE(review): __if/__endif are macros from the runtime headers (not visible
// here); presumably they bracket a divergent branch - confirm.
__if (valid)
{
// Row-major element index.
unsigned index = (wid * arguments->numColums) + tid;
arguments->z[index] = arguments->x[index] + arguments->y[index];
}
__endif
}
int main()
{
// Main is called with all threads active of warp 0
vx_tmc(1);
vx_print_str("Demo kernel\n");
mat_add_args_t arguments;
arguments.x = x;
arguments.y = y;
arguments.z = z;
arguments.numColums = 4;
arguments.numRows = 4;
vx_spawnWarps(4, 4, mat_add_kernel, &arguments);
vx_print_str("done.");
return 0;
}

View File

@@ -1,44 +0,0 @@
# simX-backed driver: builds libvxdrv_sim.so through Verilator and links the
# `test` program against it.

# Flags for the test program.
CXXFLAGS += -O3 -Wall -Wextra -pedantic -Wfatal-errors
CXXFLAGS += -I../sw

LDFLAGS += -L./obj_dir

# Flags forwarded to Verilator for the driver sources.
DRV_CFLAGS += -O3 -Wall -Wextra -pedantic -Wfatal-errors
DRV_CFLAGS += -I../../sw
DRV_CFLAGS += -fPIC

DRV_LDFLAGS += -shared -pthread

DRV_SRCS = vx_driver.cpp ../../simX/args.cpp ../../simX/mem.cpp ../../simX/core.cpp ../../simX/instruction.cpp ../../simX/enc.cpp ../../simX/util.cpp

RTL_TOP = ../../simX/cache_simX.v
RTL_INCLUDE = -I../../rtl/shared_memory -I../../rtl/cache -I../../rtl/interfaces -Isimulate -I../../rtl

VL_FLAGS += -Wno-UNOPTFLAT -Wno-WIDTH
VL_FLAGS += --trace -DVL_DEBUG=1

PROJECT = libvxdrv_sim.so

# all/clean are commands; `test` is a real binary and therefore not phony.
.PHONY: all clean

all: $(PROJECT) test

# The prerequisite list used to be $(SIMX_SRCS), which is never defined, so
# the library had no tracked inputs at all. Depend on the files the recipe
# actually consumes.
# NOTE(review): LDFLAGS points at ./obj_dir, which suggests the .so ends up in
# obj_dir/ rather than at ./$(PROJECT); if so this target never exists as a
# file and is rebuilt on every run - confirm.
$(PROJECT): $(DRV_SRCS) $(RTL_TOP)
	verilator --exe --cc $(RTL_TOP) $(RTL_INCLUDE) $(VL_FLAGS) $(DRV_SRCS) -CFLAGS '$(DRV_CFLAGS)' -LDFLAGS '$(DRV_LDFLAGS)' -o $(PROJECT)
	$(MAKE) -j -C obj_dir -f Vcache_simX.mk OPT='-DVL_DEBUG' VL_DEBUG=1 DVL_DEBUG=1

test: $(PROJECT) test.o utils.o
	$(CXX) $(CXXFLAGS) test.o utils.o $(LDFLAGS) -lvxdrv_sim -o $@

utils.o: ../sw/utils.cpp
	$(CXX) $(CXXFLAGS) -c $< -o $@

test.o: ../sw/test.cpp
	$(CXX) $(CXXFLAGS) -c $< -o $@

clean:
	rm -rf $(PROJECT) test *.so *.o obj_dir

View File

@@ -1,272 +0,0 @@
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <iostream>
#include <thread>
#include <mutex>
#include <chrono>
#include <vx_driver.h>
#include "../../simX/include/debug.h"
#include "../../simX/include/types.h"
#include "../../simX/include/core.h"
#include "../../simX/include/enc.h"
#include "../../simX/include/instruction.h"
#include "../../simX/include/mem.h"
#include "../../simX/include/obj.h"
#include "../../simX/include/archdef.h"
#include "../../simX/include/help.h"
// Granularity (bytes) to which align_size() rounds host buffer sizes.
#define CACHE_LINESIZE 64
// Page size passed to Harp::MemoryUnit in vx_device::run().
#define PAGE_SIZE 4096
// Evaluates _expr as an fpga_result; on anything other than FPGA_OK it prints
// the failing expression and makes the enclosing function return -1.
// NOTE(review): fpga_result/FPGA_OK are not declared by any header visible in
// this file and the macro is not used in the visible code - it looks like a
// leftover from the OPAE driver; confirm before relying on it.
#define CHECK_RES(_expr) \
do { \
fpga_result res = _expr; \
if (res == FPGA_OK) \
break; \
printf("OPAE Error: '%s' returned %d!\n", #_expr, (int)res); \
return -1; \
} while (false)
///////////////////////////////////////////////////////////////////////////////
// Rounds `size` up to the next multiple of CACHE_LINESIZE (0 stays 0).
static size_t align_size(size_t size) {
  const size_t whole_lines = (size + CACHE_LINESIZE - 1) / CACHE_LINESIZE;
  return CACHE_LINESIZE * whole_lines;
}
///////////////////////////////////////////////////////////////////////////////
class vx_device;
class vx_buffer {
public:
vx_buffer(size_t size, vx_device* device)
: size_(size)
, device_(device) {
auto aligned_asize = align_size(size);
data_ = malloc(aligned_asize);
}
~vx_buffer() {
if (data_) {
free(data_);
}
}
auto data() const {
return data_;
}
auto size() const {
return size_;
}
auto device() const {
return device_;
}
private:
size_t size_;
vx_device* device_;
void* data_;
};
///////////////////////////////////////////////////////////////////////////////
// Simulated device: owns the RAM image and a worker thread that runs the
// simX core each time start() is called.
class vx_device {
public:
  // Spawns the worker thread. thread_ is declared last in the class, so ram_
  // and mutex_ are fully constructed before the thread can touch them.
  vx_device()
    : is_done_(false)
    , is_running_(false)
    , thread_(__thread_proc__, this)
  {}

  // Asks the worker to exit and waits for it. If a simulation is in flight
  // the join blocks until it finishes.
  ~vx_device() {
    {
      std::lock_guard<std::mutex> guard(mutex_);
      is_done_ = true;
    }
    thread_.join();
  }

  // Copies `size` bytes from src + src_offset into device RAM at dest_addr.
  // Returns 0 on success, -1 if the range does not fit in RAM.
  int upload(void* src, size_t dest_addr, size_t size, size_t src_offset) {
    if (dest_addr + size > ram_.size())
      return -1;
    ram_.write(dest_addr, size, (uint8_t*)src + src_offset);
    return 0;
  }

  // Copies `size` bytes from device RAM at src_addr into dest + dest_offset.
  // Returns 0 on success, -1 if the range does not fit in RAM.
  // `dest` is written to even though it is declared const; the signature is
  // kept as-is for existing callers.
  int download(const void* dest, size_t src_addr, size_t size, size_t dest_offset) {
    if (src_addr + size > ram_.size())
      return -1;
    ram_.read(src_addr, size, (uint8_t*)dest + dest_offset);
    return 0;
  }

  // Waits for any previous run to finish, then flags the worker to start a
  // new one. Returns 0 on success.
  int start() {
    if (this->wait(-1) != 0)
      return -1;
    std::lock_guard<std::mutex> guard(mutex_);
    is_running_ = true;
    return 0;
  }

  // Polls once per millisecond until the device is idle or `timeout` polls
  // have elapsed. A negative timeout never reaches zero in practice, i.e. it
  // waits indefinitely. Always returns 0.
  int wait(long long timeout) {
    for (;;) {
      bool is_running;
      {
        std::lock_guard<std::mutex> guard(mutex_);
        is_running = is_running_;
      }
      if (!is_running || 0 == timeout--)
        break;
      std::this_thread::sleep_for(std::chrono::milliseconds(1));
    }
    return 0;
  }

private:

  // Builds a fresh core around ram_ and steps it until it stops running.
  void run() {
    Harp::ArchDef arch("rv32i", false);
    Harp::WordDecoder dec(arch);
    Harp::MemoryUnit mu(PAGE_SIZE, arch.getWordSize(), true);
    Harp::Core core(arch, dec, mu);
    mu.attach(ram_, 0);

    while (core.running()) {
      core.step();
    }
    core.printStats();
  }

  // Worker loop: runs the core whenever is_running_ is set and exits once
  // is_done_ is set.
  void thread_proc() {
    std::cout << "Device ready..." << std::endl;

    for (;;) {
      bool is_done;
      bool is_running;
      {
        std::lock_guard<std::mutex> guard(mutex_);
        is_done = is_done_;
        is_running = is_running_;
      }

      if (is_done)
        break;

      if (!is_running) {
        // Idle: back off instead of spinning on the mutex at full speed.
        // The interval matches the polling interval used by wait().
        std::this_thread::sleep_for(std::chrono::milliseconds(1));
        continue;
      }

      std::cout << "Device running..." << std::endl;
      this->run();

      {
        std::lock_guard<std::mutex> guard(mutex_);
        is_running_ = false;
      }
      std::cout << "Device ready..." << std::endl;
    }

    std::cout << "Device shutdown..." << std::endl;
  }

  // std::thread entry trampoline.
  static void __thread_proc__(vx_device* device) {
    device->thread_proc();
  }

  bool is_done_;      // set by the destructor, read by the worker
  bool is_running_;   // set by start(), cleared by the worker
  Harp::RAM ram_;
  std::mutex mutex_;  // guards is_done_ and is_running_
  // Must stay the LAST member: members are constructed in declaration order
  // and the thread starts using mutex_ and ram_ as soon as it is created.
  std::thread thread_;
};
///////////////////////////////////////////////////////////////////////////////
// Creates a simulated device and returns it as an opaque handle.
extern vx_device_h vx_dev_open() {
  return static_cast<vx_device_h>(new vx_device());
}
// Destroys a device created by vx_dev_open. Returns -1 for a null handle,
// 0 otherwise.
extern int vx_dev_close(vx_device_h hdevice) {
  auto dev = static_cast<vx_device*>(hdevice);
  if (!dev)
    return -1;

  delete dev;
  return 0;
}
// Allocates a host buffer bound to `hdevice`. Returns nullptr for a null
// device handle or when the backing storage could not be allocated.
extern vx_buffer_h vx_buf_alloc(vx_device_h hdevice, size_t size) {
  if (!hdevice)
    return nullptr;

  vx_buffer* buf = new vx_buffer(size, static_cast<vx_device*>(hdevice));
  if (buf->data())
    return buf;

  // Backing storage allocation failed: discard the wrapper.
  delete buf;
  return nullptr;
}
// Returns the host pointer of a buffer, or nullptr for a null handle.
extern void* vs_buf_ptr(vx_buffer_h hbuffer) {
  auto buf = static_cast<vx_buffer*>(hbuffer);
  if (!buf)
    return nullptr;
  return buf->data();
}
// Frees a buffer created by vx_buf_alloc. Returns -1 for a null handle,
// 0 otherwise.
extern int vx_buf_release(vx_buffer_h hbuffer) {
  auto buf = static_cast<vx_buffer*>(hbuffer);
  if (!buf)
    return -1;

  delete buf;
  return 0;
}
// Uploads `size` bytes starting at `src_offset` in the buffer to device
// address `dest_addr`. Returns -1 for a null handle or when the range does
// not fit in the buffer; otherwise the result of vx_device::upload.
extern int vx_copy_to_fpga(vx_buffer_h hbuffer, size_t dest_addr, size_t size, size_t src_offset) {
  auto buf = static_cast<vx_buffer*>(hbuffer);
  if (!buf)
    return -1;

  const bool out_of_bounds = (size + src_offset > buf->size());
  if (out_of_bounds)
    return -1;

  auto dev = buf->device();
  return dev->upload(buf->data(), dest_addr, size, src_offset);
}
// Downloads `size` bytes from device address `src_addr` into the buffer at
// `dest_offset`. Returns -1 for a null handle or when the range does not fit
// in the buffer; otherwise the result of vx_device::download.
extern int vx_copy_from_fpga(vx_buffer_h hbuffer, size_t src_addr, size_t size, size_t dest_offset) {
  auto buf = static_cast<vx_buffer*>(hbuffer);
  if (!buf)
    return -1;

  const bool out_of_bounds = (size + dest_offset > buf->size());
  if (out_of_bounds)
    return -1;

  auto dev = buf->device();
  return dev->download(buf->data(), src_addr, size, dest_offset);
}
// Starts execution on the device. Returns -1 for a null handle, otherwise the
// result of vx_device::start.
extern int vx_start(vx_device_h hdevice) {
  auto dev = static_cast<vx_device*>(hdevice);
  return dev ? dev->start() : -1;
}
// Waits for the device to become idle. Returns -1 for a null handle,
// otherwise the result of vx_device::wait.
extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
  auto dev = static_cast<vx_device*>(hdevice);
  return dev ? dev->wait(timeout) : -1;
}

View File

@@ -1,71 +0,0 @@
# OPAE driver: builds the hardware library (libvxdrv.so), the ASE simulation
# library (libvxdrv_ase.so) and a test program linked against each.

# Remove a half-written target (e.g. a truncated .depend) when its recipe fails.
.DELETE_ON_ERROR:

DRV_CFLAGS += -O0 -g -Wall -Wextra -pedantic -Wfatal-errors

DRV_CFLAGS += -I/tools/opae/1.4.0/include
DRV_LDFLAGS += -L/tools/opae/1.4.0/lib

# stack execution protection
DRV_LDFLAGS +=-z noexecstack

# data relocation and projection
DRV_LDFLAGS +=-z relro -z now

# stack buffer overrun detection
# Note that CentOS 7 has gcc 4.8 by default. When we switch
# to a system with gcc 4.9 or newer this should be changed to
# CFLAGS="-fstack-protector-strong"
DRV_CFLAGS +=-fstack-protector

# Position independent code
DRV_CFLAGS += -fPIC

DRV_LDFLAGS += -luuid
DRV_LDFLAGS += -shared

FPGA_LIBS += -lopae-c
ASE_LIBS += -lopae-c-ase

CXXFLAGS += -std=c++17 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors

LDFLAGS += -L.

PROJECT = libvxdrv.so
PROJECT_ASE = libvxdrv_ase.so

AFU_JSON_INFO = vortex_afu.h

# all/clean are commands, not files.
.PHONY: all clean

all: $(PROJECT) $(PROJECT_ASE) test test_ase

# AFU info from JSON file, including AFU UUID
$(AFU_JSON_INFO): ../hw/vortex_afu.json
	afu_json_mgr json-info --afu-json=$^ --c-hdr=$@

$(PROJECT): vx_driver.o
	$(CC) $(DRV_CFLAGS) $^ $(DRV_LDFLAGS) $(FPGA_LIBS) -o $@

# USE_ASE only takes effect when the source is compiled, so the ASE library
# is linked from its own object. Passing -DUSE_ASE at link time on the shared
# vx_driver.o had no effect.
$(PROJECT_ASE): vx_driver_ase.o
	$(CC) $(DRV_CFLAGS) $^ $(DRV_LDFLAGS) $(ASE_LIBS) -o $@

test: test.o utils.o $(PROJECT)
	$(CXX) $(CXXFLAGS) test.o utils.o $(LDFLAGS) -lvxdrv -o $@

test_ase: test.o utils.o $(PROJECT_ASE)
	$(CXX) $(CXXFLAGS) test.o utils.o $(LDFLAGS) -lvxdrv_ase -o $@

# $< rather than $^: .depend adds header prerequisites to this target, and
# those must not be passed to the compiler as additional inputs.
vx_driver.o: vx_driver.c
	$(CC) $(DRV_CFLAGS) -c $< -o $@

vx_driver_ase.o: vx_driver.c
	$(CC) $(DRV_CFLAGS) -DUSE_ASE -c $< -o $@

test.o: test.cpp $(AFU_JSON_INFO)
	$(CXX) $(CXXFLAGS) -c test.cpp -o $@

# utils.o has no explicit rule; it is built by make's built-in %.o: %.cpp rule.

.depend: vx_driver.c test.cpp
	$(CXX) $(CXXFLAGS) -MM $^ > $@

clean:
	rm -rf $(PROJECT) $(PROJECT_ASE) test test_ase $(AFU_JSON_INFO) *.so *.o .depend

ifneq ($(MAKECMDGOALS),clean)
-include .depend
endif

View File

@@ -1,69 +0,0 @@
#include <vx_driver.h>
#include <iostream>
#include <unistd.h>
#include "utils.h"
#define CACHE_LINESIZE 64
const char* program_file = nullptr;
// Prints the program banner and the command-line synopsis to stdout.
static void show_usage() {
  std::cout << "Vortex Driver Test." << std::endl
            << "Usage: [-f: program] [-h: help]" << std::endl;
}
// Parses the command line.
//   -f <file>  stores the program path in program_file
//   -h / -?    prints usage and exits with status 0
// Any other option, or a missing -f, prints usage and exits with status -1.
static void parse_args(int argc, char **argv) {
  static const char* const kOptions = "f:h?";

  for (int opt = getopt(argc, argv, kOptions); opt != -1;
       opt = getopt(argc, argv, kOptions)) {
    if ('f' == opt) {
      program_file = optarg;
      continue;
    }
    // Everything else ends the program after showing the usage text.
    const bool help_requested = ('h' == opt) || ('?' == opt);
    show_usage();
    exit(help_requested ? 0 : -1);
  }

  if (!program_file) {
    show_usage();
    exit(-1);
  }
}
// Test driver: opens the device, uploads the program named by -f, starts it
// and waits for completion. Returns 0 on success and -1 on any failure.
int main(int argc, char *argv[]) {
  // parse command arguments
  parse_args(argc, argv);

  // open device connection
  auto device = vx_dev_open();
  if (nullptr == device) {
    // Without this check a failed open is only noticed (or crashes) later,
    // inside the upload path.
    std::cout << "error: failed to open device" << std::endl;
    return -1;
  }

  // upload program, start the device and wait for completion; the first
  // failing step short-circuits the rest
  int exitcode = 0;
  if (0 != upload_program(device, program_file)
   || 0 != vx_start(device)
   || 0 != vx_ready_wait(device, -1)) {
    exitcode = -1;
  }

  // close device (on success and on failure)
  vx_dev_close(device);

  return exitcode;
}

View File

@@ -1,156 +0,0 @@
#include <iostream>
#include <fstream>
#include "utils.h"
// Converts one hexadecimal digit character to its numeric value.
// 'A'-'F' and 'a'-'f' map to 10-15; every other character is treated as a
// decimal digit (c - '0') without validation.
static uint32_t hti_old(char c) {
  const bool is_upper = ('A' <= c) && (c <= 'F');
  const bool is_lower = ('a' <= c) && (c <= 'f');

  if (is_upper) {
    return c - 'A' + 10;
  } else if (is_lower) {
    return c - 'a' + 10;
  } else {
    return c - '0';
  }
}
// Converts the first `size` characters of `c` (most significant digit first)
// into an unsigned value, one hex digit per character.
static uint32_t hToI_old(char *c, uint32_t size) {
  uint32_t result = 0;
  uint32_t shift = size * 4;
  for (const char* digit = c; digit != c + size; ++digit) {
    shift -= 4; // each digit occupies the next lower nibble
    result += hti_old(*digit) << shift;
  }
  return result;
}
// Parses one Intel HEX record into `out`.
// Returns 0 on success. Returns -1 when the line does not start with ':'
// (an error is printed) or when the record type is greater than 5; in both
// cases `out` is left untouched.
// Field offsets within the line: byte count at 1, address at 3, record type
// at 7, payload from 9. The line length and checksum are not verified.
int parse_ihex_line(char* line, ihex_t* out) {
  if (':' != line[0]) {
    std::cout << "error: invalid line entry!" << std::endl;
    return -1;
  }

  const uint32_t type = hToI_old(line + 7, 2);
  if (type > 5)
    return -1;

  // Types 2 and 4 carry a base address; 1 marks end of file; 3 and 5 (start
  // addresses) carry nothing that is recorded here.
  const bool is_segment = (2 == type);
  const bool is_linear  = (4 == type);

  uint32_t count = 0;
  uint32_t addr  = 0;
  uint32_t base  = 0;

  if (0 == type) {
    // data record
    count = hToI_old(line + 1, 2);
    addr  = hToI_old(line + 3, 4);
    char* cursor = line + 9;
    for (uint32_t n = 0; n != count; ++n, cursor += 2) {
      out->data[n] = hToI_old(cursor, 2);
    }
  } else if (is_segment) {
    // extended segment address
    base = hToI_old(line + 9, 4) << 4;
  } else if (is_linear) {
    // extended linear address
    base = hToI_old(line + 9, 4) << 16;
  }

  out->address    = addr;
  out->data_size  = count;
  out->offset     = base;
  out->has_offset = is_segment || is_linear;
  out->is_eof     = (1 == type);

  return 0;
}
// Uploads an Intel HEX program to the device.
// First writes a five-instruction initialisation routine at device address 0,
// then streams the HEX payload through one staging buffer of
// 16 * VX_CACHE_LINESIZE bytes.
// Returns 0 on success. Returns -1 if the file cannot be opened, the buffer
// cannot be allocated, a record is malformed, or a device copy fails. The
// staging buffer is released on every path.
int upload_program(vx_device_h device, const char* filename) {
  std::ifstream ifs(filename);
  if (!ifs) {
    std::cout << "error: " << filename << " not found" << std::endl;
    return -1;
  }

  uint32_t transfer_size = 16 * VX_CACHE_LINESIZE;

  // allocate device buffer
  auto buffer = vx_buf_alloc(device, transfer_size);
  if (nullptr == buffer)
    return -1;

  // get buffer address
  auto buf_ptr = (uint8_t*)vs_buf_ptr(buffer);

  //
  // copy initialization routine
  //

  ((uint32_t*)buf_ptr)[0] = 0xf1401073;
  ((uint32_t*)buf_ptr)[1] = 0xf1401073;
  ((uint32_t*)buf_ptr)[2] = 0x30101073;
  ((uint32_t*)buf_ptr)[3] = 0x800000b7;
  ((uint32_t*)buf_ptr)[4] = 0x000080e7;

  if (0 != vx_copy_to_fpga(buffer, 0, 5 * 4, 0)) {
    vx_buf_release(buffer);
    return -1;
  }

  //
  // copy hex program
  //

  char line[ihex_t::MAX_LINE_SIZE];
  uint32_t hex_offset = 0;        // base from the last extended-address record
  uint32_t prev_hex_address = 0;  // address just past the previous data record
  uint32_t dest_address = -1;     // device address of the current batch; -1 = none yet
  uint32_t src_offset = 0;        // write position inside the staging buffer
  int status = 0;

  while (0 == status) {
    ifs.getline(line, ihex_t::MAX_LINE_SIZE);
    if (!ifs)
      break;

    // Tolerate blank lines (including a lone CR from CRLF files).
    if ('\0' == line[0] || '\r' == line[0])
      continue;

    ihex_t ihex;
    if (0 != parse_ihex_line(line, &ihex)) {
      // On failure `ihex` is not filled in; reading it would be undefined.
      status = -1;
      break;
    }

    if (ihex.is_eof)
      break;

    if (ihex.has_offset) {
      hex_offset = ihex.offset;
    }

    if (ihex.data_size != 0) {
      auto hex_address = ihex.address + hex_offset;
      if (dest_address == (uint32_t)-1) {
        // First data record: align the batch start down to a cache line.
        dest_address = (hex_address / VX_CACHE_LINESIZE) * VX_CACHE_LINESIZE;
        src_offset = hex_address - dest_address;
      } else {
        // Skip over any gap between consecutive records.
        auto delta = hex_address - prev_hex_address;
        src_offset += delta;
      }

      for (uint32_t i = 0; i < ihex.data_size; ++i) {
        if (src_offset >= transfer_size) {
          // flush current batch to FPGA
          if (0 != vx_copy_to_fpga(buffer, dest_address, transfer_size, 0)) {
            status = -1;
            break;
          }
          dest_address = (hex_address / VX_CACHE_LINESIZE) * VX_CACHE_LINESIZE;
          src_offset = hex_address - dest_address;
        }

        buf_ptr[src_offset++] = ihex.data[i];
        ++hex_address;
      }

      prev_hex_address = hex_address;
    }
  }

  // flush last batch to FPGA
  if (0 == status && src_offset) {
    if (0 != vx_copy_to_fpga(buffer, dest_address, src_offset, 0))
      status = -1;
  }

  vx_buf_release(buffer);

  return status;
}

View File

@@ -1,19 +0,0 @@
#pragma once
#include <vx_driver.h>
// One parsed Intel HEX record, as filled in by parse_ihex_line().
// NOTE(review): uint8_t/uint32_t are used here but this header only includes
// <vx_driver.h>; presumably <cstdint> arrives transitively - confirm.
struct ihex_t {
// Size of the line buffer upload_program() reads each record into.
static constexpr int MAX_LINE_SIZE = 524;
// Capacity of data[]; a record's two-hex-digit byte count cannot exceed 255.
static constexpr int MAX_DATA_SIZE = 255;
// Payload bytes; only the first data_size entries are written (data records only).
uint8_t data[MAX_DATA_SIZE];
// 16-bit address field of a data record; 0 for every other record type.
uint32_t address;
// Number of payload bytes; 0 for non-data records.
uint32_t data_size;
// Base address from an extended segment (<<4) or extended linear (<<16) record.
uint32_t offset;
// True when this record supplied `offset`.
bool has_offset;
// True for the end-of-file record.
bool is_eof;
};
// Parses one line of an Intel HEX file into `out`; returns 0 on success, -1 on error.
int parse_ihex_line(char* line, ihex_t* out);
// Uploads the Intel HEX file `filename` to `device`; returns 0 on success, -1 on error.
int upload_program(vx_device_h device, const char* filename);

View File

@@ -1,259 +0,0 @@
#include "vx_driver.h"
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <assert.h>
#include <uuid/uuid.h>
#include <opae/fpga.h>
// MMIO Address Mappings
// NOTE(review): AFU_ID is not referenced in the visible code and
// AFU_ACCEL_UUID is not defined by any header included here - confirm.
#define AFU_ID AFU_ACCEL_UUID
// Host buffer IO address, written in units of VX_CACHE_LINESIZE.
#define MMIO_COPY_IO_ADDRESS 0X120
// Device-side address of the transfer.
#define MMIO_COPY_AVM_ADDRESS 0x100
// Transfer size as passed by the caller.
#define MMIO_COPY_DATA_SIZE 0X118
#define MMIO_CMD_TYPE 0X110 // MMIO location set by SW to denote read/write. read: 3; write: 1; vortex: 7
// Reads back 0x1 once the hardware accepts a new command.
#define MMIO_READY_FOR_CMD 0X198
// Evaluates _expr as an fpga_result; on anything other than FPGA_OK it prints
// the failing expression and makes the enclosing function return -1.
// The inner `break` only leaves the macro's own do/while.
#define CHECK_RES(_expr) \
do { \
fpga_result res = _expr; \
if (res == FPGA_OK) \
break; \
printf("OPAE Error: '%s' returned %d!\n", #_expr, (int)res); \
return -1; \
} while (false)
///////////////////////////////////////////////////////////////////////////////
// Bookkeeping for one host buffer shared with the accelerator.
typedef struct vx_buffer_ {
// Workspace id returned by fpgaPrepareBuffer; needed to release the buffer.
uint64_t wsid;
// Host virtual address of the buffer.
volatile void* host_ptr;
// Address of the buffer as returned by fpgaGetIOAddress.
uint64_t io_addr;
// Device handle the buffer was prepared on.
fpga_handle hdevice;
// Size requested by the caller (the allocation itself is rounded up by align_size).
size_t size;
} vx_buffer_t;
// Rounds `size` up to the next multiple of VX_CACHE_LINESIZE (0 stays 0).
static size_t align_size(size_t size) {
  const size_t whole_lines = (size + VX_CACHE_LINESIZE - 1) / VX_CACHE_LINESIZE;
  return VX_CACHE_LINESIZE * whole_lines;
}
///////////////////////////////////////////////////////////////////////////////
// Search for an accelerator matching the requested UUID and connect to it
// Convert this to void if required as storing the fpga_handle to params variable
// Finds the first accelerator whose GUID matches `accel_uuid` and opens it.
// Returns the device handle, or NULL when the UUID is missing or malformed,
// an OPAE call fails, or no matching accelerator exists.
// NOTE(review): vx_driver.h declares vx_dev_open() without parameters and the
// test program calls it with none; the UUID has to come from somewhere
// (AFU_ID?) - confirm the intended signature.
extern vx_device_h vx_dev_open(const char *accel_uuid) {
  fpga_properties filter = NULL;
  fpga_result res;
  fpga_guid guid;
  fpga_token accel_token;
  uint32_t num_matches = 0;
  fpga_handle accel_handle;

  if (NULL == accel_uuid)
    return NULL;

  // Validate the UUID before touching OPAE; on failure `guid` is not filled in.
  if (uuid_parse(accel_uuid, guid) < 0) {
    fprintf(stderr, "Invalid accelerator UUID '%s'!\n", accel_uuid);
    return NULL;
  }

  // Set up a filter that will search for an accelerator
  res = fpgaGetProperties(NULL, &filter);
  if (FPGA_OK != res)
    return NULL;

  res = fpgaPropertiesSetObjectType(filter, FPGA_ACCELERATOR);

  // Add the desired UUID to the filter
  if (FPGA_OK == res)
    res = fpgaPropertiesSetGUID(filter, guid);

  // Do the search across the available FPGA contexts
  if (FPGA_OK == res)
    res = fpgaEnumerate(&filter, 1, &accel_token, 1, &num_matches);

  // Not needed anymore
  fpgaDestroyProperties(&filter);

  if (FPGA_OK != res || num_matches < 1) {
    fprintf(stderr, "Accelerator %s not found!\n", accel_uuid);
    return NULL;
  }

  // Open accelerator
  res = fpgaOpen(accel_token, &accel_handle, 0);

  // Done with token, whether or not the open succeeded
  fpgaDestroyToken(&accel_token);

  if (FPGA_OK != res) {
    return NULL;
  }

  return accel_handle;
}
// Close the fpga when all the operations are done
// Closes a device opened by vx_dev_open.
// Returns 0 on success, -1 for a NULL handle or when fpgaClose fails (the
// failure is reported through CHECK_RES).
extern int vx_dev_close(vx_device_h hdevice) {
  if (NULL == hdevice)
    return -1;

  CHECK_RES(fpgaClose(hdevice));

  return 0;
}
// Prepares a host buffer shared with the accelerator.
// The allocation is rounded up with align_size(); the returned descriptor
// records the size that was requested. Returns NULL on any failure, after
// releasing whatever had already been prepared.
extern vx_buffer_h vx_buf_alloc(vx_device_h hdevice, size_t size) {
  void* host_ptr;
  uint64_t wsid;
  uint64_t io_addr;
  vx_buffer_t* buffer;

  if (NULL == hdevice)
    return NULL;

  if (FPGA_OK != fpgaPrepareBuffer(hdevice, align_size(size), &host_ptr, &wsid, 0))
    return NULL;

  // Get the physical address of the buffer in the accelerator
  if (FPGA_OK == fpgaGetIOAddress(hdevice, wsid, &io_addr)) {
    buffer = (vx_buffer_t*)malloc(sizeof(vx_buffer_t));
    if (NULL != buffer) {
      buffer->wsid = wsid;
      buffer->host_ptr = host_ptr;
      buffer->io_addr = io_addr;
      buffer->hdevice = hdevice;
      buffer->size = size;
      return (vx_buffer_h)buffer;
    }
  }

  // Address lookup or descriptor allocation failed: give the buffer back.
  fpgaReleaseBuffer(hdevice, wsid);
  return NULL;
}
// Returns the host address of a buffer, or NULL for a NULL handle.
// The return type is plain void* to match the prototype in vx_driver.h; the
// volatile qualifier on the stored pointer is dropped explicitly.
extern void* vs_buf_ptr(vx_buffer_h hbuffer) {
  vx_buffer_t* buffer = ((vx_buffer_t*)hbuffer);
  if (NULL == buffer)
    return NULL;
  return (void*)buffer->host_ptr;
}
// Releases a buffer created by vx_buf_alloc together with its descriptor.
// Returns -1 for a NULL handle, 0 otherwise.
extern int vx_buf_release(vx_buffer_h hbuffer) {
  vx_buffer_t* buffer = (vx_buffer_t*)hbuffer;

  if (buffer != NULL) {
    fpgaReleaseBuffer(buffer->hdevice, buffer->wsid);
    free(buffer);
    return 0;
  }

  return -1;
}
// Check if HW is ready for SW
// Blocks until the hardware reports it is ready for a new command.
// Polls MMIO_READY_FOR_CMD every 1 ms (every 1 s when built with USE_ASE).
// Returns 0 once the register reads 0x1, -1 if the MMIO read fails.
static int ready_for_sw(fpga_handle hdevice) {
  uint64_t data = 0;
  struct timespec sleep_time;

#ifdef USE_ASE
  sleep_time.tv_sec = 1;
  sleep_time.tv_nsec = 0;
#else
  sleep_time.tv_sec = 0;
  sleep_time.tv_nsec = 1000000;
#endif

  for (;;) {
    CHECK_RES(fpgaReadMMIO64(hdevice, 0, MMIO_READY_FOR_CMD, &data));
    if (0x1 == data)
      break;
    // Sleep only while not ready, so a device that is already idle does not
    // cost a full polling interval per command.
    nanosleep(&sleep_time, NULL);
  }

  return 0;
}
// Copies `size` bytes from the buffer, starting at `src_offset`, to device
// address `dest_addr`, and waits for the transfer to finish.
// Returns 0 on success, -1 for a NULL handle, an out-of-range request or a
// failed MMIO access.
// NOTE(review): the IO address is programmed as (io_addr + src_offset) divided
// by VX_CACHE_LINESIZE, so an offset that is not line-aligned is truncated -
// confirm callers only pass aligned offsets.
extern int vx_copy_to_fpga(vx_buffer_h hbuffer, size_t dest_addr, size_t size, size_t src_offset) {
  vx_buffer_t* buffer = ((vx_buffer_t*)hbuffer);

  if (NULL == buffer)
    return -1;

  // bound checking (written so that size + src_offset cannot wrap around)
  if (src_offset > buffer->size || size > buffer->size - src_offset)
    return -1;

  // Ensure ready for new command
  if (ready_for_sw(buffer->hdevice) != 0)
    return -1;

  CHECK_RES(fpgaWriteMMIO64(buffer->hdevice, 0, MMIO_COPY_AVM_ADDRESS, dest_addr));
  CHECK_RES(fpgaWriteMMIO64(buffer->hdevice, 0, MMIO_COPY_IO_ADDRESS, (buffer->io_addr + src_offset)/VX_CACHE_LINESIZE));
  CHECK_RES(fpgaWriteMMIO64(buffer->hdevice, 0, MMIO_COPY_DATA_SIZE, size));
  CHECK_RES(fpgaWriteMMIO64(buffer->hdevice, 0, MMIO_CMD_TYPE, 1)); // WRITE CMD

  // Wait for the write operation to finish
  return ready_for_sw(buffer->hdevice);
}
// Copies `size` bytes from device address `src_addr` into the buffer at
// `dest_offset`, and waits for the transfer to finish.
// Returns 0 on success, -1 for a NULL handle, an out-of-range request or a
// failed MMIO access.
// NOTE(review): the IO address is programmed as (io_addr + dest_offset)
// divided by VX_CACHE_LINESIZE, so an offset that is not line-aligned is
// truncated - confirm callers only pass aligned offsets.
extern int vx_copy_from_fpga(vx_buffer_h hbuffer, size_t src_addr, size_t size, size_t dest_offset) {
  vx_buffer_t* buffer = ((vx_buffer_t*)hbuffer);

  if (NULL == buffer)
    return -1;

  // bound checking (written so that size + dest_offset cannot wrap around)
  if (dest_offset > buffer->size || size > buffer->size - dest_offset)
    return -1;

  // Ensure ready for new command
  if (ready_for_sw(buffer->hdevice) != 0)
    return -1;

  CHECK_RES(fpgaWriteMMIO64(buffer->hdevice, 0, MMIO_COPY_AVM_ADDRESS, src_addr));
  CHECK_RES(fpgaWriteMMIO64(buffer->hdevice, 0, MMIO_COPY_IO_ADDRESS, (buffer->io_addr + dest_offset)/VX_CACHE_LINESIZE));
  CHECK_RES(fpgaWriteMMIO64(buffer->hdevice, 0, MMIO_COPY_DATA_SIZE, size));
  CHECK_RES(fpgaWriteMMIO64(buffer->hdevice, 0, MMIO_CMD_TYPE, 3)); // READ CMD

  // Wait for the read operation to finish
  return ready_for_sw(buffer->hdevice);
}
// Issues the start command once the hardware accepts commands.
// Returns 0 on success, -1 for a NULL handle or a failed MMIO access.
extern int vx_start(vx_device_h hdevice) {
  // Reject a missing device, then make sure it is ready for a new command.
  if (NULL == hdevice || 0 != ready_for_sw(hdevice))
    return -1;

  CHECK_RES(fpgaWriteMMIO64(hdevice, 0, MMIO_CMD_TYPE, 7)); // START CMD

  return 0;
}
// Waits until the device reports ready or `timeout` milliseconds have passed.
// A negative timeout waits indefinitely; a timeout of 0 polls exactly once.
// Returns -1 for a NULL handle or a failed MMIO read and 0 otherwise. As
// before, an expired timeout also returns 0, so callers cannot tell it apart
// from "ready".
// The previous loop did `sleep_time_ms -= sleep_time_ms`, which zeroed the
// interval instead of consuming the timeout: a negative timeout returned
// after a single poll and a positive one never expired.
extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
  if (NULL == hdevice)
    return -1;

  uint64_t data = 0;
  struct timespec sleep_time;

#ifdef USE_ASE
  sleep_time.tv_sec = 1;
  sleep_time.tv_nsec = 0;
#else
  sleep_time.tv_sec = 0;
  sleep_time.tv_nsec = 1000000;
#endif

  // to milliseconds
  const long long sleep_time_ms = (sleep_time.tv_sec * 1000) + (sleep_time.tv_nsec / 1000000);

  for (;;) {
    CHECK_RES(fpgaReadMMIO64(hdevice, 0, MMIO_READY_FOR_CMD, &data));
    if (0x1 == data)
      break;

    if (timeout >= 0) {
      // Not enough budget left for another polling interval: give up.
      if (timeout < sleep_time_ms)
        break;
      timeout -= sleep_time_ms;
    }

    nanosleep(&sleep_time, NULL);
  }

  return 0;
}

View File

@@ -1,47 +0,0 @@
// Public C API of the Vortex driver (usable from C and C++).
#ifndef __VX_DRIVER_H__
#define __VX_DRIVER_H__
#include <stddef.h>
#ifdef __cplusplus
extern "C" {
#endif
// Opaque handle to an opened device.
typedef void* vx_device_h;
// Opaque handle to a host buffer shared with a device.
typedef void* vx_buffer_h;
// Granularity (bytes) the driver uses when sizing and addressing shared buffers.
#define VX_CACHE_LINESIZE 64
// open the device and connect to it
// NOTE(review): the OPAE implementation is defined with a `const char*`
// UUID parameter, which this prototype does not declare - confirm.
vx_device_h vx_dev_open();
// Close the device when all the operations are done
int vx_dev_close(vx_device_h hdevice);
// Allocate shared buffer with device
vx_buffer_h vx_buf_alloc(vx_device_h hdevice, size_t size);
// Get host pointer address
void* vs_buf_ptr(vx_buffer_h hbuffer);
// release buffer
int vx_buf_release(vx_buffer_h hbuffer);
// Copy bytes from buffer to device local memory
int vx_copy_to_fpga(vx_buffer_h hbuffer, size_t dest_addr, size_t size, size_t src_offset);
// Copy bytes from device local memory to buffer
int vx_copy_from_fpga(vx_buffer_h hbuffer, size_t src_addr, size_t size, size_t dst_offset);
// Start device execution
int vx_start(vx_device_h hdevice);
// Wait for device ready with milliseconds timeout
int vx_ready_wait(vx_device_h hdevice, long long timeout);
#ifdef __cplusplus
}
#endif
#endif // __VX_DRIVER_H__

View File

@@ -1,21 +1,15 @@
RISCV_TOOL_PATH ?= $(wildcard ~/dev/riscv-gnu-toolchain/drops)
# RISCV_TOOL_PATH ?= /opt/riscv-new/drops
VX_RT_PATH ?= $(wildcard ../../../runtime)
RISCV_TOOLCHAIN_PATH ?= $(wildcard ~/dev/riscv-gnu-toolchain/drops)
VORTEX_RT_PATH ?= $(wildcard ../../../runtime)
VX_CC = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc
VX_CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++
VX_DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump
VX_CPY = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy
VX_CC = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-gcc
VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-g++
VX_DP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objdump
VX_CP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objcopy
VX_NEWLIB = $(VX_RT_PATH)/newlib/newlib.c
VX_STR = $(VX_RT_PATH)/startup/vx_start.S
VX_INT = $(VX_RT_PATH)/intrinsics/vx_intrinsics.S
VX_IO = $(VX_RT_PATH)/io/vx_io.S $(VX_RT_PATH)/io/vx_io.c
VX_API = $(VX_RT_PATH)/vx_api/vx_api.c
VX_FIO = $(VX_RT_PATH)/fileio/fileio.S
VX_CFLAGS += -march=rv32im -mabi=ilp32 -O3 -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/linker/vx_link.ld -ffreestanding -nostartfiles -Wl,--gc-sections
VX_CFLAGS += -I$(VORTEX_RT_PATH)/include
VX_CFLAGS = -march=rv32im -mabi=ilp32 -O3 -Wl,-Bstatic,-T,$(VX_RT_PATH)/startup/vx_link.ld -ffreestanding -nostartfiles -Wl,--gc-sections
VX_CFLAGS += -I../../../hw
VX_LDFLAGS += $(VORTEX_RT_PATH)/libvortexrt.a
VX_SRCS = kernel.c
@@ -32,13 +26,13 @@ SRCS = basic.cpp
all: $(PROJECT) kernel.bin kernel.dump
kernel.dump: kernel.elf
$(VX_DMP) -D kernel.elf > kernel.dump
$(VX_DP) -D kernel.elf > kernel.dump
kernel.bin: kernel.elf
$(VX_CPY) -O binary kernel.elf kernel.bin
$(VX_CP) -O binary kernel.elf kernel.bin
kernel.elf: $(SRCS)
$(VX_CC) $(VX_CFLAGS) $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_SRCS) -I$(VX_RT_PATH) -o kernel.elf
kernel.elf: $(VX_SRCS)
$(VX_CC) $(VX_CFLAGS) $(VX_SRCS) $(VX_LDFLAGS) -o kernel.elf
$(PROJECT): $(SRCS)
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -L../../stub -lvortex -o $@

View File

@@ -1,6 +1,5 @@
#include <stdint.h>
#include <VX_config.h>
#include "intrinsics/vx_intrinsics.h"
#include <vx_intrinsics.h>
#include "common.h"
void main() {

View File

@@ -1,20 +1,15 @@
RISCV_TOOL_PATH ?= $(wildcard ~/dev/riscv-gnu-toolchain/drops)
VX_RT_PATH ?= $(wildcard ../../../runtime)
RISCV_TOOLCHAIN_PATH ?= $(wildcard ~/dev/riscv-gnu-toolchain/drops)
VORTEX_RT_PATH ?= $(wildcard ../../../runtime)
VX_CC = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc
VX_CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++
VX_DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump
VX_CPY = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy
VX_CC = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-gcc
VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-g++
VX_DP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objdump
VX_CP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objcopy
#VX_NEWLIB = $(VX_RT_PATH)/newlib/newlib.c
VX_STR = $(VX_RT_PATH)/startup/vx_start.S
VX_INT = $(VX_RT_PATH)/intrinsics/vx_intrinsics.S
#VX_IO = $(VX_RT_PATH)/io/vx_io.S $(VX_RT_PATH)/io/vx_io.c
VX_API = $(VX_RT_PATH)/vx_api/vx_api.c
#VX_FIO = $(VX_RT_PATH)/fileio/fileio.S
VX_CFLAGS += -march=rv32im -mabi=ilp32 -O3 -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/linker/vx_link.ld -ffreestanding -nostartfiles -Wl,--gc-sections
VX_CFLAGS += -I$(VORTEX_RT_PATH)/include
VX_CFLAGS = -march=rv32im -mabi=ilp32 -O3 -Wl,-Bstatic,-T,$(VX_RT_PATH)/startup/vx_link.ld -ffreestanding -nostartfiles -Wl,--gc-sections
VX_CFLAGS += -I../../../hw
VX_LDFLAGS += $(VORTEX_RT_PATH)/libvortexrt.a
VX_SRCS = kernel.c
@@ -29,13 +24,13 @@ SRCS = demo.cpp
all: $(PROJECT) kernel.bin kernel.dump
kernel.dump: kernel.elf
$(VX_DMP) -D kernel.elf > kernel.dump
$(VX_DP) -D kernel.elf > kernel.dump
kernel.bin: kernel.elf
$(VX_CPY) -O binary kernel.elf kernel.bin
$(VX_CP) -O binary kernel.elf kernel.bin
kernel.elf: $(SRCS)
$(VX_CC) $(VX_CFLAGS) $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_SRCS) -I$(VX_RT_PATH) -o kernel.elf
kernel.elf: $(VX_SRCS)
$(VX_CC) $(VX_CFLAGS) $(VX_SRCS) $(VX_LDFLAGS) -o kernel.elf
$(PROJECT): $(SRCS)
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -L../../stub -lvortex -o $@

BIN
driver/tests/demo/kernel.bin Normal file → Executable file

Binary file not shown.

View File

@@ -1,7 +1,6 @@
#include <stdlib.h>
#include <stdio.h>
#include "intrinsics/vx_intrinsics.h"
#include "vx_api/vx_api.h"
#include <stdint.h>
#include <vx_intrinsics.h>
#include <vx_spawn.h>
#include "common.h"
void kernel_body(void* arg) {
@@ -20,10 +19,6 @@ void kernel_body(void* arg) {
void main() {
struct kernel_arg_t* arg = (struct kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
/*printf("stride=%d\n", arg->stride);
printf("src0_ptr=0x%src0\n", arg->src0_ptr);
printf("src1_ptr=0x%src0\n", arg->src1_ptr);
printf("dst_ptr=0x%src0\n", arg->dst_ptr);*/
int num_warps = vx_num_warps();
int num_threads = vx_num_threads();
vx_spawn_warps(num_warps, num_threads, kernel_body, arg);

Binary file not shown.