Vortex 2.0 changes:

+ Microarchitecture optimizations
+ 64-bit support
+ Xilinx FPGA support
+ LLVM-16 support
+ Refactoring and quality control fixes

minor update

minor update

minor update

minor update

minor update

minor update

cleanup

cleanup

cache bindings and memory perf refactory

minor update

minor update

hw unit tests fixes

minor update

minor update

minor update

minor update

minor update

minor udpate

minor update

minor update

minor update

minor update

minor update

minor update

minor update

minor updates

minor updates

minor update

minor update

minor update

minor update

minor update

minor update

minor updates

minor updates

minor updates

minor updates

minor update

minor update
This commit is contained in:
Blaise Tine
2023-10-19 20:51:22 -07:00
parent d69a64c32c
commit c1e168fdbe
1309 changed files with 247412 additions and 311463 deletions

View File

@@ -1,69 +1,9 @@
XLEN ?= 32
RISCV_TOOLCHAIN_PATH ?= $(wildcard ../../../../riscv-gnu-toolchain/drops)
POCL_CC_PATH ?= $(wildcard ../../../../pocl/drops_riscv_cc)
POCL_INC_PATH ?= $(wildcard ../include)
POCL_LIB_PATH ?= $(wildcard ../lib)
VORTEX_RT_PATH ?= $(wildcard ../../../runtime)
VX_SIMX_PATH ?= $(wildcard ../../../simx/obj_dir)
CC = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-gcc
CXX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-g++
DMP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objdump
HEX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objcopy
GDB = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-gdb
VX_SRCS = $(VORTEX_RT_PATH)/newlib/newlib.c
VX_SRCS += $(VORTEX_RT_PATH)/startup/vx_start.S
VX_SRCS += $(VORTEX_RT_PATH)/intrinsics/vx_intrinsics.S
VX_SRCS += $(VORTEX_RT_PATH)/io/vx_io.S $(VORTEX_RT_PATH)/io/vx_io.c
VX_SRCS += $(VORTEX_RT_PATH)/fileio/fileio.S
VX_SRCS += $(VORTEX_RT_PATH)/tests/tests.c
VX_SRCS += $(VORTEX_RT_PATH)/vx_api/vx_api.c
VX_CFLAGS = -nostartfiles -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/startup/vx_link$(XLEN).ld
CXXFLAGS = -g -O0 -march=rv32im -mabi=ilp32
CXXFLAGS += -ffreestanding # program may not begin at main()
CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections
CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions
CXXFLAGS += -I$(POCL_INC_PATH) -I.
VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
QEMU_LIBS = $(VORTEX_RT_PATH)/qemu/vx_api.c -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
PROJECT = lbm
SRCS = main.cc args.c parboil_opencl.c gpu_info.c lbm.c ocl.c
all: $(PROJECT).dump $(PROJECT).hex
CXXFLAGS += -I.
lib$(PROJECT).a: kernel.cl
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOLCHAIN_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl
OPTS ?=
$(PROJECT).elf: $(SRCS) lib$(PROJECT).a
$(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) $(SRCS) $(VX_LIBS) -o $(PROJECT).elf
$(PROJECT).qemu: $(SRCS) lib$(PROJECT).a
$(CXX) $(CXXFLAGS) $(SRCS) $(QEMU_LIBS) -o $(PROJECT).qemu
$(PROJECT).hex: $(PROJECT).elf
$(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex
$(PROJECT).dump: $(PROJECT).elf
$(DMP) -D $(PROJECT).elf > $(PROJECT).dump
run: $(PROJECT).hex
POCL_DEBUG=all $(VX_SIMX_PATH)/Vcache_simX -E $(PROJECT).hex -s -b 1> emulator.debug
qemu: $(PROJECT).qemu
POCL_DEBUG=all $(RISCV_TOOLCHAIN_PATH)/bin/qemu-riscv32 -d in_asm -D debug.log $(PROJECT).qemu
gdb-s: $(PROJECT).qemu
POCL_DEBUG=all $(RISCV_TOOLCHAIN_PATH)/bin/qemu-riscv32 -g 1234 -d in_asm -D debug.log $(PROJECT).qemu
gdb-c: $(PROJECT).qemu
$(GDB) $(PROJECT).qemu
clean:
rm -rf *.o *.elf *.dump *.hex *.qemu *.log *.debug
include ../common.mk

View File

@@ -9,6 +9,10 @@
#ifndef __GPUINFOH__
#define __GPUINFOH__
#ifdef __cplusplus
extern "C" {
#endif
void compute_active_thread(size_t *thread,
size_t *grid,
int task,
@@ -17,4 +21,8 @@ void compute_active_thread(size_t *thread,
int minor,
int sm);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -6,16 +6,16 @@
*cr
***************************************************************************/
/*############################################################################*/
#ifndef _LBM_H_
#define _LBM_H_
/*############################################################################*/
#include "ocl.h"
#include "lbm_macros.h"
#ifdef __cplusplus
extern "C" {
#endif
void LBM_allocateGrid( float** ptr );
void LBM_freeGrid( float** ptr );
void LBM_initializeGrid( LBM_Grid grid );
@@ -34,6 +34,8 @@ void OpenCL_LBM_initializeGrid( const OpenCL_Param* prm, cl_mem d_grid, LBM_Grid
void OpenCL_LBM_getDeviceGrid( const OpenCL_Param* prm, cl_mem d_grid, LBM_Grid h_grid );
void OpenCL_LBM_performStreamCollide( const OpenCL_Param* prm, cl_mem srcGrid, cl_mem dstGrid );
/*############################################################################*/
#ifdef __cplusplus
}
#endif
#endif /* _LBM_H_ */

Binary file not shown.

View File

@@ -21,6 +21,26 @@
#include "main.h"
#include "ocl.h"
static int read_kernel_file(const char* filename, uint8_t** data, size_t* size) {
if (nullptr == filename || nullptr == data || 0 == size)
return CL_INVALID_VALUE;
FILE* fp = fopen(filename, "r");
if (NULL == fp) {
fprintf(stderr, "Failed to load kernel.");
return CL_INVALID_VALUE;
}
fseek(fp , 0 , SEEK_END);
long fsize = ftell(fp);
rewind(fp);
*data = (uint8_t*)malloc(fsize);
*size = fread(*data, 1, fsize, fp);
fclose(fp);
return CL_SUCCESS;
}
/*############################################################################*/
@@ -170,8 +190,6 @@ void MAIN_initialize(const MAIN_Param *param, const OpenCL_Param *prm) {
LBM_freeGrid((float **)&TEMP_srcGrid);
LBM_freeGrid((float **)&TEMP_dstGrid);
printf("OK\n");
}
/*############################################################################*/
@@ -188,7 +206,9 @@ void MAIN_finalize(const MAIN_Param *param, const OpenCL_Param *prm) {
pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
LBM_showGridStatistics(TEMP_srcGrid);
LBM_storeVelocityField(TEMP_srcGrid, param->resultFilename, TRUE);
if (param->resultFilename) {
LBM_storeVelocityField(TEMP_srcGrid, param->resultFilename, TRUE);
}
LBM_freeGrid((float **)&TEMP_srcGrid);
OpenCL_LBM_freeGrid(OpenCL_srcGrid);
@@ -220,8 +240,14 @@ void OpenCL_initialize(struct pb_Parameters *p, OpenCL_Param *prm) {
//const char *clSource[] = {readFile("src/opencl_base/kernel.cl")};
//prm->clProgram = clCreateProgramWithSource(prm->clContext, 1, clSource, NULL, &clStatus);
prm->clProgram = clCreateProgramWithBuiltInKernels(
prm->clContext, 1, &prm->clDevice, "performStreamCollide_kernel", &clStatus);
// read kernel binary from file
uint8_t *kernel_bin = NULL;
size_t kernel_size;
cl_int binary_status = 0;
clStatus = read_kernel_file("kernel.pocl", &kernel_bin, &kernel_size);
CHECK_ERROR("read_kernel_file")
prm->clProgram = clCreateProgramWithBinary(
prm->clContext, 1, &prm->clDevice, &kernel_size, (const uint8_t**)&kernel_bin, &binary_status, &clStatus);
CHECK_ERROR("clCreateProgramWithSource")
//char clOptions[100];

View File

@@ -1,6 +1,10 @@
#ifndef __OCLH__
#define __OCLH__
#ifdef __cplusplus
extern "C" {
#endif
typedef struct {
cl_platform_id clPlatform;
cl_context_properties clCps[3];
@@ -22,4 +26,8 @@ typedef struct {
char* readFile(char*);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -1216,9 +1216,24 @@ pb_InitOpenCLContext(struct pb_Parameters* parameters) {
cl_platform_id platform_id;
cl_device_id device_id;
cl_context context;
clGetPlatformIDs(1, &platform_id, NULL);
clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, NULL);
context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &_err);
_err = clGetPlatformIDs(1, &platform_id, NULL);
if (_err != CL_SUCCESS) {
fprintf(stderr, "Error querying platform!\n");
exit(-1);
}
_err = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, NULL);
if (_err != CL_SUCCESS) {
fprintf(stderr, "Error querying device IDs!\n");
exit(-1);
}
context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &_err);
if (_err != CL_SUCCESS) {
fprintf(stderr, "Error Creating device context!\n");
exit(-1);
}
pb_Context* c = (pb_Context*)malloc(sizeof(pb_Context));
c->clContext = context;