This commit is contained in:
Blaise Tine
2019-11-25 12:28:37 -05:00
parent 12cf0472b5
commit 83f793edc9
16 changed files with 3374 additions and 0 deletions

Binary file not shown.

View File

@@ -0,0 +1,69 @@
RISCV_TOOL_PATH = $(wildcard ~/dev/riscv-gnu-toolchain/drops)
POCL_CC_PATH = $(wildcard ~/dev/pocl/drops_riscv_cc)
POCL_INC_PATH = $(wildcard ../include)
POCL_LIB_PATH = $(wildcard ../lib)
VX_RT_PATH = $(wildcard ../../../runtime)
VX_SIMX_PATH = $(wildcard ../../../simX/obj_dir)
CC = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc
CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++
DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump
HEX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy
GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.s
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c
VX_SRCS += $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST)
VX_CFLAGS = -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld
CXXFLAGS = -g -O0 -march=rv32im -mabi=ilp32
CXXFLAGS += -ffreestanding # program may not begin at main()
CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections
CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions
CXXFLAGS += -I$(POCL_INC_PATH) -I.
VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a
PROJECT = stencil
SRCS = main.cc args.c parboil_opencl.c ocl.c gpu_info.c file.c
#convert_dataset.c mmio.c
all: $(PROJECT).dump $(PROJECT).hex
lib$(PROJECT).a: kernel.cl
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl
$(PROJECT).elf: $(SRCS) lib$(PROJECT).a
$(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) $(SRCS) $(VX_LIBS) -o $(PROJECT).elf
$(PROJECT).qemu: $(SRCS) lib$(PROJECT).a
$(CXX) $(CXXFLAGS) $(SRCS) $(QEMU_LIBS) -o $(PROJECT).qemu
$(PROJECT).hex: $(PROJECT).elf
$(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex
$(PROJECT).dump: $(PROJECT).elf
$(DMP) -D $(PROJECT).elf > $(PROJECT).dump
run: $(PROJECT).hex
POCL_DEBUG=all $(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug
qemu: $(PROJECT).qemu
POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -d in_asm -D debug.log $(PROJECT).qemu
gdb-s: $(PROJECT).qemu
POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -g 1234 -d in_asm -D debug.log $(PROJECT).qemu
gdb-c: $(PROJECT).qemu
$(GDB) $(PROJECT).qemu
clean:
rm -rf *.o *.elf *.dump *.hex *.qemu *.log *.debug

View File

@@ -0,0 +1,617 @@
#include <parboil.h>
#include <errno.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
/*****************************************************************************/
/* Memory management routines */
/* Free an array of owned strings. */
void
pb_FreeStringArray(char **string_array)
{
char **p;
if (!string_array) return;
for (p = string_array; *p; p++) free(*p);
free(string_array);
}
struct pb_PlatformParam *
pb_PlatformParam(char *name, char *version)
{
if (name == NULL) {
fprintf(stderr, "pb_PlatformParam: Invalid argument\n");
exit(-1);
}
struct pb_PlatformParam *ret =
(struct pb_PlatformParam *)malloc(sizeof (struct pb_PlatformParam));
ret->name = name;
ret->version = version;
return ret;
}
void
pb_FreePlatformParam(struct pb_PlatformParam *p)
{
if (p == NULL) return;
free(p->name);
free(p->version);
free(p);
}
struct pb_DeviceParam *
pb_DeviceParam_index(int index)
{
struct pb_DeviceParam *ret =
(struct pb_DeviceParam *)malloc(sizeof (struct pb_DeviceParam));
ret->criterion = pb_Device_INDEX;
ret->index = index;
return ret;
}
struct pb_DeviceParam *
pb_DeviceParam_cpu(void)
{
struct pb_DeviceParam *ret =
(struct pb_DeviceParam *)malloc(sizeof (struct pb_DeviceParam));
ret->criterion = pb_Device_CPU;
return ret;
}
struct pb_DeviceParam *
pb_DeviceParam_gpu(void)
{
struct pb_DeviceParam *ret =
(struct pb_DeviceParam *)malloc(sizeof (struct pb_DeviceParam));
ret->criterion = pb_Device_GPU;
return ret;
}
struct pb_DeviceParam *
pb_DeviceParam_accelerator(void)
{
struct pb_DeviceParam *ret =
(struct pb_DeviceParam *)malloc(sizeof (struct pb_DeviceParam));
ret->criterion = pb_Device_ACCELERATOR;
return ret;
}
struct pb_DeviceParam *
pb_DeviceParam_name(char *name)
{
struct pb_DeviceParam *ret =
(struct pb_DeviceParam *)malloc(sizeof (struct pb_DeviceParam));
ret->criterion = pb_Device_NAME;
ret->name = name;
return ret;
}
void
pb_FreeDeviceParam(struct pb_DeviceParam *p)
{
if (p == NULL) return;
switch(p->criterion) {
case pb_Device_NAME:
free(p->name);
break;
case pb_Device_INDEX:
case pb_Device_CPU:
case pb_Device_ACCELERATOR:
break;
default:
fprintf(stderr, "pb_FreeDeviceParam: Invalid argument\n");
exit(-1);
}
}
void
pb_FreeParameters(struct pb_Parameters *p)
{
free(p->outFile);
pb_FreeStringArray(p->inpFiles);
pb_FreePlatformParam(p->platform);
pb_FreeDeviceParam(p->device);
free(p);
}
/*****************************************************************************/
/* Parse a comma-delimited list of strings into an
* array of strings. */
static char **
read_string_array(char *in)
{
char **ret;
int i;
int count; /* Number of items in the input */
char *substring; /* Current substring within 'in' */
/* Count the number of items in the string */
count = 1;
for (i = 0; in[i]; i++) if (in[i] == ',') count++;
/* Allocate storage */
ret = (char **)malloc((count + 1) * sizeof(char *));
/* Create copies of the strings from the list */
substring = in;
for (i = 0; i < count; i++) {
char *substring_end;
int substring_length;
/* Find length of substring */
for (substring_end = substring;
(*substring_end != ',') && (*substring_end != 0);
substring_end++);
substring_length = substring_end - substring;
/* Allocate memory and copy the substring */
ret[i] = (char *)malloc(substring_length + 1);
memcpy(ret[i], substring, substring_length);
ret[i][substring_length] = 0;
/* go to next substring */
substring = substring_end + 1;
}
ret[i] = NULL; /* Write the sentinel value */
return ret;
}
static void
report_parse_error(const char *str)
{
fputs(str, stderr);
}
/* Interpret a string as a 'pb_DeviceParam' value.
* Return a pointer to a new value, or NULL on failure.
*/
static struct pb_DeviceParam *
read_device_param(char *str)
{
/* Try different ways of interpreting 'device_string' until one works */
/* If argument is an integer, then interpret it as a device index */
errno = 0;
char *end;
long device_int = strtol(str, &end, 10);
if (!errno) {
/* Negative numbers are not valid */
if (device_int < 0 || device_int > INT_MAX) return NULL;
return pb_DeviceParam_index(device_int);
}
/* Match against predefined strings */
if (strcmp(str, "CPU") == 0)
return pb_DeviceParam_cpu();
if (strcmp(str, "GPU") == 0)
return pb_DeviceParam_gpu();
if (strcmp(str, "ACCELERATOR") == 0)
return pb_DeviceParam_accelerator();
/* Assume any other string is a device name */
return pb_DeviceParam_name(strdup(str));
}
/* Interpret a string as a 'pb_PlatformParam' value.
* Return a pointer to a new value, or NULL on failure.
*/
static struct pb_PlatformParam *
read_platform_param(char *str)
{
int separator_index; /* Index of the '-' character separating
* name and version number. It's -1 if
* there's no '-' character. */
/* Find the last occurrence of '-' in 'str' */
{
char *cur;
separator_index = -1;
for (cur = str; *cur; cur++) {
if (*cur == '-') separator_index = cur - str;
}
}
/* The platform name is either the entire string, or all characters before
* the separator */
int name_length = separator_index == -1 ? strlen(str) : separator_index;
char *name_str = (char *)malloc(name_length + 1);
memcpy(name_str, str, name_length);
name_str[name_length] = 0;
/* The version is either NULL, or all characters after the separator */
char *version_str;
if (separator_index == -1) {
version_str = NULL;
}
else {
const char *version_input_str = str + separator_index + 1;
int version_length = strlen(version_input_str);
version_str = (char *)malloc(version_length + 1);
memcpy(version_str, version_input_str, version_length);
version_str[version_length] = 0;
}
/* Create output structure */
return pb_PlatformParam(name_str, version_str);
}
/****************************************************************************/
/* Argument parsing state */
/* Argument parsing state.
*
* Arguments that are interpreted by the argument parser are removed from
* the list. Variables 'argc' and 'argn' do not count arguments that have
* been removed.
*
* During argument parsing, the array of arguments is compacted, overwriting
* the erased arguments. Variable 'argv_put' points to the array element
* where the next argument will be written. Variable 'argv_get' points to
* the array element where the next argument will be read from.
*/
struct argparse {
int argc; /* Number of arguments. Mutable. */
int argn; /* Current argument index. */
char **argv_get; /* Argument value being read. */
char **argv_put; /* Argument value being written.
* argv_put <= argv_get. */
};
static void
initialize_argparse(struct argparse *ap, int argc, char **argv)
{
ap->argc = argc;
ap->argn = 0;
ap->argv_get = ap->argv_put = argv;
}
/* Finish argument parsing, without processing the remaining arguments.
* Write new argument count into _argc. */
static void
finalize_argparse(struct argparse *ap, int *_argc, char **argv)
{
/* Move the remaining arguments */
for(; ap->argn < ap->argc; ap->argn++)
*ap->argv_put++ = *ap->argv_get++;
/* Update the argument count */
*_argc = ap->argc;
/* Insert a terminating NULL */
argv[ap->argc] = NULL;
}
/* Delete the current argument. The argument will not be visible
* when argument parsing is done. */
static void
delete_argument(struct argparse *ap)
{
if (ap->argn >= ap->argc) {
fprintf(stderr, "delete_argument\n");
}
ap->argc--;
ap->argv_get++;
}
/* Go to the next argument. Also, move the current argument to its
* final location in argv. */
static void
next_argument(struct argparse *ap)
{
if (ap->argn >= ap->argc) {
fprintf(stderr, "next_argument\n");
}
/* Move argument to its new location. */
*ap->argv_put++ = *ap->argv_get++;
ap->argn++;
}
static int
is_end_of_arguments(struct argparse *ap)
{
return ap->argn == ap->argc;
}
/* Get the current argument */
static char *
get_argument(struct argparse *ap)
{
return *ap->argv_get;
}
/* Get the current argument, and also delete it */
static char *
consume_argument(struct argparse *ap)
{
char *ret = get_argument(ap);
delete_argument(ap);
return ret;
}
/****************************************************************************/
/* The result of parsing a command-line argument */
typedef enum {
ARGPARSE_OK, /* Success */
ARGPARSE_ERROR, /* Error */
ARGPARSE_DONE /* Success, and do not continue parsing */
} result;
typedef result parse_action(struct argparse *ap, struct pb_Parameters *params);
/* A command-line option */
struct option {
char short_name; /* If not 0, the one-character
* name of this option */
const char *long_name; /* If not NULL, the long name of this option */
parse_action *action; /* What to do when this option occurs.
* Sentinel value is NULL.
*/
};
/* Output file
*
* -o FILE
*/
static result
parse_output_file(struct argparse *ap, struct pb_Parameters *params)
{
if (is_end_of_arguments(ap))
{
report_parse_error("Expecting file name after '-o'\n");
return ARGPARSE_ERROR;
}
/* Replace the output file name */
free(params->outFile);
params->outFile = strdup(consume_argument(ap));
return ARGPARSE_OK;
}
/* Input files
*
* -i FILE,FILE,...
*/
static result
parse_input_files(struct argparse *ap, struct pb_Parameters *params)
{
if (is_end_of_arguments(ap))
{
report_parse_error("Expecting file name after '-i'\n");
return ARGPARSE_ERROR;
}
/* Replace the input file list */
pb_FreeStringArray(params->inpFiles);
params->inpFiles = read_string_array(consume_argument(ap));
return ARGPARSE_OK;
}
/* End of options
*
* --
*/
static result
parse_end_options(struct argparse *ap, struct pb_Parameters *params)
{
return ARGPARSE_DONE;
}
/* OpenCL device
*
* --device X
*/
static result
parse_device(struct argparse *ap, struct pb_Parameters *params)
{
/* Read the next argument, which specifies a device */
if (is_end_of_arguments(ap))
{
report_parse_error("Expecting device specification after '--device'\n");
return ARGPARSE_ERROR;
}
char *device_string = consume_argument(ap);
struct pb_DeviceParam *device_param = read_device_param(device_string);
if (!device_param) {
report_parse_error("Unrecognized device specification format on command line\n");
return ARGPARSE_ERROR;
}
/* Save the result */
pb_FreeDeviceParam(params->device);
params->device = device_param;
return ARGPARSE_OK;
}
static result
parse_platform(struct argparse *ap, struct pb_Parameters *params)
{
/* Read the next argument, which specifies a platform */
if (is_end_of_arguments(ap))
{
report_parse_error("Expecting device specification after '--platform'\n");
return ARGPARSE_ERROR;
}
char *platform_string = consume_argument(ap);
struct pb_PlatformParam *platform_param = read_platform_param(platform_string);
if (!platform_param) {
report_parse_error("Unrecognized platform specification format on command line\n");
return ARGPARSE_ERROR;
}
/* Save the result */
pb_FreePlatformParam(params->platform);
params->platform = platform_param;
return ARGPARSE_OK;
}
static struct option options[] = {
{ 'o', NULL, &parse_output_file },
{ 'i', NULL, &parse_input_files },
{ '-', NULL, &parse_end_options },
{ 0, "device", &parse_device },
{ 0, "platform", &parse_platform },
{ 0, NULL, NULL }
};
static int
is_last_option(struct option *op)
{
return op->action == NULL;
}
/****************************************************************************/
/* Parse command-line parameters.
* Return zero on error, nonzero otherwise.
* On error, the other outputs may be invalid.
*
* The information collected from parameters is used to update
* 'ret'. 'ret' should be initialized.
*
* '_argc' and 'argv' are updated to contain only the unprocessed arguments.
*/
static int
pb_ParseParameters (struct pb_Parameters *ret, int *_argc, char **argv)
{
char *err_message;
struct argparse ap;
/* Each argument */
initialize_argparse(&ap, *_argc, argv);
while(!is_end_of_arguments(&ap)) {
result arg_result; /* Result of parsing this option */
char *arg = get_argument(&ap);
/* Process this argument */
if (arg[0] == '-') {
/* Single-character flag */
if ((arg[1] != 0) && (arg[2] == 0)) {
delete_argument(&ap); /* This argument is consumed here */
/* Find a matching short option */
struct option *op;
for (op = options; !is_last_option(op); op++) {
if (op->short_name == arg[1]) {
arg_result = (*op->action)(&ap, ret);
goto option_was_processed;
}
}
/* No option matches */
report_parse_error("Unexpected command-line parameter\n");
arg_result = ARGPARSE_ERROR;
goto option_was_processed;
}
/* Long flag */
if (arg[1] == '-') {
delete_argument(&ap); /* This argument is consumed here */
/* Find a matching long option */
struct option *op;
for (op = options; !is_last_option(op); op++) {
if (op->long_name && strcmp(&arg[2], op->long_name) == 0) {
arg_result = (*op->action)(&ap, ret);
goto option_was_processed;
}
}
/* No option matches */
report_parse_error("Unexpected command-line parameter\n");
arg_result = ARGPARSE_ERROR;
goto option_was_processed;
}
}
else {
/* Other arguments are ignored */
next_argument(&ap);
arg_result = ARGPARSE_OK;
goto option_was_processed;
}
option_was_processed:
/* Decide what to do next based on 'arg_result' */
switch(arg_result) {
case ARGPARSE_OK:
/* Continue processing */
break;
case ARGPARSE_ERROR:
/* Error exit from the function */
return 0;
case ARGPARSE_DONE:
/* Normal exit from the argument parsing loop */
goto end_of_options;
}
} /* end for each argument */
/* If all arguments were processed, then normal exit from the loop */
end_of_options:
finalize_argparse(&ap, _argc, argv);
return 1;
}
/*****************************************************************************/
/* Other exported functions */
struct pb_Parameters *
pb_ReadParameters(int *_argc, char **argv)
{
struct pb_Parameters *ret =
(struct pb_Parameters *)malloc(sizeof(struct pb_Parameters));
/* Initialize the parameters structure */
ret->outFile = NULL;
ret->inpFiles = (char **)malloc(sizeof(char *));
ret->inpFiles[0] = NULL;
ret->platform = NULL;
ret->device = NULL;
/* Read parameters and update _argc, argv */
if (!pb_ParseParameters(ret, _argc, argv)) {
/* Parse error */
pb_FreeParameters(ret);
return NULL;
}
return ret;
}
int
pb_Parameters_CountInputs(struct pb_Parameters *p)
{
int n;
for (n = 0; p->inpFiles[n]; n++);
return n;
}

View File

@@ -0,0 +1,78 @@
/***************************************************************************
*cr
*cr (C) Copyright 2007 The Board of Trustees of the
*cr University of Illinois
*cr All Rights Reserved
*cr
***************************************************************************/
//#include <endian.h>
#include <stdlib.h>
#include <malloc.h>
#include <stdio.h>
#include <inttypes.h>
#if __BYTE_ORDER != __LITTLE_ENDIAN
# error "File I/O is not implemented for this system: wrong endianness."
#endif
void inputData(char* fName, int* len, int* depth, int* dim,int *nzcnt_len,int *pad,
float** h_data, int** h_indices, int** h_ptr,
int** h_perm, int** h_nzcnt)
{
FILE* fid = fopen(fName, "rb");
if (fid == NULL)
{
fprintf(stderr, "Cannot open input file\n");
exit(-1);
}
fscanf(fid, "%d %d %d %d %d\n",len,depth,nzcnt_len,dim,pad);
int _len=len[0];
int _depth=depth[0];
int _dim=dim[0];
int _pad=pad[0];
int _nzcnt_len=nzcnt_len[0];
*h_data = (float *) malloc(_len * sizeof (float));
fread (*h_data, sizeof (float), _len, fid);
*h_indices = (int *) malloc(_len * sizeof (int));
fread (*h_indices, sizeof (int), _len, fid);
*h_ptr = (int *) malloc(_depth * sizeof (int));
fread (*h_ptr, sizeof (int), _depth, fid);
*h_perm = (int *) malloc(_dim * sizeof (int));
fread (*h_perm, sizeof (int), _dim, fid);
*h_nzcnt = (int *) malloc(_nzcnt_len * sizeof (int));
fread (*h_nzcnt, sizeof (int), _nzcnt_len, fid);
fclose (fid);
}
void input_vec(char *fName,float *h_vec,int dim)
{
FILE* fid = fopen(fName, "rb");
fread (h_vec, sizeof (float), dim, fid);
fclose(fid);
}
void outputData(char* fName, float *h_Ax_vector,int dim)
{
FILE* fid = fopen(fName, "w");
uint32_t tmp32;
if (fid == NULL)
{
fprintf(stderr, "Cannot open output file\n");
exit(-1);
}
tmp32 = dim;
fwrite(&tmp32, sizeof(uint32_t), 1, fid);
fwrite(h_Ax_vector, sizeof(float), dim, fid);
fclose (fid);
}

View File

@@ -0,0 +1,18 @@
/***************************************************************************
*cr
*cr (C) Copyright 2007 The Board of Trustees of the
*cr University of Illinois
*cr All Rights Reserved
*cr
***************************************************************************/
#ifndef __FILEH__
#define __FILEH__
void inputData(char* fName, int* len, int* depth, int* dim,int *nzcnt_len,int *pad,
float** h_data, int** h_indices, int** h_ptr,
int** h_perm, int** h_nzcnt);
void input_vec(char* fNanme, float *h_vec,int dim);
void outputData(char* fName, float *h_Ax_vector,int dim);
#endif

View File

@@ -0,0 +1,55 @@
/***************************************************************************
*cr
*cr (C) Copyright 2010 The Board of Trustees of the
*cr University of Illinois
*cr All Rights Reserved
*cr
***************************************************************************/
//#include <endian.h>
#include <stdlib.h>
#include <malloc.h>
#include <stdio.h>
#include <inttypes.h>
#include "gpu_info.h"
void compute_active_thread(size_t *thread,
size_t *grid,
int task,
int pad,
int major,
int minor,
int sm)
{
int max_thread;
int max_block=8;
if(major==1)
{
if(minor>=2)
max_thread=1024;
else
max_thread=768;
}
else if(major==2)
max_thread=1536;
else
//newer GPU //keep using 2.0
max_thread=1536;
int _grid;
int _thread;
if(task*pad>sm*max_thread)
{
_thread=max_thread/max_block;
_grid = ((task*pad+_thread-1)/_thread)*_thread;
}
else
{
_thread=pad;
_grid=task*pad;
}
thread[0]=_thread;
grid[0]=_grid;
}

View File

@@ -0,0 +1,20 @@
/***************************************************************************
*cr
*cr (C) Copyright 2010 The Board of Trustees of the
*cr University of Illinois
*cr All Rights Reserved
*cr
***************************************************************************/
#ifndef __GPUINFOH__
#define __GPUINFOH__
void compute_active_thread(size_t *thread,
size_t *grid,
int task,
int pad,
int major,
int minor,
int sm);
#endif

View File

@@ -0,0 +1,28 @@
/***************************************************************************
*cr
*cr (C) Copyright 2010 The Board of Trustees of the
*cr University of Illinois
*cr All Rights Reserved
*cr
***************************************************************************/
#define Index3D(_nx,_ny,_i,_j,_k) ((_i)+_nx*((_j)+_ny*(_k)))
__kernel void naive_kernel(float c0,float c1,__global float* A0,__global float *Anext,int nx,int ny,int nz)
{
int i = get_global_id(0)+1;
int j = get_global_id(1)+1;
int k = get_global_id(2)+1;
if(i<nx-1)
{
Anext[Index3D (nx, ny, i, j, k)] = c1 *
( A0[Index3D (nx, ny, i, j, k + 1)] +
A0[Index3D (nx, ny, i, j, k - 1)] +
A0[Index3D (nx, ny, i, j + 1, k)] +
A0[Index3D (nx, ny, i, j - 1, k)] +
A0[Index3D (nx, ny, i + 1, j, k)] +
A0[Index3D (nx, ny, i - 1, j, k)] )
- A0[Index3D (nx, ny, i, j, k)] * c0;
}
}

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,253 @@
/***************************************************************************
*cr
*cr (C) Copyright 2010 The Board of Trustees of the
*cr University of Illinois
*cr All Rights Reserved
*cr
***************************************************************************/
#include <assert.h>
#include <CL/cl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <parboil.h>
//#include "file.h"
#include "ocl.h"
//#include "common.h"
#define CHECK_ERROR(errorMessage) \
if(clStatus != CL_SUCCESS) \
{ \
printf("Error: %s!\n",errorMessage); \
printf("Line: %d\n",__LINE__); \
exit(1); \
}
static int read_data(float *A0, int nx,int ny,int nz,FILE *fp)
{
int s=0;
int i,j,k;
for(i=0;i<nz;i++)
{
for(j=0;j<ny;j++)
{
for(k=0;k<nx;k++)
{
//fread(A0+s,sizeof(float),1,fp);
A0[s] = k;
s++;
}
}
}
return 0;
}
int main(int argc, char** argv) {
struct pb_TimerSet timers;
struct pb_Parameters *parameters;
printf("OpenCL accelerated 7 points stencil codes****\n");
printf("Author: Li-Wen Chang <lchang20@illinois.edu>\n");
//parameters = pb_ReadParameters(&argc, argv);
/*parameters->inpFiles = (char **)malloc(sizeof(char *) * 2);
parameters->inpFiles[0] = (char *)malloc(100);
parameters->inpFiles[1] = NULL;
strncpy(parameters->inpFiles[0], "128x128x32.bin", 100);*/
pb_InitializeTimerSet(&timers);
pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
//declaration
int nx,ny,nz;
int size;
int iteration;
float c0=1.0f/6.0f;
float c1=1.0f/6.0f/6.0f;
/*if (argc<5)
{
printf("Usage: probe nx ny nz t\n"
"nx: the grid size x\n"
"ny: the grid size y\n"
"nz: the grid size z\n"
"t: the iteration time\n");
return -1;
}
nx = atoi(argv[1]);
if (nx<1)
return -1;
ny = atoi(argv[2]);
if (ny<1)
return -1;
nz = atoi(argv[3]);
if (nz<1)
return -1;
iteration = atoi(argv[4]);
if(iteration<1)
return -1;*/
nx = 64;
ny = 64;
nz = 4;
iteration = 2;
cl_int clStatus;
cl_context clContext;
cl_device_id clDevice;
cl_platform_id clPlatform;
// Below is the new interface, coupled with Parboil runtime.
pb_Context* pb_context;
pb_context = pb_InitOpenCLContext(parameters);
if (pb_context == NULL) {
fprintf (stderr, "Error: No OpenCL platform/device can be found.");
return -1;
}
printf("OK\n");
// okay, let's deliver actual variables
clPlatform = (cl_platform_id) pb_context->clPlatformId;
clContext = (cl_context) pb_context->clContext;
clDevice = (cl_device_id) pb_context->clDeviceId;
cl_command_queue clCommandQueue = clCreateCommandQueue(clContext,clDevice,CL_QUEUE_PROFILING_ENABLE,&clStatus);
CHECK_ERROR("clCreateCommandQueue")
printf("OK\n");
pb_SetOpenCL(&clContext, &clCommandQueue);
//const char* clSource[] = {readFile("src/opencl_base/kernel.cl")};
//cl_program clProgram = clCreateProgramWithSource(clContext,1,clSource,NULL,&clStatus);
cl_program clProgram = clCreateProgramWithBuiltInKernels(
clContext, 1, &clDevice, "naive_kernel", &clStatus);
CHECK_ERROR("clCreateProgramWithSource")
char clOptions[50];
sprintf(clOptions,"-I src/opencl_base");
clStatus = clBuildProgram(clProgram,1,&clDevice,clOptions,NULL,NULL);
CHECK_ERROR("clBuildProgram")
cl_kernel clKernel = clCreateKernel(clProgram,"naive_kernel",&clStatus);
CHECK_ERROR("clCreateKernel")
printf("OK+\n");
//host data
float *h_A0;
float *h_Anext;
//device
cl_mem d_A0;
cl_mem d_Anext;
//load data from files
size=nx*ny*nz;
h_A0=(float*)malloc(sizeof(float)*size);
h_Anext=(float*)malloc(sizeof(float)*size);
pb_SwitchToTimer(&timers, pb_TimerID_IO);
//FILE *fp = fopen(parameters->inpFiles[0], "rb");
printf("OK+\n");
read_data(h_A0, nx,ny,nz,NULL);
printf("OK+\n");
//fclose(fp);
memcpy (h_Anext,h_A0,sizeof(float)*size);
pb_SwitchToTimer(&timers, pb_TimerID_COPY);
printf("OK+\n");
//memory allocation
d_A0 = clCreateBuffer(clContext,CL_MEM_READ_WRITE,size*sizeof(float),NULL,&clStatus);
CHECK_ERROR("clCreateBuffer")
d_Anext = clCreateBuffer(clContext,CL_MEM_READ_WRITE,size*sizeof(float),NULL,&clStatus);
CHECK_ERROR("clCreateBuffer")
//memory copy
clStatus = clEnqueueWriteBuffer(clCommandQueue,d_A0,CL_FALSE,0,size*sizeof(float),h_A0,0,NULL,NULL);
CHECK_ERROR("clEnqueueWriteBuffer")
clStatus = clEnqueueWriteBuffer(clCommandQueue,d_Anext,CL_TRUE,0,size*sizeof(float),h_Anext,0,NULL,NULL);
CHECK_ERROR("clEnqueueWriteBuffer")
pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
printf("OK+\n");
//only use 1D thread block
int tx = 256;
size_t block[3] = {tx,1,1};
size_t grid[3] = {(nx-2+tx-1)/tx*tx,ny-2,nz-2};
//size_t grid[3] = {nx-2,ny-2,nz-2};
size_t offset[3] = {1,1,1};
printf("block size in x/y/z = %d %d %d\n",block[0],block[1],block[2]);
printf("grid size in x/y/z = %d %d %d\n",grid[0],grid[1],grid[2]);
printf ("blocks = %d\n", (grid[0]/block[0])*(grid[1]/block[1])*(grid[2]*block[2]));
clStatus = clSetKernelArg(clKernel,0,sizeof(float),(void*)&c0);
clStatus = clSetKernelArg(clKernel,1,sizeof(float),(void*)&c1);
clStatus = clSetKernelArg(clKernel,2,sizeof(cl_mem),(void*)&d_A0);
clStatus = clSetKernelArg(clKernel,3,sizeof(cl_mem),(void*)&d_Anext);
clStatus = clSetKernelArg(clKernel,4,sizeof(int),(void*)&nx);
clStatus = clSetKernelArg(clKernel,5,sizeof(int),(void*)&ny);
clStatus = clSetKernelArg(clKernel,6,sizeof(int),(void*)&nz);
CHECK_ERROR("clSetKernelArg")
//main execution
pb_SwitchToTimer(&timers, pb_TimerID_KERNEL);
int t;
for(t=0;t<iteration;t++)
{
clStatus = clEnqueueNDRangeKernel(clCommandQueue,clKernel,3,NULL,grid,block,0,NULL,NULL);
//printf("iteration %d\n",t)
CHECK_ERROR("clEnqueueNDRangeKernel")
cl_mem d_temp = d_A0;
d_A0 = d_Anext;
d_Anext = d_temp;
clStatus = clSetKernelArg(clKernel,2,sizeof(cl_mem),(void*)&d_A0);
clStatus = clSetKernelArg(clKernel,3,sizeof(cl_mem),(void*)&d_Anext);
}
cl_mem d_temp = d_A0;
d_A0 = d_Anext;
d_Anext = d_temp;
pb_SwitchToTimer(&timers, pb_TimerID_COPY);
clStatus = clEnqueueReadBuffer(clCommandQueue,d_Anext,CL_TRUE,0,size*sizeof(float),h_Anext,0,NULL,NULL);
CHECK_ERROR("clEnqueueReadBuffer")
clStatus = clReleaseMemObject(d_A0);
clStatus = clReleaseMemObject(d_Anext);
clStatus = clReleaseKernel(clKernel);
clStatus = clReleaseProgram(clProgram);
clStatus = clReleaseCommandQueue(clCommandQueue);
clStatus = clReleaseContext(clContext);
CHECK_ERROR("clReleaseContext")
if (parameters->outFile) {
pb_SwitchToTimer(&timers, pb_TimerID_IO);
//outputData(parameters->outFile,h_Anext,nx,ny,nz);
}
pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
//free((void*)clSource[0]);
free(h_A0);
free(h_Anext);
pb_SwitchToTimer(&timers, pb_TimerID_NONE);
pb_PrintTimerSet(&timers);
pb_FreeParameters(parameters);
return 0;
}

View File

@@ -0,0 +1,50 @@
#include <CL/cl.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "ocl.h"
char* readFile(const char* fileName)
{
FILE* fp;
fp = fopen(fileName,"r");
if(fp == NULL)
{
printf("Error 1!\n");
exit(1);
}
fseek(fp,0,SEEK_END);
long size = ftell(fp);
rewind(fp);
char* buffer = (char*)malloc(sizeof(char)*(size+1));
if(buffer == NULL)
{
printf("Error 2!\n");
fclose(fp);
exit(1);
}
size_t res = fread(buffer,1,size,fp);
if(res != size)
{
printf("Error 3!\n");
fclose(fp);
exit(1);
}
buffer[size] = 0;
fclose(fp);
return buffer;
}
void clMemSet(cl_command_queue clCommandQueue, cl_mem buf, int val, size_t size)
{
cl_int clStatus;
char* temp = (char*)malloc(size);
memset(temp,val,size);
clStatus = clEnqueueWriteBuffer(clCommandQueue,buf,CL_TRUE,0,size,temp,0,NULL,NULL);
CHECK_ERROR("clEnqueueWriteBuffer")
free(temp);
}

View File

@@ -0,0 +1,21 @@
#ifndef __OCLH__
#define __OCLH__
typedef struct {
cl_uint major;
cl_uint minor;
cl_uint multiProcessorCount;
} OpenCLDeviceProp;
void clMemSet(cl_command_queue, cl_mem, int, size_t);
char* readFile(const char*);
#define CHECK_ERROR(errorMessage) \
if(clStatus != CL_SUCCESS) \
{ \
printf("Error: %s!\n",errorMessage); \
printf("Line: %d\n",__LINE__); \
exit(1); \
}
#endif

View File

@@ -0,0 +1,427 @@
/*
* (c) 2007 The Board of Trustees of the University of Illinois.
*/
#include <parboil.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#if _POSIX_VERSION >= 200112L
# include <sys/time.h>
#endif
/*****************************************************************************/
/* Timer routines */
static void
accumulate_time(pb_Timestamp *accum,
pb_Timestamp start,
pb_Timestamp end)
{
#if _POSIX_VERSION >= 200112L
*accum += end - start;
#else
# error "Timestamps not implemented for this system"
#endif
}
#if _POSIX_VERSION >= 200112L
static pb_Timestamp get_time()
{
struct timeval tv;
gettimeofday(&tv, NULL);
return (pb_Timestamp) (tv.tv_sec * 1000000LL + tv.tv_usec);
}
#else
# error "no supported time libraries are available on this platform"
#endif
void
pb_ResetTimer(struct pb_Timer *timer)
{
timer->state = pb_Timer_STOPPED;
#if _POSIX_VERSION >= 200112L
timer->elapsed = 0;
#else
# error "pb_ResetTimer: not implemented for this system"
#endif
}
void
pb_StartTimer(struct pb_Timer *timer)
{
if (timer->state != pb_Timer_STOPPED) {
fputs("Ignoring attempt to start a running timer\n", stderr);
return;
}
timer->state = pb_Timer_RUNNING;
#if _POSIX_VERSION >= 200112L
{
struct timeval tv;
gettimeofday(&tv, NULL);
timer->init = tv.tv_sec * 1000000LL + tv.tv_usec;
}
#else
# error "pb_StartTimer: not implemented for this system"
#endif
}
void
pb_StartTimerAndSubTimer(struct pb_Timer *timer, struct pb_Timer *subtimer)
{
unsigned int numNotStopped = 0x3; // 11
if (timer->state != pb_Timer_STOPPED) {
fputs("Warning: Timer was not stopped\n", stderr);
numNotStopped &= 0x1; // Zero out 2^1
}
if (subtimer->state != pb_Timer_STOPPED) {
fputs("Warning: Subtimer was not stopped\n", stderr);
numNotStopped &= 0x2; // Zero out 2^0
}
if (numNotStopped == 0x0) {
fputs("Ignoring attempt to start running timer and subtimer\n", stderr);
return;
}
timer->state = pb_Timer_RUNNING;
subtimer->state = pb_Timer_RUNNING;
#if _POSIX_VERSION >= 200112L
{
struct timeval tv;
gettimeofday(&tv, NULL);
if (numNotStopped & 0x2) {
timer->init = tv.tv_sec * 1000000LL + tv.tv_usec;
}
if (numNotStopped & 0x1) {
subtimer->init = tv.tv_sec * 1000000LL + tv.tv_usec;
}
}
#else
# error "pb_StartTimer: not implemented for this system"
#endif
}
void
pb_StopTimer(struct pb_Timer *timer)
{
pb_Timestamp fini;
if (timer->state != pb_Timer_RUNNING) {
fputs("Ignoring attempt to stop a stopped timer\n", stderr);
return;
}
timer->state = pb_Timer_STOPPED;
#if _POSIX_VERSION >= 200112L
{
struct timeval tv;
gettimeofday(&tv, NULL);
fini = tv.tv_sec * 1000000LL + tv.tv_usec;
}
#else
# error "pb_StopTimer: not implemented for this system"
#endif
accumulate_time(&timer->elapsed, timer->init, fini);
timer->init = fini;
}
void pb_StopTimerAndSubTimer(struct pb_Timer *timer, struct pb_Timer *subtimer) {
pb_Timestamp fini;
unsigned int numNotRunning = 0x3; // 0b11
if (timer->state != pb_Timer_RUNNING) {
fputs("Warning: Timer was not running\n", stderr);
numNotRunning &= 0x1; // Zero out 2^1
}
if (subtimer->state != pb_Timer_RUNNING) {
fputs("Warning: Subtimer was not running\n", stderr);
numNotRunning &= 0x2; // Zero out 2^0
}
if (numNotRunning == 0x0) {
fputs("Ignoring attempt to stop stopped timer and subtimer\n", stderr);
return;
}
timer->state = pb_Timer_STOPPED;
subtimer->state = pb_Timer_STOPPED;
#if _POSIX_VERSION >= 200112L
{
struct timeval tv;
gettimeofday(&tv, NULL);
fini = tv.tv_sec * 1000000LL + tv.tv_usec;
}
#else
# error "pb_StopTimer: not implemented for this system"
#endif
if (numNotRunning & 0x2) {
accumulate_time(&timer->elapsed, timer->init, fini);
timer->init = fini;
}
if (numNotRunning & 0x1) {
accumulate_time(&subtimer->elapsed, subtimer->init, fini);
subtimer->init = fini;
}
}
/* Get the elapsed time in seconds. */
double
pb_GetElapsedTime(struct pb_Timer *timer)
{
double ret;
if (timer->state != pb_Timer_STOPPED) {
fputs("Elapsed time from a running timer is inaccurate\n", stderr);
}
#if _POSIX_VERSION >= 200112L
ret = timer->elapsed / 1e6;
#else
# error "pb_GetElapsedTime: not implemented for this system"
#endif
return ret;
}
void
pb_InitializeTimerSet(struct pb_TimerSet *timers)
{
int n;
timers->wall_begin = get_time();
timers->current = pb_TimerID_NONE;
timers->async_markers = NULL;
for (n = 0; n < pb_TimerID_LAST; n++) {
pb_ResetTimer(&timers->timers[n]);
timers->sub_timer_list[n] = NULL; // free first?
}
}
void
pb_AddSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID pb_Category) {
struct pb_SubTimer *subtimer = (struct pb_SubTimer *) malloc
(sizeof(struct pb_SubTimer));
int len = strlen(label);
subtimer->label = (char *) malloc (sizeof(char)*(len+1));
sprintf(subtimer->label, "%s\0", label);
pb_ResetTimer(&subtimer->timer);
subtimer->next = NULL;
struct pb_SubTimerList *subtimerlist = timers->sub_timer_list[pb_Category];
if (subtimerlist == NULL) {
subtimerlist = (struct pb_SubTimerList *) malloc
(sizeof(struct pb_SubTimerList));
subtimerlist->subtimer_list = subtimer;
timers->sub_timer_list[pb_Category] = subtimerlist;
} else {
// Append to list
struct pb_SubTimer *element = subtimerlist->subtimer_list;
while (element->next != NULL) {
element = element->next;
}
element->next = subtimer;
}
}
void
pb_SwitchToSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID category)
{
// switchToSub( NULL, NONE
// switchToSub( NULL, some
// switchToSub( some, some
// switchToSub( some, NONE -- tries to find "some" in NONE's sublist, which won't be printed
struct pb_Timer *topLevelToStop = NULL;
if (timers->current != category && timers->current != pb_TimerID_NONE) {
// Switching to subtimer in a different category needs to stop the top-level current, different categoried timer.
// NONE shouldn't have a timer associated with it, so exclude from branch
topLevelToStop = &timers->timers[timers->current];
}
struct pb_SubTimerList *subtimerlist = timers->sub_timer_list[timers->current];
struct pb_SubTimer *curr = (subtimerlist == NULL) ? NULL : subtimerlist->current;
if (timers->current != pb_TimerID_NONE) {
if (curr != NULL && topLevelToStop != NULL) {
pb_StopTimerAndSubTimer(topLevelToStop, &curr->timer);
} else if (curr != NULL) {
pb_StopTimer(&curr->timer);
} else {
pb_StopTimer(topLevelToStop);
}
}
subtimerlist = timers->sub_timer_list[category];
struct pb_SubTimer *subtimer = NULL;
if (label != NULL) {
subtimer = subtimerlist->subtimer_list;
while (subtimer != NULL) {
if (strcmp(subtimer->label, label) == 0) {
break;
} else {
subtimer = subtimer->next;
}
}
}
if (category != pb_TimerID_NONE) {
if (subtimerlist != NULL) {
subtimerlist->current = subtimer;
}
if (category != timers->current && subtimer != NULL) {
pb_StartTimerAndSubTimer(&timers->timers[category], &subtimer->timer);
} else if (subtimer != NULL) {
// Same category, different non-NULL subtimer
pb_StartTimer(&subtimer->timer);
} else{
// Different category, but no subtimer (not found or specified as NULL) -- unprefered way of setting topLevel timer
pb_StartTimer(&timers->timers[category]);
}
}
timers->current = category;
}
void
pb_SwitchToTimer(struct pb_TimerSet *timers, enum pb_TimerID timer)
{
/* Stop the currently running timer */
/*if (timers->current != pb_TimerID_NONE) {
struct pb_SubTimer *currSubTimer = NULL;
struct pb_SubTimerList *subtimerlist = timers->sub_timer_list[timers->current];
if ( subtimerlist != NULL) {
currSubTimer = timers->sub_timer_list[timers->current]->current;
}
if ( currSubTimer!= NULL) {
pb_StopTimerAndSubTimer(&timers->timers[timers->current], &currSubTimer->timer);
} else {
pb_StopTimer(&timers->timers[timers->current]);
}
}
timers->current = timer;
if (timer != pb_TimerID_NONE) {
pb_StartTimer(&timers->timers[timer]);
}*/
}
void
pb_PrintTimerSet(struct pb_TimerSet *timers)
{
pb_Timestamp wall_end = get_time();
struct pb_Timer *t = timers->timers;
struct pb_SubTimer* sub = NULL;
int maxSubLength;
const char *categories[] = {
"IO", "Kernel", "Copy", "Driver", "Copy Async", "Compute"
};
const int maxCategoryLength = 10;
int i;
for(i = 1; i < pb_TimerID_LAST-1; ++i) { // exclude NONE and OVRELAP from this format
if(pb_GetElapsedTime(&t[i]) != 0) {
// Print Category Timer
printf("%-*s: %f\n", maxCategoryLength, categories[i-1], pb_GetElapsedTime(&t[i]));
if (timers->sub_timer_list[i] != NULL) {
sub = timers->sub_timer_list[i]->subtimer_list;
maxSubLength = 0;
while (sub != NULL) {
// Find longest SubTimer label
if (strlen(sub->label) > maxSubLength) {
maxSubLength = strlen(sub->label);
}
sub = sub->next;
}
// Fit to Categories
if (maxSubLength <= maxCategoryLength) {
maxSubLength = maxCategoryLength;
}
sub = timers->sub_timer_list[i]->subtimer_list;
// Print SubTimers
while (sub != NULL) {
printf(" -%-*s: %f\n", maxSubLength, sub->label, pb_GetElapsedTime(&sub->timer));
sub = sub->next;
}
}
}
}
if(pb_GetElapsedTime(&t[pb_TimerID_OVERLAP]) != 0)
printf("CPU/Kernel Overlap: %f\n", pb_GetElapsedTime(&t[pb_TimerID_OVERLAP]));
float walltime = (wall_end - timers->wall_begin)/ 1e6;
printf("Timer Wall Time: %f\n", walltime);
}
void pb_DestroyTimerSet(struct pb_TimerSet * timers)
{
/* clean up all of the async event markers */
struct pb_async_time_marker_list ** event = &(timers->async_markers);
while( *event != NULL) {
struct pb_async_time_marker_list ** next = &((*event)->next);
free(*event);
(*event) = NULL;
event = next;
}
int i = 0;
for(i = 0; i < pb_TimerID_LAST; ++i) {
if (timers->sub_timer_list[i] != NULL) {
struct pb_SubTimer *subtimer = timers->sub_timer_list[i]->subtimer_list;
struct pb_SubTimer *prev = NULL;
while (subtimer != NULL) {
free(subtimer->label);
prev = subtimer;
subtimer = subtimer->next;
free(prev);
}
free(timers->sub_timer_list[i]);
}
}
}

View File

@@ -0,0 +1,348 @@
/*
* (c) 2010 The Board of Trustees of the University of Illinois.
*/
#ifndef PARBOIL_HEADER
#define PARBOIL_HEADER
#include <stdio.h>
#include <string.h>
#ifdef __cplusplus
extern "C" {
#endif
#include <unistd.h>
/* A platform as specified by the user on the command line */
struct pb_PlatformParam {
char *name; /* The platform name. This string is owned. */
char *version; /* The platform version; may be NULL.
* This string is owned. */
};
/* Create a PlatformParam from the given strings.
* 'name' must not be NULL. 'version' may be NULL.
* If not NULL, the strings should have been allocated by malloc(),
* and they will be owned by the returned object.
*/
struct pb_PlatformParam *
pb_PlatformParam(char *name, char *version);
void
pb_FreePlatformParam(struct pb_PlatformParam *);
/* A criterion for how to select a device */
enum pb_DeviceSelectionCriterion {
pb_Device_INDEX, /* Enumerate the devices and select one
* by its number */
pb_Device_CPU, /* Select a CPU device */
pb_Device_GPU, /* Select a GPU device */
pb_Device_ACCELERATOR, /* Select an accelerator device */
pb_Device_NAME /* Select a device by name */
};
/* A device as specified by the user on the command line */
struct pb_DeviceParam {
enum pb_DeviceSelectionCriterion criterion;
union {
int index; /* If criterion == pb_Device_INDEX,
* the index of the device */
char *name; /* If criterion == pb_Device_NAME,
* the name of the device.
* This string is owned. */
};
};
struct pb_DeviceParam *
pb_DeviceParam_index(int index);
struct pb_DeviceParam *
pb_DeviceParam_cpu(void);
struct pb_DeviceParam *
pb_DeviceParam_gpu(void);
struct pb_DeviceParam *
pb_DeviceParam_accelerator(void);
/* Create a by-name device selection criterion.
* The string should have been allocated by malloc(), and it will will be
* owned by the returned object.
*/
struct pb_DeviceParam *
pb_DeviceParam_name(char *name);
void
pb_FreeDeviceParam(struct pb_DeviceParam *);
/* Command line parameters for benchmarks */
struct pb_Parameters {
char *outFile; /* If not NULL, the raw output of the
* computation should be saved to this
* file. The string is owned. */
char **inpFiles; /* A NULL-terminated array of strings
* holding the input file(s) for the
* computation. The array and strings
* are owned. */
struct pb_PlatformParam *platform; /* If not NULL, the platform
* specified on the command line. */
struct pb_DeviceParam *device; /* If not NULL, the device
* specified on the command line. */
};
/* Read command-line parameters.
*
* The argc and argv parameters to main are read, and any parameters
* interpreted by this function are removed from the argument list.
*
* A new instance of struct pb_Parameters is returned.
* If there is an error, then an error message is printed on stderr
* and NULL is returned.
*/
struct pb_Parameters *
pb_ReadParameters(int *_argc, char **argv);
/* Free an instance of struct pb_Parameters.
*/
void
pb_FreeParameters(struct pb_Parameters *p);
void
pb_FreeStringArray(char **);
/* Count the number of input files in a pb_Parameters instance.
*/
int
pb_Parameters_CountInputs(struct pb_Parameters *p);
/* A time or duration. */
//#if _POSIX_VERSION >= 200112L
typedef unsigned long long pb_Timestamp; /* time in microseconds */
//#else
//# error "Timestamps not implemented"
//#endif
enum pb_TimerState {
pb_Timer_STOPPED,
pb_Timer_RUNNING,
};
struct pb_Timer {
enum pb_TimerState state;
pb_Timestamp elapsed; /* Amount of time elapsed so far */
pb_Timestamp init; /* Beginning of the current time interval,
* if state is RUNNING. End of the last
* recorded time interfal otherwise. */
};
/* Reset a timer.
* Use this to initialize a timer or to clear
* its elapsed time. The reset timer is stopped.
*/
void
pb_ResetTimer(struct pb_Timer *timer);
/* Start a timer. The timer is set to RUNNING mode and
* time elapsed while the timer is running is added to
* the timer.
* The timer should not already be running.
*/
void
pb_StartTimer(struct pb_Timer *timer);
/* Stop a timer.
* This stops adding elapsed time to the timer.
* The timer should not already be stopped.
*/
void
pb_StopTimer(struct pb_Timer *timer);
/* Get the elapsed time in seconds. */
double
pb_GetElapsedTime(struct pb_Timer *timer);
/* Execution time is assigned to one of these categories. */
enum pb_TimerID {
pb_TimerID_NONE = 0,
pb_TimerID_IO, /* Time spent in input/output */
pb_TimerID_KERNEL, /* Time spent computing on the device,
* recorded asynchronously */
pb_TimerID_COPY, /* Time spent synchronously moving data
* to/from device and allocating/freeing
* memory on the device */
pb_TimerID_DRIVER, /* Time spent in the host interacting with the
* driver, primarily for recording the time
* spent queueing asynchronous operations */
pb_TimerID_COPY_ASYNC, /* Time spent in asynchronous transfers */
pb_TimerID_COMPUTE, /* Time for all program execution other
* than parsing command line arguments,
* I/O, kernel, and copy */
pb_TimerID_OVERLAP, /* Time double-counted in asynchronous and
* host activity: automatically filled in,
* not intended for direct usage */
pb_TimerID_LAST /* Number of timer IDs */
};
/* Dynamic list of asynchronously tracked times between events */
struct pb_async_time_marker_list {
char *label; // actually just a pointer to a string
enum pb_TimerID timerID; /* The ID to which the interval beginning
* with this marker should be attributed */
void * marker;
//cudaEvent_t marker; /* The driver event for this marker */
struct pb_async_time_marker_list *next;
};
struct pb_SubTimer {
char *label;
struct pb_Timer timer;
struct pb_SubTimer *next;
};
struct pb_SubTimerList {
struct pb_SubTimer *current;
struct pb_SubTimer *subtimer_list;
};
/* A set of timers for recording execution times. */
struct pb_TimerSet {
enum pb_TimerID current;
struct pb_async_time_marker_list* async_markers;
pb_Timestamp async_begin;
pb_Timestamp wall_begin;
struct pb_Timer timers[pb_TimerID_LAST];
struct pb_SubTimerList *sub_timer_list[pb_TimerID_LAST];
};
/* Reset all timers in the set. */
void
pb_InitializeTimerSet(struct pb_TimerSet *timers);
void
pb_AddSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID pb_Category);
/* Select which timer the next interval of time should be accounted
* to. The selected timer is started and other timers are stopped.
* Using pb_TimerID_NONE stops all timers. */
void
pb_SwitchToTimer(struct pb_TimerSet *timers, enum pb_TimerID timer);
void
pb_SwitchToSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID category);
/* Print timer values to standard output. */
void
pb_PrintTimerSet(struct pb_TimerSet *timers);
/* Release timer resources */
void
pb_DestroyTimerSet(struct pb_TimerSet * timers);
void
pb_SetOpenCL(void *clContextPtr, void *clCommandQueuePtr);
typedef struct pb_Device_tag {
char* name;
void* clDevice;
int id;
unsigned int in_use;
unsigned int available;
} pb_Device;
struct pb_Context_tag;
typedef struct pb_Context_tag pb_Context;
typedef struct pb_Platform_tag {
char* name;
char* version;
void* clPlatform;
unsigned int in_use;
pb_Context** contexts;
pb_Device** devices;
} pb_Platform;
struct pb_Context_tag {
void* clPlatformId;
void* clContext;
void* clDeviceId;
pb_Platform* pb_platform;
pb_Device* pb_device;
};
// verbosely print out list of platforms and their devices to the console.
pb_Platform**
pb_GetPlatforms();
// Choose a platform according to the given platform specification
pb_Platform*
pb_GetPlatform(struct pb_PlatformParam *platform);
// choose a platform: by name, name & version
pb_Platform*
pb_GetPlatformByName(const char* name);
pb_Platform*
pb_GetPlatformByNameAndVersion(const char* name, const char* version);
// Choose a device according to the given device specification
pb_Device*
pb_GetDevice(pb_Platform* pb_platform, struct pb_DeviceParam *device);
pb_Device**
pb_GetDevices(pb_Platform* pb_platform);
// choose a device by name.
pb_Device*
pb_GetDeviceByName(pb_Platform* pb_platform, const char* name);
pb_Platform*
pb_GetPlatformByEnvVars();
pb_Context*
pb_InitOpenCLContext(struct pb_Parameters* parameters);
void
pb_ReleasePlatforms();
void
pb_ReleaseContext(pb_Context* c);
void
pb_PrintPlatformInfo(pb_Context* c);
void
perf_init();
//#define MEASURE_KERNEL_TIME
#include <CL/cl.h>
#ifdef MEASURE_KERNEL_TIME
#define clEnqueueNDRangeKernel(q,k,d,o,dg,db,a,b,c) pb_clEnqueueNDRangeKernel((q), (k), (d), (o), (dg), (db), (a), (b), (c))
cl_int
pb_clEnqueueNDRangeKernel(cl_command_queue /* command_queue */,
cl_kernel /* kernel */,
cl_uint /* work_dim */,
const size_t * /* global_work_offset */,
const size_t * /* global_work_size */,
const size_t * /* local_work_size */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */);
#endif
enum { T_FLOAT, T_DOUBLE, T_SHORT, T_INT, T_UCHAR };
void pb_sig_float(char*, float*, int);
void pb_sig_double(char*, double*, int);
void pb_sig_short(char*, short*, int);
void pb_sig_int(char*, int*, int);
void pb_sig_uchar(char*, unsigned char*, unsigned int);
void pb_sig_clmem(char*, cl_command_queue, cl_mem, int);
#ifdef __cplusplus
}
#endif
#endif //PARBOIL_HEADER

File diff suppressed because it is too large Load Diff