diff --git a/benchmarks/opencl/mri-q/32_32_32_dataset.bin b/benchmarks/opencl/mri-q/32_32_32_dataset.bin new file mode 100755 index 00000000..db8385bb Binary files /dev/null and b/benchmarks/opencl/mri-q/32_32_32_dataset.bin differ diff --git a/benchmarks/opencl/mri-q/Makefile b/benchmarks/opencl/mri-q/Makefile new file mode 100644 index 00000000..55c9b3c6 --- /dev/null +++ b/benchmarks/opencl/mri-q/Makefile @@ -0,0 +1,68 @@ +RISCV_TOOL_PATH = $(wildcard ~/dev/riscv-gnu-toolchain/drops) +POCL_CC_PATH = $(wildcard ~/dev/pocl/drops_riscv_cc) +POCL_INC_PATH = $(wildcard ../include) +POCL_LIB_PATH = $(wildcard ../lib) +VX_RT_PATH = $(wildcard ../../../runtime) +VX_SIMX_PATH = $(wildcard ../../../simX/obj_dir) + +CC = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc +CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++ +DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump +HEX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy +GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb + +VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c +VX_SRCS += $(VX_RT_PATH)/startup/vx_start.s +VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s +VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c +VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s +VX_SRCS += $(VX_RT_PATH)/tests/tests.c +VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c +VX_SRCS += $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) + +VX_CFLAGS = -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld + +CXXFLAGS = -g -O0 -march=rv32im -mabi=ilp32 +CXXFLAGS += -ffreestanding # program may not begin at main() +CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections +CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions +CXXFLAGS += -I$(POCL_INC_PATH) -I. + +VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a +QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a + +PROJECT = mri-q + +SRCS = main.cc args.c parboil_opencl.c ocl.c gpu_info.c file.cc computeQ.c + +all: $(PROJECT).dump $(PROJECT).hex + +lib$(PROJECT).a: kernel.cl + POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl + +$(PROJECT).elf: $(SRCS) lib$(PROJECT).a + $(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) $(SRCS) $(VX_LIBS) -o $(PROJECT).elf + +$(PROJECT).qemu: $(SRCS) lib$(PROJECT).a + $(CXX) $(CXXFLAGS) $(SRCS) $(QEMU_LIBS) -o $(PROJECT).qemu + +$(PROJECT).hex: $(PROJECT).elf + $(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex + +$(PROJECT).dump: $(PROJECT).elf + $(DMP) -D $(PROJECT).elf > $(PROJECT).dump + +run: $(PROJECT).hex + POCL_DEBUG=all $(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug + +qemu: $(PROJECT).qemu + POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -d in_asm -D debug.log $(PROJECT).qemu + +gdb-s: $(PROJECT).qemu + POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -g 1234 -d in_asm -D debug.log $(PROJECT).qemu + +gdb-c: $(PROJECT).qemu + $(GDB) $(PROJECT).qemu + +clean: + rm -rf *.o *.elf *.dump *.hex *.qemu *.log *.debug diff --git a/benchmarks/opencl/mri-q/args.c b/benchmarks/opencl/mri-q/args.c new file mode 100644 index 00000000..9d751e29 --- /dev/null +++ b/benchmarks/opencl/mri-q/args.c @@ -0,0 +1,617 @@ + +#include +#include +#include +#include +#include +#include + +/*****************************************************************************/ +/* Memory management routines */ + +/* Free an array of owned strings. */ +void +pb_FreeStringArray(char **string_array) +{ + char **p; + + if (!string_array) return; + for (p = string_array; *p; p++) free(*p); + free(string_array); +} + +struct pb_PlatformParam * +pb_PlatformParam(char *name, char *version) +{ + if (name == NULL) { + fprintf(stderr, "pb_PlatformParam: Invalid argument\n"); + exit(-1); + } + + struct pb_PlatformParam *ret = + (struct pb_PlatformParam *)malloc(sizeof (struct pb_PlatformParam)); + + ret->name = name; + ret->version = version; + return ret; +} + +void +pb_FreePlatformParam(struct pb_PlatformParam *p) +{ + if (p == NULL) return; + + free(p->name); + free(p->version); + free(p); +} + +struct pb_DeviceParam * +pb_DeviceParam_index(int index) +{ + struct pb_DeviceParam *ret = + (struct pb_DeviceParam *)malloc(sizeof (struct pb_DeviceParam)); + ret->criterion = pb_Device_INDEX; + ret->index = index; + return ret; +} + +struct pb_DeviceParam * +pb_DeviceParam_cpu(void) +{ + struct pb_DeviceParam *ret = + (struct pb_DeviceParam *)malloc(sizeof (struct pb_DeviceParam)); + ret->criterion = pb_Device_CPU; + return ret; +} + +struct pb_DeviceParam * +pb_DeviceParam_gpu(void) +{ + struct pb_DeviceParam *ret = + (struct pb_DeviceParam *)malloc(sizeof (struct pb_DeviceParam)); + ret->criterion = pb_Device_GPU; + return ret; +} + +struct pb_DeviceParam * +pb_DeviceParam_accelerator(void) +{ + struct pb_DeviceParam *ret = + (struct pb_DeviceParam *)malloc(sizeof (struct pb_DeviceParam)); + ret->criterion = pb_Device_ACCELERATOR; + return ret; +} + +struct pb_DeviceParam * +pb_DeviceParam_name(char *name) +{ + struct pb_DeviceParam *ret = + (struct pb_DeviceParam *)malloc(sizeof (struct pb_DeviceParam)); + ret->criterion = pb_Device_NAME; + ret->name = name; + return ret; +} + +void +pb_FreeDeviceParam(struct pb_DeviceParam *p) +{ + if (p == NULL) return; + + switch(p->criterion) { + case pb_Device_NAME: + free(p->name); + break; + case pb_Device_INDEX: + case pb_Device_CPU: + case pb_Device_ACCELERATOR: + break; + default: + fprintf(stderr, "pb_FreeDeviceParam: Invalid argument\n"); + exit(-1); + } +} + +void +pb_FreeParameters(struct pb_Parameters *p) +{ + free(p->outFile); + pb_FreeStringArray(p->inpFiles); + pb_FreePlatformParam(p->platform); + pb_FreeDeviceParam(p->device); + free(p); +} + +/*****************************************************************************/ + +/* Parse a comma-delimited list of strings into an + * array of strings. */ +static char ** +read_string_array(char *in) +{ + char **ret; + int i; + int count; /* Number of items in the input */ + char *substring; /* Current substring within 'in' */ + + /* Count the number of items in the string */ + count = 1; + for (i = 0; in[i]; i++) if (in[i] == ',') count++; + + /* Allocate storage */ + ret = (char **)malloc((count + 1) * sizeof(char *)); + + /* Create copies of the strings from the list */ + substring = in; + for (i = 0; i < count; i++) { + char *substring_end; + int substring_length; + + /* Find length of substring */ + for (substring_end = substring; + (*substring_end != ',') && (*substring_end != 0); + substring_end++); + + substring_length = substring_end - substring; + + /* Allocate memory and copy the substring */ + ret[i] = (char *)malloc(substring_length + 1); + memcpy(ret[i], substring, substring_length); + ret[i][substring_length] = 0; + + /* go to next substring */ + substring = substring_end + 1; + } + ret[i] = NULL; /* Write the sentinel value */ + + return ret; +} + +static void +report_parse_error(const char *str) +{ + fputs(str, stderr); +} + +/* Interpret a string as a 'pb_DeviceParam' value. + * Return a pointer to a new value, or NULL on failure. + */ +static struct pb_DeviceParam * +read_device_param(char *str) +{ + /* Try different ways of interpreting 'device_string' until one works */ + + /* If argument is an integer, then interpret it as a device index */ + errno = 0; + char *end; + long device_int = strtol(str, &end, 10); + if (!errno) { + /* Negative numbers are not valid */ + if (device_int < 0 || device_int > INT_MAX) return NULL; + + return pb_DeviceParam_index(device_int); + } + + /* Match against predefined strings */ + if (strcmp(str, "CPU") == 0) + return pb_DeviceParam_cpu(); + if (strcmp(str, "GPU") == 0) + return pb_DeviceParam_gpu(); + if (strcmp(str, "ACCELERATOR") == 0) + return pb_DeviceParam_accelerator(); + + /* Assume any other string is a device name */ + return pb_DeviceParam_name(strdup(str)); +} + +/* Interpret a string as a 'pb_PlatformParam' value. + * Return a pointer to a new value, or NULL on failure. + */ +static struct pb_PlatformParam * +read_platform_param(char *str) +{ + int separator_index; /* Index of the '-' character separating + * name and version number. It's -1 if + * there's no '-' character. */ + + /* Find the last occurrence of '-' in 'str' */ + { + char *cur; + separator_index = -1; + for (cur = str; *cur; cur++) { + if (*cur == '-') separator_index = cur - str; + } + } + + /* The platform name is either the entire string, or all characters before + * the separator */ + int name_length = separator_index == -1 ? strlen(str) : separator_index; + char *name_str = (char *)malloc(name_length + 1); + memcpy(name_str, str, name_length); + name_str[name_length] = 0; + + /* The version is either NULL, or all characters after the separator */ + char *version_str; + if (separator_index == -1) { + version_str = NULL; + } + else { + const char *version_input_str = str + separator_index + 1; + int version_length = strlen(version_input_str); + + version_str = (char *)malloc(version_length + 1); + memcpy(version_str, version_input_str, version_length); + version_str[version_length] = 0; + } + + /* Create output structure */ + return pb_PlatformParam(name_str, version_str); +} + +/****************************************************************************/ +/* Argument parsing state */ + +/* Argument parsing state. + * + * Arguments that are interpreted by the argument parser are removed from + * the list. Variables 'argc' and 'argn' do not count arguments that have + * been removed. + * + * During argument parsing, the array of arguments is compacted, overwriting + * the erased arguments. Variable 'argv_put' points to the array element + * where the next argument will be written. Variable 'argv_get' points to + * the array element where the next argument will be read from. + */ +struct argparse { + int argc; /* Number of arguments. Mutable. */ + int argn; /* Current argument index. */ + char **argv_get; /* Argument value being read. */ + char **argv_put; /* Argument value being written. + * argv_put <= argv_get. */ +}; + +static void +initialize_argparse(struct argparse *ap, int argc, char **argv) +{ + ap->argc = argc; + ap->argn = 0; + ap->argv_get = ap->argv_put = argv; +} + +/* Finish argument parsing, without processing the remaining arguments. + * Write new argument count into _argc. */ +static void +finalize_argparse(struct argparse *ap, int *_argc, char **argv) +{ + /* Move the remaining arguments */ + for(; ap->argn < ap->argc; ap->argn++) + *ap->argv_put++ = *ap->argv_get++; + + /* Update the argument count */ + *_argc = ap->argc; + + /* Insert a terminating NULL */ + argv[ap->argc] = NULL; +} + +/* Delete the current argument. The argument will not be visible + * when argument parsing is done. */ +static void +delete_argument(struct argparse *ap) +{ + if (ap->argn >= ap->argc) { + fprintf(stderr, "delete_argument\n"); + } + ap->argc--; + ap->argv_get++; +} + +/* Go to the next argument. Also, move the current argument to its + * final location in argv. */ +static void +next_argument(struct argparse *ap) +{ + if (ap->argn >= ap->argc) { + fprintf(stderr, "next_argument\n"); + } + /* Move argument to its new location. */ + *ap->argv_put++ = *ap->argv_get++; + ap->argn++; +} + +static int +is_end_of_arguments(struct argparse *ap) +{ + return ap->argn == ap->argc; +} + +/* Get the current argument */ +static char * +get_argument(struct argparse *ap) +{ + return *ap->argv_get; +} + +/* Get the current argument, and also delete it */ +static char * +consume_argument(struct argparse *ap) +{ + char *ret = get_argument(ap); + delete_argument(ap); + return ret; +} + +/****************************************************************************/ + +/* The result of parsing a command-line argument */ +typedef enum { + ARGPARSE_OK, /* Success */ + ARGPARSE_ERROR, /* Error */ + ARGPARSE_DONE /* Success, and do not continue parsing */ +} result; + +typedef result parse_action(struct argparse *ap, struct pb_Parameters *params); + + +/* A command-line option */ +struct option { + char short_name; /* If not 0, the one-character + * name of this option */ + const char *long_name; /* If not NULL, the long name of this option */ + parse_action *action; /* What to do when this option occurs. + * Sentinel value is NULL. + */ +}; + +/* Output file + * + * -o FILE + */ +static result +parse_output_file(struct argparse *ap, struct pb_Parameters *params) +{ + if (is_end_of_arguments(ap)) + { + report_parse_error("Expecting file name after '-o'\n"); + return ARGPARSE_ERROR; + } + + /* Replace the output file name */ + free(params->outFile); + params->outFile = strdup(consume_argument(ap)); + + return ARGPARSE_OK; +} + +/* Input files + * + * -i FILE,FILE,... + */ +static result +parse_input_files(struct argparse *ap, struct pb_Parameters *params) +{ + if (is_end_of_arguments(ap)) + { + report_parse_error("Expecting file name after '-i'\n"); + return ARGPARSE_ERROR; + } + + /* Replace the input file list */ + pb_FreeStringArray(params->inpFiles); + params->inpFiles = read_string_array(consume_argument(ap)); + return ARGPARSE_OK; +} + +/* End of options + * + * -- + */ + +static result +parse_end_options(struct argparse *ap, struct pb_Parameters *params) +{ + return ARGPARSE_DONE; +} + +/* OpenCL device + * + * --device X + */ + +static result +parse_device(struct argparse *ap, struct pb_Parameters *params) +{ + /* Read the next argument, which specifies a device */ + + if (is_end_of_arguments(ap)) + { + report_parse_error("Expecting device specification after '--device'\n"); + return ARGPARSE_ERROR; + } + + char *device_string = consume_argument(ap); + struct pb_DeviceParam *device_param = read_device_param(device_string); + + if (!device_param) { + report_parse_error("Unrecognized device specification format on command line\n"); + return ARGPARSE_ERROR; + } + + /* Save the result */ + pb_FreeDeviceParam(params->device); + params->device = device_param; + + return ARGPARSE_OK; +} + +static result +parse_platform(struct argparse *ap, struct pb_Parameters *params) +{ + /* Read the next argument, which specifies a platform */ + + if (is_end_of_arguments(ap)) + { + report_parse_error("Expecting device specification after '--platform'\n"); + return ARGPARSE_ERROR; + } + + char *platform_string = consume_argument(ap); + struct pb_PlatformParam *platform_param = read_platform_param(platform_string); + + if (!platform_param) { + report_parse_error("Unrecognized platform specification format on command line\n"); + return ARGPARSE_ERROR; + } + + /* Save the result */ + pb_FreePlatformParam(params->platform); + params->platform = platform_param; + + return ARGPARSE_OK; +} + + +static struct option options[] = { + { 'o', NULL, &parse_output_file }, + { 'i', NULL, &parse_input_files }, + { '-', NULL, &parse_end_options }, + { 0, "device", &parse_device }, + { 0, "platform", &parse_platform }, + { 0, NULL, NULL } +}; + +static int +is_last_option(struct option *op) +{ + return op->action == NULL; +} + +/****************************************************************************/ + +/* Parse command-line parameters. + * Return zero on error, nonzero otherwise. + * On error, the other outputs may be invalid. + * + * The information collected from parameters is used to update + * 'ret'. 'ret' should be initialized. + * + * '_argc' and 'argv' are updated to contain only the unprocessed arguments. + */ +static int +pb_ParseParameters (struct pb_Parameters *ret, int *_argc, char **argv) +{ + char *err_message; + struct argparse ap; + + /* Each argument */ + initialize_argparse(&ap, *_argc, argv); + while(!is_end_of_arguments(&ap)) { + result arg_result; /* Result of parsing this option */ + char *arg = get_argument(&ap); + + /* Process this argument */ + if (arg[0] == '-') { + /* Single-character flag */ + if ((arg[1] != 0) && (arg[2] == 0)) { + delete_argument(&ap); /* This argument is consumed here */ + + /* Find a matching short option */ + struct option *op; + for (op = options; !is_last_option(op); op++) { + if (op->short_name == arg[1]) { + arg_result = (*op->action)(&ap, ret); + goto option_was_processed; + } + } + + /* No option matches */ + report_parse_error("Unexpected command-line parameter\n"); + arg_result = ARGPARSE_ERROR; + goto option_was_processed; + } + + /* Long flag */ + if (arg[1] == '-') { + delete_argument(&ap); /* This argument is consumed here */ + + /* Find a matching long option */ + struct option *op; + for (op = options; !is_last_option(op); op++) { + if (op->long_name && strcmp(&arg[2], op->long_name) == 0) { + arg_result = (*op->action)(&ap, ret); + goto option_was_processed; + } + } + + /* No option matches */ + report_parse_error("Unexpected command-line parameter\n"); + arg_result = ARGPARSE_ERROR; + goto option_was_processed; + } + } + else { + /* Other arguments are ignored */ + next_argument(&ap); + arg_result = ARGPARSE_OK; + goto option_was_processed; + } + + option_was_processed: + /* Decide what to do next based on 'arg_result' */ + switch(arg_result) { + case ARGPARSE_OK: + /* Continue processing */ + break; + + case ARGPARSE_ERROR: + /* Error exit from the function */ + return 0; + + case ARGPARSE_DONE: + /* Normal exit from the argument parsing loop */ + goto end_of_options; + } + } /* end for each argument */ + + /* If all arguments were processed, then normal exit from the loop */ + + end_of_options: + finalize_argparse(&ap, _argc, argv); + return 1; +} + +/*****************************************************************************/ +/* Other exported functions */ + +struct pb_Parameters * +pb_ReadParameters(int *_argc, char **argv) +{ + struct pb_Parameters *ret = + (struct pb_Parameters *)malloc(sizeof(struct pb_Parameters)); + + /* Initialize the parameters structure */ + ret->outFile = NULL; + ret->inpFiles = (char **)malloc(sizeof(char *)); + ret->inpFiles[0] = NULL; + ret->platform = NULL; + ret->device = NULL; + + /* Read parameters and update _argc, argv */ + if (!pb_ParseParameters(ret, _argc, argv)) { + /* Parse error */ + pb_FreeParameters(ret); + return NULL; + } + + return ret; +} + +int +pb_Parameters_CountInputs(struct pb_Parameters *p) +{ + int n; + + for (n = 0; p->inpFiles[n]; n++); + return n; +} + diff --git a/benchmarks/opencl/mri-q/computeQ.c b/benchmarks/opencl/mri-q/computeQ.c new file mode 100644 index 00000000..65ed6f4d --- /dev/null +++ b/benchmarks/opencl/mri-q/computeQ.c @@ -0,0 +1,118 @@ +/*************************************************************************** + *cr + *cr (C) Copyright 2007 The Board of Trustees of the + *cr University of Illinois + *cr All Rights Reserved + *cr + ***************************************************************************/ + +#include +#include +#include +#include "ocl.h" +#include "macros.h" +#include "computeQ.h" +#include "parboil.h" + +#define NC 1 + +void computePhiMag_GPU(int numK,cl_mem phiR_d,cl_mem phiI_d,cl_mem phiMag_d,clPrmtr* clPrm) +{ + int phiMagBlocks = numK / KERNEL_PHI_MAG_THREADS_PER_BLOCK; + if (numK % KERNEL_PHI_MAG_THREADS_PER_BLOCK) + phiMagBlocks++; + + size_t DimPhiMagBlock = KERNEL_PHI_MAG_THREADS_PER_BLOCK; + size_t DimPhiMagGrid = phiMagBlocks*KERNEL_PHI_MAG_THREADS_PER_BLOCK; + + cl_int clStatus; + clStatus = clSetKernelArg(clPrm->clKernel,0,sizeof(cl_mem),&phiR_d); + clStatus = clSetKernelArg(clPrm->clKernel,1,sizeof(cl_mem),&phiI_d); + clStatus = clSetKernelArg(clPrm->clKernel,2,sizeof(cl_mem),&phiMag_d); + clStatus = clSetKernelArg(clPrm->clKernel,3,sizeof(int),&numK); + CHECK_ERROR("clSetKernelArg") + + clStatus = clEnqueueNDRangeKernel(clPrm->clCommandQueue,clPrm->clKernel,1,NULL,&DimPhiMagGrid,&DimPhiMagBlock,0,NULL,NULL); + CHECK_ERROR("clEnqueueNDRangeKernel") +} + +static +unsigned long long int +readElapsedTime(cl_event internal) +{ + cl_int status; + cl_ulong t_begin, t_end; + status = clGetEventProfilingInfo(internal, CL_PROFILING_COMMAND_START, + sizeof(cl_ulong), &t_begin, NULL); + if (status != CL_SUCCESS) return 0; + status = clGetEventProfilingInfo(internal, CL_PROFILING_COMMAND_END, + sizeof(cl_ulong), &t_end, NULL); + if (status != CL_SUCCESS) return 0; + return (unsigned long long int)(t_end - t_begin); +} + + +void computeQ_GPU (int numK,int numX, + cl_mem x_d, cl_mem y_d, cl_mem z_d, + struct kValues* kVals, + cl_mem Qr_d, cl_mem Qi_d, + clPrmtr* clPrm) +{ + int QGrids = numK / KERNEL_Q_K_ELEMS_PER_GRID; + if (numK % KERNEL_Q_K_ELEMS_PER_GRID) + QGrids++; + int QBlocks = numX / KERNEL_Q_THREADS_PER_BLOCK; + if (numX % KERNEL_Q_THREADS_PER_BLOCK) + QBlocks++; + + size_t DimQBlock = KERNEL_Q_THREADS_PER_BLOCK/NC; + size_t DimQGrid = QBlocks*KERNEL_Q_THREADS_PER_BLOCK/NC; + + cl_int clStatus; + cl_mem ck; + ck = clCreateBuffer(clPrm->clContext,CL_MEM_READ_WRITE,KERNEL_Q_K_ELEMS_PER_GRID*sizeof(struct kValues),NULL,&clStatus); + + int QGrid; + for (QGrid = 0; QGrid < QGrids; QGrid++) { + // Put the tile of K values into constant mem + int QGridBase = QGrid * KERNEL_Q_K_ELEMS_PER_GRID; + struct kValues* kValsTile = kVals + QGridBase; + int numElems = MIN(KERNEL_Q_K_ELEMS_PER_GRID, numK - QGridBase); + + clStatus = clEnqueueWriteBuffer(clPrm->clCommandQueue,ck,CL_TRUE,0,numElems*sizeof(struct kValues),kValsTile,0,NULL,NULL); + CHECK_ERROR("clEnqueueWriteBuffer") + + clStatus = clSetKernelArg(clPrm->clKernel,0,sizeof(int),&numK); + clStatus = clSetKernelArg(clPrm->clKernel,1,sizeof(int),&QGridBase); + clStatus = clSetKernelArg(clPrm->clKernel,2,sizeof(cl_mem),&x_d); + clStatus = clSetKernelArg(clPrm->clKernel,3,sizeof(cl_mem),&y_d); + clStatus = clSetKernelArg(clPrm->clKernel,4,sizeof(cl_mem),&z_d); + clStatus = clSetKernelArg(clPrm->clKernel,5,sizeof(cl_mem),&Qr_d); + clStatus = clSetKernelArg(clPrm->clKernel,6,sizeof(cl_mem),&Qi_d); + clStatus = clSetKernelArg(clPrm->clKernel,7,sizeof(cl_mem),&ck); + CHECK_ERROR("clSetKernelArg") + + printf ("Grid: %d, Block: %d\n", DimQGrid, DimQBlock); + + #define TIMED_EXECUTION + #ifdef TIMED_EXECUTION + cl_event e; + clStatus = clEnqueueNDRangeKernel(clPrm->clCommandQueue,clPrm->clKernel,1,NULL,&DimQGrid,&DimQBlock,0,NULL,&e); + CHECK_ERROR("clEnqueueNDRangeKernel") + clWaitForEvents(1, &e); + printf ("%llu\n", readElapsedTime(e)); + #else + clStatus = clEnqueueNDRangeKernel(clPrm->clCommandQueue,clPrm->clKernel,1,NULL,&DimQGrid,&DimQBlock,0,NULL,NULL); + CHECK_ERROR("clEnqueueNDRangeKernel") + #endif + } +} + +void createDataStructsCPU(int numK, int numX, float** phiMag, + float** Qr, float** Qi) +{ + *phiMag = (float* ) memalign(16, numK * sizeof(float)); + *Qr = (float*) memalign(16, numX * sizeof (float)); + *Qi = (float*) memalign(16, numX * sizeof (float)); +} + diff --git a/benchmarks/opencl/mri-q/computeQ.h b/benchmarks/opencl/mri-q/computeQ.h new file mode 100644 index 00000000..ec919220 --- /dev/null +++ b/benchmarks/opencl/mri-q/computeQ.h @@ -0,0 +1,14 @@ +#ifndef __COMPUTEQ__ +#define __COMPUTEQ__ + +void computePhiMag_GPU(int numK,cl_mem phiR_d,cl_mem phiI_d,cl_mem phiMag_d,clPrmtr* clPrm); +void computeQ_GPU (int numK,int numX, + cl_mem x_d, cl_mem y_d, cl_mem z_d, + struct kValues* kVals, + cl_mem Qr_d, cl_mem Qi_d, + clPrmtr* clPrm); + +void createDataStructsCPU(int numK, int numX, float** phiMag, + float** Qr, float** Qi); + +#endif diff --git a/benchmarks/opencl/mri-q/file.cc b/benchmarks/opencl/mri-q/file.cc new file mode 100644 index 00000000..15b07075 --- /dev/null +++ b/benchmarks/opencl/mri-q/file.cc @@ -0,0 +1,78 @@ +/*************************************************************************** + *cr + *cr (C) Copyright 2007 The Board of Trustees of the + *cr University of Illinois + *cr All Rights Reserved + *cr + ***************************************************************************/ + +//#include +#include +#include +#include +#include + +#include "file.h" + +#if __BYTE_ORDER != __LITTLE_ENDIAN +# error "File I/O is not implemented for this system: wrong endianness." +#endif + +extern "C" +void inputData(char* fName, int* _numK, int* _numX, + float** kx, float** ky, float** kz, + float** x, float** y, float** z, + float** phiR, float** phiI) +{ + int numK, numX; + FILE* fid = fopen(fName, "r"); + + if (fid == NULL) + { + fprintf(stderr, "Cannot open input file\n"); + exit(-1); + } + fread (&numK, sizeof (int), 1, fid); + *_numK = numK; + fread (&numX, sizeof (int), 1, fid); + *_numX = numX; + *kx = (float *) memalign(16, numK * sizeof (float)); + fread (*kx, sizeof (float), numK, fid); + *ky = (float *) memalign(16, numK * sizeof (float)); + fread (*ky, sizeof (float), numK, fid); + *kz = (float *) memalign(16, numK * sizeof (float)); + fread (*kz, sizeof (float), numK, fid); + *x = (float *) memalign(16, numX * sizeof (float)); + fread (*x, sizeof (float), numX, fid); + *y = (float *) memalign(16, numX * sizeof (float)); + fread (*y, sizeof (float), numX, fid); + *z = (float *) memalign(16, numX * sizeof (float)); + fread (*z, sizeof (float), numX, fid); + *phiR = (float *) memalign(16, numK * sizeof (float)); + fread (*phiR, sizeof (float), numK, fid); + *phiI = (float *) memalign(16, numK * sizeof (float)); + fread (*phiI, sizeof (float), numK, fid); + fclose (fid); +} + +extern "C" +void outputData(char* fName, float* outR, float* outI, int numX) +{ + FILE* fid = fopen(fName, "w"); + uint32_t tmp32; + + if (fid == NULL) + { + fprintf(stderr, "Cannot open output file\n"); + exit(-1); + } + + /* Write the data size */ + tmp32 = numX; + fwrite(&tmp32, sizeof(uint32_t), 1, fid); + + /* Write the reconstructed data */ + fwrite (outR, sizeof (float), numX, fid); + fwrite (outI, sizeof (float), numX, fid); + fclose (fid); +} diff --git a/benchmarks/opencl/mri-q/file.h b/benchmarks/opencl/mri-q/file.h new file mode 100644 index 00000000..c6a61ef4 --- /dev/null +++ b/benchmarks/opencl/mri-q/file.h @@ -0,0 +1,22 @@ +/*************************************************************************** + *cr + *cr (C) Copyright 2007 The Board of Trustees of the + *cr University of Illinois + *cr All Rights Reserved + *cr + ***************************************************************************/ + +#ifdef __cplusplus +extern "C" { +#endif + +void inputData(char* fName, int* _numK, int* _numX, + float** kx, float** ky, float** kz, + float** x, float** y, float** z, + float** phiR, float** phiI); + +void outputData(char* fName, float* outR, float* outI, int numX); + +#ifdef __cplusplus +} +#endif diff --git a/benchmarks/opencl/mri-q/gpu_info.c b/benchmarks/opencl/mri-q/gpu_info.c new file mode 100644 index 00000000..4d641f81 --- /dev/null +++ b/benchmarks/opencl/mri-q/gpu_info.c @@ -0,0 +1,55 @@ +/*************************************************************************** + *cr + *cr (C) Copyright 2010 The Board of Trustees of the + *cr University of Illinois + *cr All Rights Reserved + *cr + ***************************************************************************/ +//#include +#include +#include +#include +#include + +#include "gpu_info.h" + +void compute_active_thread(size_t *thread, + size_t *grid, + int task, + int pad, + int major, + int minor, + int sm) +{ + int max_thread; + int max_block=8; + if(major==1) + { + if(minor>=2) + max_thread=1024; + else + max_thread=768; + } + else if(major==2) + max_thread=1536; + else + //newer GPU //keep using 2.0 + max_thread=1536; + + int _grid; + int _thread; + + if(task*pad>sm*max_thread) + { + _thread=max_thread/max_block; + _grid = ((task*pad+_thread-1)/_thread)*_thread; + } + else + { + _thread=pad; + _grid=task*pad; + } + + thread[0]=_thread; + grid[0]=_grid; +} diff --git a/benchmarks/opencl/mri-q/gpu_info.h b/benchmarks/opencl/mri-q/gpu_info.h new file mode 100644 index 00000000..4219cda9 --- /dev/null +++ b/benchmarks/opencl/mri-q/gpu_info.h @@ -0,0 +1,20 @@ +/*************************************************************************** + *cr + *cr (C) Copyright 2010 The Board of Trustees of the + *cr University of Illinois + *cr All Rights Reserved + *cr + ***************************************************************************/ + +#ifndef __GPUINFOH__ +#define __GPUINFOH__ + +void compute_active_thread(size_t *thread, + size_t *grid, + int task, + int pad, + int major, + int minor, + int sm); + +#endif diff --git a/benchmarks/opencl/mri-q/kernel.cl b/benchmarks/opencl/mri-q/kernel.cl new file mode 100644 index 00000000..39a1842e --- /dev/null +++ b/benchmarks/opencl/mri-q/kernel.cl @@ -0,0 +1,51 @@ +#include "macros.h" + +__kernel void +ComputePhiMag_GPU(__global float* phiR, __global float* phiI, __global float* phiMag, int numK) { + int indexK = get_global_id(0); + float real = indexK; + float imag = indexK; + if (indexK < numK) { + /*float*/ real = phiR[indexK]; + /*float*/ imag = phiI[indexK]; + phiMag[indexK] = real*real + imag*imag; + } +} + +__kernel void +ComputeQ_GPU(int numK, int kGlobalIndex, + __global float* x, __global float* y, __global float* z, + __global float* Qr, __global float* Qi, __global struct kValues* ck) +{ + float sX; + float sY; + float sZ; + float sQr; + float sQi; + + // Determine the element of the X arrays computed by this thread + int xIndex = get_group_id(0)*KERNEL_Q_THREADS_PER_BLOCK + get_local_id(0); + + // Read block's X values from global mem to shared mem + sX = x[xIndex]; + sY = y[xIndex]; + sZ = z[xIndex]; + sQr = Qr[xIndex]; + sQi = Qi[xIndex]; + + int kIndex = 0; + for (; (kIndex < KERNEL_Q_K_ELEMS_PER_GRID); kIndex++) { + if (kGlobalIndex < numK) { + float expArg; + expArg = PIx2 * (ck[kIndex].Kx * sX + + ck[kIndex].Ky * sY + + ck[kIndex].Kz * sZ); + sQr = sQr + ck[kIndex].PhiMag * cos(expArg); // native_cos(expArg); + sQi = sQi + ck[kIndex].PhiMag * sin(expArg); // native_sin(expArg); + } + kGlobalIndex++; + } + + Qr[xIndex] = sQr; + Qi[xIndex] = sQi; +} diff --git a/benchmarks/opencl/mri-q/libmri-q.a b/benchmarks/opencl/mri-q/libmri-q.a new file mode 100644 index 00000000..372b1941 Binary files /dev/null and b/benchmarks/opencl/mri-q/libmri-q.a differ diff --git a/benchmarks/opencl/mri-q/libsgemm.a b/benchmarks/opencl/mri-q/libsgemm.a new file mode 100644 index 00000000..372b1941 Binary files /dev/null and b/benchmarks/opencl/mri-q/libsgemm.a differ diff --git a/benchmarks/opencl/mri-q/macros.h b/benchmarks/opencl/mri-q/macros.h new file mode 100644 index 00000000..501ead7e --- /dev/null +++ b/benchmarks/opencl/mri-q/macros.h @@ -0,0 +1,21 @@ +#ifndef __MACROS__ +#define __MACROS__ + +#define PI 3.1415926535897932384626433832795029f +#define PIx2 6.2831853071795864769252867665590058f + +#define MIN(X,Y) ((X) < (Y) ? (X) : (Y)) +#define K_ELEMS_PER_GRID 2048 + +#define KERNEL_PHI_MAG_THREADS_PER_BLOCK 256 +#define KERNEL_Q_THREADS_PER_BLOCK 256 +#define KERNEL_Q_K_ELEMS_PER_GRID 1024 + +struct kValues { + float Kx; + float Ky; + float Kz; + float PhiMag; +}; + +#endif diff --git a/benchmarks/opencl/mri-q/main.cc b/benchmarks/opencl/mri-q/main.cc new file mode 100644 index 00000000..9288845f --- /dev/null +++ b/benchmarks/opencl/mri-q/main.cc @@ -0,0 +1,293 @@ +/*************************************************************************** + *cr + *cr (C) Copyright 2007 The Board of Trustees of the + *cr University of Illinois + *cr All Rights Reserved + *cr + ***************************************************************************/ + +/* + * C code for creating the Q data structure for fast convolution-based + * Hessian multiplication for arbitrary k-space trajectories. + * + * Inputs: + * kx - VECTOR of kx values, same length as ky and kz + * ky - VECTOR of ky values, same length as kx and kz + * kz - VECTOR of kz values, same length as kx and ky + * x - VECTOR of x values, same length as y and z + * y - VECTOR of y values, same length as x and z + * z - VECTOR of z values, same length as x and y + * phi - VECTOR of the Fourier transform of the spatial basis + * function, evaluated at [kx, ky, kz]. Same length as kx, ky, and kz. + * + * recommended g++ options: + * -O3 -lm -ffast-math -funroll-all-loops + */ + +#include +#include +#include +#include + +#include "ocl.h" +#include "file.h" +#include "macros.h" +#include "computeQ.h" + +static void +setupMemoryGPU(int num, int size, cl_mem* dev_ptr, float* host_ptr,clPrmtr* clPrm) +{ + cl_int clStatus; + *dev_ptr = clCreateBuffer(clPrm->clContext,CL_MEM_READ_ONLY,num*size,NULL,&clStatus); + CHECK_ERROR("clCreateBuffer"); + clStatus = clEnqueueWriteBuffer(clPrm->clCommandQueue,*dev_ptr,CL_TRUE,0,num*size,host_ptr,0,NULL,NULL); + CHECK_ERROR("clEnequeueWriteBuffer"); +} + +static void +cleanupMemoryGPU(int num, int size, cl_mem* dev_ptr, float* host_ptr, clPrmtr* clPrm) +{ + cl_int clStatus; + clStatus = clEnqueueReadBuffer(clPrm->clCommandQueue,*dev_ptr,CL_TRUE,0,num*size,host_ptr,0,NULL,NULL); + CHECK_ERROR("clEnqueueReadBuffer") + clStatus = clReleaseMemObject(*dev_ptr); + CHECK_ERROR("clReleaseMemObject") +} + +int +main (int argc, char *argv[]) { + int numX, numK; /* Number of X and K values */ + int original_numK; /* Number of K values in input file */ + float *kx, *ky, *kz; /* K trajectory (3D vectors) */ + float *x, *y, *z; /* X coordinates (3D vectors) */ + float *phiR, *phiI; /* Phi values (complex) */ + float *phiMag; /* Magnitude of Phi */ + float *Qr, *Qi; /* Q signal (complex) */ + + struct kValues* kVals; + + struct pb_Parameters *params; + struct pb_TimerSet timers; + + pb_InitializeTimerSet(&timers); + + /* Read command line */ + params = pb_ReadParameters(&argc, argv); + /*if ((params->inpFiles[0] == NULL) || (params->inpFiles[1] != NULL)) + { + fprintf(stderr, "Expecting one input filename\n"); + exit(-1); + }*/ + params->inpFiles = (char **)malloc(sizeof(char *) * 2); + params->inpFiles[0] = (char *)malloc(100); + params->inpFiles[1] = NULL; + strncpy(params->inpFiles[0], "32_32_32_dataset.bin", 100); + + /* Read in data */ + pb_SwitchToTimer(&timers, pb_TimerID_IO); + inputData(params->inpFiles[0], + &original_numK, &numX, + &kx, &ky, &kz, + &x, &y, &z, + &phiR, &phiI); + + printf("OK\n"); + + /* Reduce the number of k-space samples if a number is given + * on the command line */ + if (argc < 2) + numK = original_numK; + else + { + int inputK; + char *end; + inputK = strtol(argv[1], &end, 10); + if (end == argv[1]) + { + fprintf(stderr, "Expecting an integer parameter\n"); + exit(-1); + } + + numK = MIN(inputK, original_numK); + } + + printf("%d pixels in output; %d samples in trajectory; using %d samples\n", + numX, original_numK, numK); + + pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); + + clPrmtr clPrm; + + pb_Context* pb_context; + pb_context = pb_InitOpenCLContext(params); + if (pb_context == NULL) { + fprintf (stderr, "Error: No OpenCL platform/device can be found."); + return -1; + } + + cl_int clStatus; + cl_device_id clDevice = (cl_device_id) pb_context->clDeviceId; + cl_platform_id clPlatform = (cl_platform_id) pb_context->clPlatformId; + clPrm.clContext = (cl_context) pb_context->clContext; + + clPrm.clCommandQueue = clCreateCommandQueue(clPrm.clContext,clDevice,CL_QUEUE_PROFILING_ENABLE,&clStatus); + CHECK_ERROR("clCreateCommandQueue") + + pb_SetOpenCL(&(clPrm.clContext), &(clPrm.clCommandQueue)); + + printf("OK\n"); + + //const char* clSource[] = {readFile("src/opencl_base/kernels.cl")}; + //cl_program clProgram = clCreateProgramWithSource(clPrm.clContext,1,clSource,NULL,&clStatus); + cl_program clProgram = clCreateProgramWithBuiltInKernels( + clPrm.clContext, 1, &clDevice, "ComputePhiMag_GPU;ComputeQ_GPU", &clStatus); + CHECK_ERROR("clCreateProgramWithSource") + + char options[50]; + sprintf(options,"-I src/opencl_nvidia"); + clStatus = clBuildProgram(clProgram,0,NULL,options,NULL,NULL); + if (clStatus != CL_SUCCESS) { + char buf[4096]; + clGetProgramBuildInfo(clProgram, clDevice, CL_PROGRAM_BUILD_LOG, 4096, buf, NULL); + printf ("%s\n", buf); + CHECK_ERROR("clBuildProgram") + } + + /* Create CPU data structures */ + createDataStructsCPU(numK, numX, &phiMag, &Qr, &Qi); + + /* GPU section 1 (precompute PhiMag) */ + { + clPrm.clKernel = clCreateKernel(clProgram,"ComputePhiMag_GPU",&clStatus); + CHECK_ERROR("clCreateKernel") + + /* Mirror several data structures on the device */ + cl_mem phiR_d; + cl_mem phiI_d; + cl_mem phiMag_d; + + pb_SwitchToTimer(&timers, pb_TimerID_COPY); + + setupMemoryGPU(numK,sizeof(float),&phiR_d,phiR,&clPrm); + setupMemoryGPU(numK,sizeof(float),&phiI_d,phiI,&clPrm); + phiMag_d = clCreateBuffer(clPrm.clContext,CL_MEM_WRITE_ONLY,numK*sizeof(float),NULL,&clStatus); + CHECK_ERROR("clCreateBuffer") + + clStatus = clFinish(clPrm.clCommandQueue); + CHECK_ERROR("clFinish") + + pb_SwitchToTimer(&timers, pb_TimerID_KERNEL); + + computePhiMag_GPU(numK, phiR_d, phiI_d, phiMag_d, &clPrm); + + clStatus = clFinish(clPrm.clCommandQueue); + CHECK_ERROR("clFinish") + + pb_SwitchToTimer(&timers, pb_TimerID_COPY); + + cleanupMemoryGPU(numK,sizeof(float),&phiMag_d,phiMag,&clPrm); + + clStatus = clReleaseMemObject(phiR_d); + CHECK_ERROR("clReleaseMemObject") + clStatus = clReleaseMemObject(phiI_d); + CHECK_ERROR("clReleaseMemObject") + } + + pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); + + kVals = (struct kValues*)calloc(numK, sizeof (struct kValues)); + + int k; + for (k = 0; k < numK; k++) { + kVals[k].Kx = kx[k]; + kVals[k].Ky = ky[k]; + kVals[k].Kz = kz[k]; + kVals[k].PhiMag = phiMag[k]; + } + + free(phiMag); + + clStatus = clReleaseKernel(clPrm.clKernel); + + /* GPU section 2 */ + { + clPrm.clKernel = clCreateKernel(clProgram,"ComputeQ_GPU",&clStatus); + CHECK_ERROR("clCreateKernel") + + cl_mem x_d; + cl_mem y_d; + cl_mem z_d; + cl_mem Qr_d; + cl_mem Qi_d; + + pb_SwitchToTimer(&timers, pb_TimerID_COPY); + + setupMemoryGPU(numX,sizeof(float),&x_d,x,&clPrm); + setupMemoryGPU(numX,sizeof(float),&y_d,y,&clPrm); + setupMemoryGPU(numX,sizeof(float),&z_d,z,&clPrm); + + Qr_d = clCreateBuffer(clPrm.clContext,CL_MEM_READ_WRITE,numX*sizeof(float),NULL,&clStatus); + CHECK_ERROR("clCreateBuffer") + clMemSet(&clPrm,Qr_d,0,numX*sizeof(float)); + Qi_d = clCreateBuffer(clPrm.clContext,CL_MEM_READ_WRITE,numX*sizeof(float),NULL,&clStatus); + CHECK_ERROR("clCreateBuffer") + clMemSet(&clPrm,Qi_d,0,numX*sizeof(float)); + + clStatus = clFinish(clPrm.clCommandQueue); + CHECK_ERROR("clFinish") + + pb_SwitchToTimer(&timers, pb_TimerID_KERNEL); + + computeQ_GPU(numK, numX, x_d, y_d, z_d, kVals, Qr_d, Qi_d, &clPrm); + + clStatus = clFinish(clPrm.clCommandQueue); + CHECK_ERROR("clFinish") + + pb_SwitchToTimer(&timers, pb_TimerID_COPY); + + clStatus = clReleaseMemObject(x_d); + CHECK_ERROR("clReleaseMemObject") + clStatus = clReleaseMemObject(y_d); + CHECK_ERROR("clReleaseMemObject") + clStatus = clReleaseMemObject(z_d); + CHECK_ERROR("clReleaseMemObject") + cleanupMemoryGPU(numX,sizeof(float),&Qr_d,Qr,&clPrm); + cleanupMemoryGPU(numX,sizeof(float),&Qi_d,Qi,&clPrm); + } + + pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); + + if (params->outFile) + { + /* Write Q to file */ + pb_SwitchToTimer(&timers, pb_TimerID_IO); + outputData(params->outFile, Qr, Qi, numX); + pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); + } + + free (kx); + free (ky); + free (kz); + free (x); + free (y); + free (z); + free (phiR); + free (phiI); + free (kVals); + free (Qr); + free (Qi); + + //free((void*)clSource[0]); + + clStatus = clReleaseKernel(clPrm.clKernel); + clStatus = clReleaseProgram(clProgram); + clStatus = clReleaseCommandQueue(clPrm.clCommandQueue); + clStatus = clReleaseContext(clPrm.clContext); + + pb_SwitchToTimer(&timers, pb_TimerID_NONE); + pb_PrintTimerSet(&timers); + + pb_FreeParameters(params); + + return 0; +} diff --git a/benchmarks/opencl/mri-q/ocl copy.c b/benchmarks/opencl/mri-q/ocl copy.c new file mode 100644 index 00000000..9ce9a2f5 --- /dev/null +++ b/benchmarks/opencl/mri-q/ocl copy.c @@ -0,0 +1,50 @@ +#include +#include +#include +#include +#include "ocl.h" + +char* readFile(const char* fileName) +{ + FILE* fp; + fp = fopen(fileName,"r"); + if(fp == NULL) + { + printf("Error 1!\n"); + exit(1); + } + + fseek(fp,0,SEEK_END); + long size = ftell(fp); + rewind(fp); + + char* buffer = (char*)malloc(sizeof(char)*(size+1)); + if(buffer == NULL) + { + printf("Error 2!\n"); + fclose(fp); + exit(1); + } + + size_t res = fread(buffer,1,size,fp); + if(res != size) + { + printf("Error 3!\n"); + fclose(fp); + exit(1); + } + + buffer[size] = 0; + fclose(fp); + return buffer; +} + +void clMemSet(cl_command_queue clCommandQueue, cl_mem buf, int val, size_t size) +{ + cl_int clStatus; + char* temp = (char*)malloc(size); + memset(temp,val,size); + clStatus = clEnqueueWriteBuffer(clCommandQueue,buf,CL_TRUE,0,size,temp,0,NULL,NULL); + CHECK_ERROR("clEnqueueWriteBuffer") + free(temp); +} diff --git a/benchmarks/opencl/mri-q/ocl copy.h b/benchmarks/opencl/mri-q/ocl copy.h new file mode 100644 index 00000000..8840a868 --- /dev/null +++ b/benchmarks/opencl/mri-q/ocl copy.h @@ -0,0 +1,21 @@ +#ifndef __OCLH__ +#define __OCLH__ + +typedef struct { + cl_uint major; + cl_uint minor; + cl_uint multiProcessorCount; +} OpenCLDeviceProp; + +void clMemSet(cl_command_queue, cl_mem, int, size_t); +char* readFile(const char*); + +#define CHECK_ERROR(errorMessage) \ + if(clStatus != CL_SUCCESS) \ + { \ + printf("Error: %s!\n",errorMessage); \ + printf("Line: %d\n",__LINE__); \ + exit(1); \ + } + +#endif diff --git a/benchmarks/opencl/mri-q/ocl.c b/benchmarks/opencl/mri-q/ocl.c new file mode 100644 index 00000000..61cd5fe6 --- /dev/null +++ b/benchmarks/opencl/mri-q/ocl.c @@ -0,0 +1,50 @@ +#include +#include +#include +#include "ocl.h" +#include + +char* readFile(const char* fileName) +{ + FILE* fp; + fp = fopen(fileName,"r"); + if(fp == NULL) + { + printf("Error 1!\n"); + exit(1); + } + + fseek(fp,0,SEEK_END); + long size = ftell(fp); + rewind(fp); + + char* buffer = (char*)malloc(sizeof(char)*(size+1)); + if(buffer == NULL) + { + printf("Error 2!\n"); + fclose(fp); + exit(1); + } + + size_t res = fread(buffer,1,size,fp); + if(res != size) + { + printf("Error 3!\n"); + fclose(fp); + exit(1); + } + + buffer[size] = 0; + fclose(fp); + return buffer; +} + +void clMemSet(clPrmtr* clPrm, cl_mem buf, int val, size_t size) +{ + cl_int clStatus; + char* temp = (char*)malloc(size); + memset(temp,val,size); + clStatus = clEnqueueWriteBuffer(clPrm->clCommandQueue,buf,CL_TRUE,0,size,temp,0,NULL,NULL); + CHECK_ERROR("clEnqueueWriteBuffer") + free(temp); +} diff --git a/benchmarks/opencl/mri-q/ocl.h b/benchmarks/opencl/mri-q/ocl.h new file mode 100644 index 00000000..04c33cd3 --- /dev/null +++ b/benchmarks/opencl/mri-q/ocl.h @@ -0,0 +1,23 @@ +#ifndef __OCLH__ +#define __OCLH__ + +#include + +typedef struct { + cl_context clContext; + cl_command_queue clCommandQueue; + cl_kernel clKernel; +} clPrmtr; + +void clMemSet(clPrmtr*, cl_mem, int, size_t); +char* readFile(const char*); + +#define CHECK_ERROR(errorMessage) \ + if(clStatus != CL_SUCCESS) \ + { \ + printf("Error: %s!\n",errorMessage); \ + printf("Line: %d\n",__LINE__); \ + exit(1); \ + } + +#endif diff --git a/benchmarks/opencl/mri-q/parboil.h b/benchmarks/opencl/mri-q/parboil.h new file mode 100644 index 00000000..4c9a8b5e --- /dev/null +++ b/benchmarks/opencl/mri-q/parboil.h @@ -0,0 +1,348 @@ +/* + * (c) 2010 The Board of Trustees of the University of Illinois. + */ +#ifndef PARBOIL_HEADER +#define PARBOIL_HEADER + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +/* A platform as specified by the user on the command line */ +struct pb_PlatformParam { + char *name; /* The platform name. This string is owned. */ + char *version; /* The platform version; may be NULL. + * This string is owned. */ +}; + +/* Create a PlatformParam from the given strings. + * 'name' must not be NULL. 'version' may be NULL. + * If not NULL, the strings should have been allocated by malloc(), + * and they will be owned by the returned object. + */ +struct pb_PlatformParam * +pb_PlatformParam(char *name, char *version); + +void +pb_FreePlatformParam(struct pb_PlatformParam *); + +/* A criterion for how to select a device */ +enum pb_DeviceSelectionCriterion { + pb_Device_INDEX, /* Enumerate the devices and select one + * by its number */ + pb_Device_CPU, /* Select a CPU device */ + pb_Device_GPU, /* Select a GPU device */ + pb_Device_ACCELERATOR, /* Select an accelerator device */ + pb_Device_NAME /* Select a device by name */ +}; + +/* A device as specified by the user on the command line */ +struct pb_DeviceParam { + enum pb_DeviceSelectionCriterion criterion; + union { + int index; /* If criterion == pb_Device_INDEX, + * the index of the device */ + char *name; /* If criterion == pb_Device_NAME, + * the name of the device. + * This string is owned. */ + }; +}; + +struct pb_DeviceParam * +pb_DeviceParam_index(int index); + +struct pb_DeviceParam * +pb_DeviceParam_cpu(void); + +struct pb_DeviceParam * +pb_DeviceParam_gpu(void); + +struct pb_DeviceParam * +pb_DeviceParam_accelerator(void); + +/* Create a by-name device selection criterion. + * The string should have been allocated by malloc(), and it will will be + * owned by the returned object. + */ +struct pb_DeviceParam * +pb_DeviceParam_name(char *name); + +void +pb_FreeDeviceParam(struct pb_DeviceParam *); + +/* Command line parameters for benchmarks */ +struct pb_Parameters { + char *outFile; /* If not NULL, the raw output of the + * computation should be saved to this + * file. The string is owned. */ + char **inpFiles; /* A NULL-terminated array of strings + * holding the input file(s) for the + * computation. The array and strings + * are owned. */ + struct pb_PlatformParam *platform; /* If not NULL, the platform + * specified on the command line. */ + struct pb_DeviceParam *device; /* If not NULL, the device + * specified on the command line. */ +}; + +/* Read command-line parameters. + * + * The argc and argv parameters to main are read, and any parameters + * interpreted by this function are removed from the argument list. + * + * A new instance of struct pb_Parameters is returned. + * If there is an error, then an error message is printed on stderr + * and NULL is returned. + */ +struct pb_Parameters * +pb_ReadParameters(int *_argc, char **argv); + +/* Free an instance of struct pb_Parameters. + */ +void +pb_FreeParameters(struct pb_Parameters *p); + +void +pb_FreeStringArray(char **); + +/* Count the number of input files in a pb_Parameters instance. + */ +int +pb_Parameters_CountInputs(struct pb_Parameters *p); + +/* A time or duration. */ +//#if _POSIX_VERSION >= 200112L +typedef unsigned long long pb_Timestamp; /* time in microseconds */ +//#else +//# error "Timestamps not implemented" +//#endif + +enum pb_TimerState { + pb_Timer_STOPPED, + pb_Timer_RUNNING, +}; + +struct pb_Timer { + enum pb_TimerState state; + pb_Timestamp elapsed; /* Amount of time elapsed so far */ + pb_Timestamp init; /* Beginning of the current time interval, + * if state is RUNNING. End of the last + * recorded time interfal otherwise. */ +}; + +/* Reset a timer. + * Use this to initialize a timer or to clear + * its elapsed time. The reset timer is stopped. + */ +void +pb_ResetTimer(struct pb_Timer *timer); + +/* Start a timer. The timer is set to RUNNING mode and + * time elapsed while the timer is running is added to + * the timer. + * The timer should not already be running. + */ +void +pb_StartTimer(struct pb_Timer *timer); + +/* Stop a timer. + * This stops adding elapsed time to the timer. + * The timer should not already be stopped. + */ +void +pb_StopTimer(struct pb_Timer *timer); + +/* Get the elapsed time in seconds. */ +double +pb_GetElapsedTime(struct pb_Timer *timer); + +/* Execution time is assigned to one of these categories. */ +enum pb_TimerID { + pb_TimerID_NONE = 0, + pb_TimerID_IO, /* Time spent in input/output */ + pb_TimerID_KERNEL, /* Time spent computing on the device, + * recorded asynchronously */ + pb_TimerID_COPY, /* Time spent synchronously moving data + * to/from device and allocating/freeing + * memory on the device */ + pb_TimerID_DRIVER, /* Time spent in the host interacting with the + * driver, primarily for recording the time + * spent queueing asynchronous operations */ + pb_TimerID_COPY_ASYNC, /* Time spent in asynchronous transfers */ + pb_TimerID_COMPUTE, /* Time for all program execution other + * than parsing command line arguments, + * I/O, kernel, and copy */ + pb_TimerID_OVERLAP, /* Time double-counted in asynchronous and + * host activity: automatically filled in, + * not intended for direct usage */ + pb_TimerID_LAST /* Number of timer IDs */ +}; + +/* Dynamic list of asynchronously tracked times between events */ +struct pb_async_time_marker_list { + char *label; // actually just a pointer to a string + enum pb_TimerID timerID; /* The ID to which the interval beginning + * with this marker should be attributed */ + void * marker; + //cudaEvent_t marker; /* The driver event for this marker */ + struct pb_async_time_marker_list *next; +}; + +struct pb_SubTimer { + char *label; + struct pb_Timer timer; + struct pb_SubTimer *next; +}; + +struct pb_SubTimerList { + struct pb_SubTimer *current; + struct pb_SubTimer *subtimer_list; +}; + +/* A set of timers for recording execution times. */ +struct pb_TimerSet { + enum pb_TimerID current; + struct pb_async_time_marker_list* async_markers; + pb_Timestamp async_begin; + pb_Timestamp wall_begin; + struct pb_Timer timers[pb_TimerID_LAST]; + struct pb_SubTimerList *sub_timer_list[pb_TimerID_LAST]; +}; + +/* Reset all timers in the set. */ +void +pb_InitializeTimerSet(struct pb_TimerSet *timers); + +void +pb_AddSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID pb_Category); + +/* Select which timer the next interval of time should be accounted + * to. The selected timer is started and other timers are stopped. + * Using pb_TimerID_NONE stops all timers. */ +void +pb_SwitchToTimer(struct pb_TimerSet *timers, enum pb_TimerID timer); + +void +pb_SwitchToSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID category); + +/* Print timer values to standard output. */ +void +pb_PrintTimerSet(struct pb_TimerSet *timers); + +/* Release timer resources */ +void +pb_DestroyTimerSet(struct pb_TimerSet * timers); + +void +pb_SetOpenCL(void *clContextPtr, void *clCommandQueuePtr); + + +typedef struct pb_Device_tag { + char* name; + void* clDevice; + int id; + unsigned int in_use; + unsigned int available; +} pb_Device; + +struct pb_Context_tag; +typedef struct pb_Context_tag pb_Context; + +typedef struct pb_Platform_tag { + char* name; + char* version; + void* clPlatform; + unsigned int in_use; + pb_Context** contexts; + pb_Device** devices; +} pb_Platform; + +struct pb_Context_tag { + void* clPlatformId; + void* clContext; + void* clDeviceId; + pb_Platform* pb_platform; + pb_Device* pb_device; +}; + +// verbosely print out list of platforms and their devices to the console. +pb_Platform** +pb_GetPlatforms(); + +// Choose a platform according to the given platform specification +pb_Platform* +pb_GetPlatform(struct pb_PlatformParam *platform); + +// choose a platform: by name, name & version +pb_Platform* +pb_GetPlatformByName(const char* name); + +pb_Platform* +pb_GetPlatformByNameAndVersion(const char* name, const char* version); + +// Choose a device according to the given device specification +pb_Device* +pb_GetDevice(pb_Platform* pb_platform, struct pb_DeviceParam *device); + +pb_Device** +pb_GetDevices(pb_Platform* pb_platform); + +// choose a device by name. +pb_Device* +pb_GetDeviceByName(pb_Platform* pb_platform, const char* name); + +pb_Platform* +pb_GetPlatformByEnvVars(); + +pb_Context* +pb_InitOpenCLContext(struct pb_Parameters* parameters); + +void +pb_ReleasePlatforms(); + +void +pb_ReleaseContext(pb_Context* c); + +void +pb_PrintPlatformInfo(pb_Context* c); + +void +perf_init(); + +//#define MEASURE_KERNEL_TIME + +#include + +#ifdef MEASURE_KERNEL_TIME +#define clEnqueueNDRangeKernel(q,k,d,o,dg,db,a,b,c) pb_clEnqueueNDRangeKernel((q), (k), (d), (o), (dg), (db), (a), (b), (c)) +cl_int +pb_clEnqueueNDRangeKernel(cl_command_queue /* command_queue */, + cl_kernel /* kernel */, + cl_uint /* work_dim */, + const size_t * /* global_work_offset */, + const size_t * /* global_work_size */, + const size_t * /* local_work_size */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */); +#endif + +enum { T_FLOAT, T_DOUBLE, T_SHORT, T_INT, T_UCHAR }; +void pb_sig_float(char*, float*, int); +void pb_sig_double(char*, double*, int); +void pb_sig_short(char*, short*, int); +void pb_sig_int(char*, int*, int); +void pb_sig_uchar(char*, unsigned char*, unsigned int); +void pb_sig_clmem(char*, cl_command_queue, cl_mem, int); + +#ifdef __cplusplus +} +#endif + +#endif //PARBOIL_HEADER + diff --git a/benchmarks/opencl/mri-q/parboil_opencl.c b/benchmarks/opencl/mri-q/parboil_opencl.c new file mode 100644 index 00000000..a4db1680 --- /dev/null +++ b/benchmarks/opencl/mri-q/parboil_opencl.c @@ -0,0 +1,1394 @@ +/* + * (c) 2007 The Board of Trustees of the University of Illinois. + */ + +#include +#include +#include +#include +#include +#include + +#if _POSIX_VERSION >= 200112L +# include +#endif + +//#include "perfmon.h" + +cl_context *clContextPtr; +cl_command_queue *clCommandQueuePtr; + +// #define DISABLE_PARBOIL_TIMER + +/*****************************************************************************/ +/* Timer routines */ + +static int is_async(enum pb_TimerID timer) +{ + return (timer == pb_TimerID_KERNEL) || + (timer == pb_TimerID_COPY_ASYNC); +} + +static int is_blocking(enum pb_TimerID timer) +{ + return (timer == pb_TimerID_COPY) || (timer == pb_TimerID_NONE); +} + +#define INVALID_TIMERID pb_TimerID_LAST + +static int asyncs_outstanding(struct pb_TimerSet* timers) +{ + return (timers->async_markers != NULL) && + (timers->async_markers->timerID != INVALID_TIMERID); +} + +static struct pb_async_time_marker_list * +get_last_async(struct pb_TimerSet* timers) +{ + /* Find the last event recorded thus far */ + struct pb_async_time_marker_list * last_event = timers->async_markers; + if(last_event != NULL && last_event->timerID != INVALID_TIMERID) { + while(last_event->next != NULL && + last_event->next->timerID != INVALID_TIMERID) + last_event = last_event->next; + return last_event; + } else + return NULL; +} + +static void insert_marker(struct pb_TimerSet* tset, enum pb_TimerID timer) +{ + cl_int ciErrNum = CL_SUCCESS; + struct pb_async_time_marker_list ** new_event = &(tset->async_markers); + + while(*new_event != NULL && (*new_event)->timerID != INVALID_TIMERID) { + new_event = &((*new_event)->next); + } + + if(*new_event == NULL) { + *new_event = (struct pb_async_time_marker_list *) + malloc(sizeof(struct pb_async_time_marker_list)); + (*new_event)->marker = calloc(1, sizeof(cl_event)); + /* + // I don't think this is needed at all. I believe clEnqueueMarker 'creates' the event +#if ( __OPENCL_VERSION__ >= CL_VERSION_1_1 ) +fprintf(stderr, "Creating Marker [%d]\n", timer); + *((cl_event *)((*new_event)->marker)) = clCreateUserEvent(*clContextPtr, &ciErrNum); + if (ciErrNum != CL_SUCCESS) { + fprintf(stderr, "Error Creating User Event Object!\n"); + } + ciErrNum = clSetUserEventStatus(*((cl_event *)((*new_event)->marker)), CL_QUEUED); + if (ciErrNum != CL_SUCCESS) { + fprintf(stderr, "Error Setting User Event Status!\n"); + } +#endif +*/ + (*new_event)->next = NULL; + } + + /* valid event handle now aquired: insert the event record */ + (*new_event)->label = NULL; + (*new_event)->timerID = timer; + ciErrNum = clEnqueueMarker(*clCommandQueuePtr, (cl_event *)(*new_event)->marker); + if (ciErrNum != CL_SUCCESS) { + fprintf(stderr, "Error Enqueueing Marker!\n"); + } + +} + +static void insert_submarker(struct pb_TimerSet* tset, char *label, enum pb_TimerID timer) +{ + cl_int ciErrNum = CL_SUCCESS; + struct pb_async_time_marker_list ** new_event = &(tset->async_markers); + + while(*new_event != NULL && (*new_event)->timerID != INVALID_TIMERID) { + new_event = &((*new_event)->next); + } + + if(*new_event == NULL) { + *new_event = (struct pb_async_time_marker_list *) + malloc(sizeof(struct pb_async_time_marker_list)); + (*new_event)->marker = calloc(1, sizeof(cl_event)); + /* +#if ( __OPENCL_VERSION__ >= CL_VERSION_1_1 ) +fprintf(stderr, "Creating SubMarker %s[%d]\n", label, timer); + *((cl_event *)((*new_event)->marker)) = clCreateUserEvent(*clContextPtr, &ciErrNum); + if (ciErrNum != CL_SUCCESS) { + fprintf(stderr, "Error Creating User Event Object!\n"); + } + ciErrNum = clSetUserEventStatus(*((cl_event *)((*new_event)->marker)), CL_QUEUED); + if (ciErrNum != CL_SUCCESS) { + fprintf(stderr, "Error Setting User Event Status!\n"); + } +#endif +*/ + (*new_event)->next = NULL; + } + + /* valid event handle now aquired: insert the event record */ + (*new_event)->label = label; + (*new_event)->timerID = timer; + ciErrNum = clEnqueueMarker(*clCommandQueuePtr, (cl_event *)(*new_event)->marker); + if (ciErrNum != CL_SUCCESS) { + fprintf(stderr, "Error Enqueueing Marker!\n"); + } + +} + + +/* Assumes that all recorded events have completed */ +static pb_Timestamp record_async_times(struct pb_TimerSet* tset) +{ + struct pb_async_time_marker_list * next_interval = NULL; + struct pb_async_time_marker_list * last_marker = get_last_async(tset); + pb_Timestamp total_async_time = 0; + enum pb_TimerID timer; + + for(next_interval = tset->async_markers; next_interval != last_marker; + next_interval = next_interval->next) { + cl_ulong command_start=0, command_end=0; + cl_int ciErrNum = CL_SUCCESS; + + ciErrNum = clGetEventProfilingInfo(*((cl_event *)next_interval->marker), CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &command_start, NULL); + if (ciErrNum != CL_SUCCESS) { + fprintf(stderr, "Error getting first EventProfilingInfo: %d\n", ciErrNum); + } + + ciErrNum = clGetEventProfilingInfo(*((cl_event *)next_interval->next->marker), CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &command_end, NULL); + if (ciErrNum != CL_SUCCESS) { + fprintf(stderr, "Error getting second EventProfilingInfo: %d\n", ciErrNum); + } + + pb_Timestamp interval = (pb_Timestamp) (((double)(command_end - command_start)) / 1e3); + tset->timers[next_interval->timerID].elapsed += interval; + if (next_interval->label != NULL) { + struct pb_SubTimer *subtimer = tset->sub_timer_list[next_interval->timerID]->subtimer_list; + while (subtimer != NULL) { + if ( strcmp(subtimer->label, next_interval->label) == 0) { + subtimer->timer.elapsed += interval; + break; + } + subtimer = subtimer->next; + } + } + total_async_time += interval; + next_interval->timerID = INVALID_TIMERID; + } + + if(next_interval != NULL) + next_interval->timerID = INVALID_TIMERID; + + return total_async_time; +} + +static void +accumulate_time(pb_Timestamp *accum, + pb_Timestamp start, + pb_Timestamp end) +{ +//#if _POSIX_VERSION >= 200112L + *accum += end - start; +//#else +//# error "Timestamps not implemented for this system" +//#endif +} + +//#if _POSIX_VERSION >= 200112L +static pb_Timestamp get_time() +{ + //struct timeval tv; + //gettimeofday(&tv, NULL); + //return (pb_Timestamp) (tv.tv_sec * 1000000LL + tv.tv_usec); + return 0; +} +//#else +//# error "no supported time libraries are available on this platform" +//#endif + +void +pb_ResetTimer(struct pb_Timer *timer) +{ +//#ifndef DISABLE_PARBOIL_TIMER + timer->state = pb_Timer_STOPPED; + +//#if _POSIX_VERSION >= 200112L + timer->elapsed = 0; +//#else +//# error "pb_ResetTimer: not implemented for this system" +//#endif +//#endif +} + +void +pb_StartTimer(struct pb_Timer *timer) +{ +/*#ifndef DISABLE_PARBOIL_TIMER + if (timer->state != pb_Timer_STOPPED) { + fputs("Ignoring attempt to start a running timer\n", stderr); + return; + } + + timer->state = pb_Timer_RUNNING; + +#if _POSIX_VERSION >= 200112L + { + struct timeval tv; + gettimeofday(&tv, NULL); + timer->init = tv.tv_sec * 1000000LL + tv.tv_usec; + } +#else +# error "pb_StartTimer: not implemented for this system" +#endif +#endif*/ +} + +void +pb_StartTimerAndSubTimer(struct pb_Timer *timer, struct pb_Timer *subtimer) +{ +/*#ifndef DISABLE_PARBOIL_TIMER + + unsigned int numNotStopped = 0x3; // 11 + if (timer->state != pb_Timer_STOPPED) { + fputs("Warning: Timer was not stopped\n", stderr); + numNotStopped &= 0x1; // Zero out 2^1 + } + if (subtimer->state != pb_Timer_STOPPED) { + fputs("Warning: Subtimer was not stopped\n", stderr); + numNotStopped &= 0x2; // Zero out 2^0 + } + if (numNotStopped == 0x0) { + fputs("Ignoring attempt to start running timer and subtimer\n", stderr); + return; + } + + timer->state = pb_Timer_RUNNING; + subtimer->state = pb_Timer_RUNNING; + +#if _POSIX_VERSION >= 200112L + { + struct timeval tv; + gettimeofday(&tv, NULL); + + if (numNotStopped & 0x2) { + timer->init = tv.tv_sec * 1000000LL + tv.tv_usec; + } + + if (numNotStopped & 0x1) { + subtimer->init = tv.tv_sec * 1000000LL + tv.tv_usec; + } + } +#else +# error "pb_StartTimer: not implemented for this system" +#endif + +#endif*/ +} + +void +pb_StopTimer(struct pb_Timer *timer) +{ +/*#ifndef DISABLE_PARBOIL_TIMER + + pb_Timestamp fini; + + if (timer->state != pb_Timer_RUNNING) { + fputs("Ignoring attempt to stop a stopped timer\n", stderr); + return; + } + + timer->state = pb_Timer_STOPPED; + +#if _POSIX_VERSION >= 200112L + { + struct timeval tv; + gettimeofday(&tv, NULL); + fini = tv.tv_sec * 1000000LL + tv.tv_usec; + } +#else +# error "pb_StopTimer: not implemented for this system" +#endif + + accumulate_time(&timer->elapsed, timer->init, fini); + timer->init = fini; + +#endif*/ +} + +void pb_StopTimerAndSubTimer(struct pb_Timer *timer, struct pb_Timer *subtimer) { +/*#ifndef DISABLE_PARBOIL_TIMER + + pb_Timestamp fini; + + unsigned int numNotRunning = 0x3; // 11 + if (timer->state != pb_Timer_RUNNING) { + fputs("Warning: Timer was not running\n", stderr); + numNotRunning &= 0x1; // Zero out 2^1 + } + if (subtimer->state != pb_Timer_RUNNING) { + fputs("Warning: Subtimer was not running\n", stderr); + numNotRunning &= 0x2; // Zero out 2^0 + } + if (numNotRunning == 0x0) { + fputs("Ignoring attempt to stop stopped timer and subtimer\n", stderr); + return; + } + + + timer->state = pb_Timer_STOPPED; + subtimer->state = pb_Timer_STOPPED; + +#if _POSIX_VERSION >= 200112L + { + struct timeval tv; + gettimeofday(&tv, NULL); + fini = tv.tv_sec * 1000000LL + tv.tv_usec; + } +#else +# error "pb_StopTimer: not implemented for this system" +#endif + + if (numNotRunning & 0x2) { + accumulate_time(&timer->elapsed, timer->init, fini); + timer->init = fini; + } + + if (numNotRunning & 0x1) { + accumulate_time(&subtimer->elapsed, subtimer->init, fini); + subtimer->init = fini; + } + +#endif*/ +} + +/* Get the elapsed time in seconds. */ +double +pb_GetElapsedTime(struct pb_Timer *timer) +{ + /*double ret; +#ifndef DISABLE_PARBOIL_TIMER + + if (timer->state != pb_Timer_STOPPED) { + fputs("Elapsed time from a running timer is inaccurate\n", stderr); + } + +#if _POSIX_VERSION >= 200112L + ret = timer->elapsed / 1e6; +#else +# error "pb_GetElapsedTime: not implemented for this system" +#endif +#endif + return ret;*/ + return 0; +} + +void +pb_InitializeTimerSet(struct pb_TimerSet *timers) +{ +/*#ifndef DISABLE_PARBOIL_TIMER + int n; + + timers->wall_begin = 0; //get_time(); + timers->current = pb_TimerID_NONE; + + timers->async_markers = NULL; + + for (n = 0; n < pb_TimerID_LAST; n++) { + pb_ResetTimer(&timers->timers[n]); + timers->sub_timer_list[n] = NULL; + } +#endif*/ +} + +void pb_SetOpenCL(void *p_clContextPtr, void *p_clCommandQueuePtr) { + clContextPtr = ((cl_context *)p_clContextPtr); + clCommandQueuePtr = ((cl_command_queue *)p_clCommandQueuePtr); +} + +void +pb_AddSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID pb_Category) { +/*#ifndef DISABLE_PARBOIL_TIMER + + struct pb_SubTimer *subtimer = (struct pb_SubTimer *) malloc + (sizeof(struct pb_SubTimer)); + + int len = strlen(label); + + subtimer->label = (char *) malloc (sizeof(char)*(len+1)); + sprintf(subtimer->label, "%s\0", label); + + pb_ResetTimer(&subtimer->timer); + subtimer->next = NULL; + + struct pb_SubTimerList *subtimerlist = timers->sub_timer_list[pb_Category]; + if (subtimerlist == NULL) { + subtimerlist = (struct pb_SubTimerList *) calloc + (1, sizeof(struct pb_SubTimerList)); + subtimerlist->subtimer_list = subtimer; + timers->sub_timer_list[pb_Category] = subtimerlist; + } else { + // Append to list + struct pb_SubTimer *element = subtimerlist->subtimer_list; + while (element->next != NULL) { + element = element->next; + } + element->next = subtimer; + } + +#endif*/ +} + +void +pb_SwitchToTimer(struct pb_TimerSet *timers, enum pb_TimerID timer) +{ +#if 0 +#ifndef DISABLE_PARBOIL_TIMER + + /* Stop the currently running timer */ + if (timers->current != pb_TimerID_NONE) { + struct pb_SubTimerList *subtimerlist = timers->sub_timer_list[timers->current]; + struct pb_SubTimer *currSubTimer = (subtimerlist != NULL) ? subtimerlist->current : NULL; + + if (!is_async(timers->current) ) { + if (timers->current != timer) { + if (currSubTimer != NULL) { + pb_StopTimerAndSubTimer(&timers->timers[timers->current], &currSubTimer->timer); + } else { + pb_StopTimer(&timers->timers[timers->current]); + } + } else { + if (currSubTimer != NULL) { + pb_StopTimer(&currSubTimer->timer); + } + } + } else { + insert_marker(timers, timer); + if (!is_async(timer)) { // if switching to async too, keep driver going + pb_StopTimer(&timers->timers[pb_TimerID_DRIVER]); + } + } + } + + pb_Timestamp currentTime = 0; //get_time(); + + /* The only cases we check for asynchronous task completion is + * when an overlapping CPU operation completes, or the next + * segment blocks on completion of previous async operations */ + if( asyncs_outstanding(timers) && + (!is_async(timers->current) || is_blocking(timer) ) ) { + + struct pb_async_time_marker_list * last_event = get_last_async(timers); + /* CL_COMPLETE if completed */ + + cl_int ciErrNum = CL_SUCCESS; + cl_int async_done = CL_COMPLETE; + + ciErrNum = clGetEventInfo(*((cl_event *)last_event->marker), CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &async_done, NULL); + if (ciErrNum != CL_SUCCESS) { + fprintf(stderr, "Error Querying EventInfo!\n"); + } + + + if(is_blocking(timer)) { + /* Async operations completed after previous CPU operations: + * overlapped time is the total CPU time since this set of async + * operations were first issued */ + + // timer to switch to is COPY or NONE + if(async_done != CL_COMPLETE) { + accumulate_time(&(timers->timers[pb_TimerID_OVERLAP].elapsed), + timers->async_begin,currentTime); + } + + /* Wait on async operation completion */ + ciErrNum = clWaitForEvents(1, (cl_event *)last_event->marker); + if (ciErrNum != CL_SUCCESS) { + fprintf(stderr, "Error Waiting for Events!\n"); + } + + pb_Timestamp total_async_time = record_async_times(timers); + + /* Async operations completed before previous CPU operations: + * overlapped time is the total async time */ + if(async_done == CL_COMPLETE) { + //fprintf(stderr, "Async_done: total_async_type = %lld\n", total_async_time); + timers->timers[pb_TimerID_OVERLAP].elapsed += total_async_time; + } + + } else + /* implies (!is_async(timers->current) && asyncs_outstanding(timers)) */ + // i.e. Current Not Async (not KERNEL/COPY_ASYNC) but there are outstanding + // so something is deeper in stack + if(async_done == CL_COMPLETE ) { + /* Async operations completed before previous CPU operations: + * overlapped time is the total async time */ + timers->timers[pb_TimerID_OVERLAP].elapsed += record_async_times(timers); + } + } + + /* Start the new timer */ + if (timer != pb_TimerID_NONE) { + if(!is_async(timer)) { + pb_StartTimer(&timers->timers[timer]); + } else { + // toSwitchTo Is Async (KERNEL/COPY_ASYNC) + if (!asyncs_outstanding(timers)) { + /* No asyncs outstanding, insert a fresh async marker */ + + insert_marker(timers, timer); + timers->async_begin = currentTime; + } else if(!is_async(timers->current)) { + /* Previous asyncs still in flight, but a previous SwitchTo + * already marked the end of the most recent async operation, + * so we can rename that marker as the beginning of this async + * operation */ + + struct pb_async_time_marker_list * last_event = get_last_async(timers); + last_event->label = NULL; + last_event->timerID = timer; + } + if (!is_async(timers->current)) { + pb_StartTimer(&timers->timers[pb_TimerID_DRIVER]); + } + } + } + timers->current = timer; + +#endif +#endif +} + +void +pb_SwitchToSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID category) +{ +#if 0 +#ifndef DISABLE_PARBOIL_TIMER + struct pb_SubTimerList *subtimerlist = timers->sub_timer_list[timers->current]; + struct pb_SubTimer *curr = (subtimerlist != NULL) ? subtimerlist->current : NULL; + + if (timers->current != pb_TimerID_NONE) { + if (!is_async(timers->current) ) { + if (timers->current != category) { + if (curr != NULL) { + pb_StopTimerAndSubTimer(&timers->timers[timers->current], &curr->timer); + } else { + pb_StopTimer(&timers->timers[timers->current]); + } + } else { + if (curr != NULL) { + pb_StopTimer(&curr->timer); + } + } + } else { + insert_submarker(timers, label, category); + if (!is_async(category)) { // if switching to async too, keep driver going + pb_StopTimer(&timers->timers[pb_TimerID_DRIVER]); + } + } + } + + pb_Timestamp currentTime = 0; //get_time(); + + /* The only cases we check for asynchronous task completion is + * when an overlapping CPU operation completes, or the next + * segment blocks on completion of previous async operations */ + if( asyncs_outstanding(timers) && + (!is_async(timers->current) || is_blocking(category) ) ) { + + struct pb_async_time_marker_list * last_event = get_last_async(timers); + /* CL_COMPLETE if completed */ + + cl_int ciErrNum = CL_SUCCESS; + cl_int async_done = CL_COMPLETE; + + ciErrNum = clGetEventInfo(*((cl_event *)last_event->marker), CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &async_done, NULL); + if (ciErrNum != CL_SUCCESS) { + fprintf(stderr, "Error Querying EventInfo!\n"); + } + + if(is_blocking(category)) { + /* Async operations completed after previous CPU operations: + * overlapped time is the total CPU time since this set of async + * operations were first issued */ + + // timer to switch to is COPY or NONE + // if it hasn't already finished, then just take now and use that as the elapsed time in OVERLAP + // anything happening after now isn't OVERLAP because everything is being stopped to wait for synchronization + // it seems that the extra sync wall time isn't being recorded anywhere + if(async_done != CL_COMPLETE) + accumulate_time(&(timers->timers[pb_TimerID_OVERLAP].elapsed), + timers->async_begin,currentTime); + + /* Wait on async operation completion */ + ciErrNum = clWaitForEvents(1, (cl_event *)last_event->marker); + if (ciErrNum != CL_SUCCESS) { + fprintf(stderr, "Error Waiting for Events!\n"); + } + pb_Timestamp total_async_time = record_async_times(timers); + + /* Async operations completed before previous CPU operations: + * overlapped time is the total async time */ + // If it did finish, then accumulate all the async time that did happen into OVERLAP + // the immediately preceding EventSynchronize theoretically didn't have any effect since it was already completed. + if(async_done == CL_COMPLETE /*cudaSuccess*/) + timers->timers[pb_TimerID_OVERLAP].elapsed += total_async_time; + + } else + /* implies (!is_async(timers->current) && asyncs_outstanding(timers)) */ + // i.e. Current Not Async (not KERNEL/COPY_ASYNC) but there are outstanding + // so something is deeper in stack + if(async_done == CL_COMPLETE /*cudaSuccess*/) { + /* Async operations completed before previous CPU operations: + * overlapped time is the total async time */ + timers->timers[pb_TimerID_OVERLAP].elapsed += record_async_times(timers); + } + // else, this isn't blocking, so just check the next time around + } + + subtimerlist = timers->sub_timer_list[category]; + struct pb_SubTimer *subtimer = NULL; + + if (label != NULL) { + subtimer = subtimerlist->subtimer_list; + while (subtimer != NULL) { + if (strcmp(subtimer->label, label) == 0) { + break; + } else { + subtimer = subtimer->next; + } + } + } + + /* Start the new timer */ + if (category != pb_TimerID_NONE) { + if(!is_async(category)) { + if (subtimerlist != NULL) { + subtimerlist->current = subtimer; + } + + if (category != timers->current && subtimer != NULL) { + pb_StartTimerAndSubTimer(&timers->timers[category], &subtimer->timer); + } else if (subtimer != NULL) { + pb_StartTimer(&subtimer->timer); + } else { + pb_StartTimer(&timers->timers[category]); + } + } else { + if (subtimerlist != NULL) { + subtimerlist->current = subtimer; + } + + // toSwitchTo Is Async (KERNEL/COPY_ASYNC) + if (!asyncs_outstanding(timers)) { + /* No asyncs outstanding, insert a fresh async marker */ + insert_submarker(timers, label, category); + timers->async_begin = currentTime; + } else if(!is_async(timers->current)) { + /* Previous asyncs still in flight, but a previous SwitchTo + * already marked the end of the most recent async operation, + * so we can rename that marker as the beginning of this async + * operation */ + + struct pb_async_time_marker_list * last_event = get_last_async(timers); + last_event->timerID = category; + last_event->label = label; + } // else, marker for switchToThis was already inserted + + //toSwitchto is already asynchronous, but if current/prev state is async too, then DRIVER is already running + if (!is_async(timers->current)) { + pb_StartTimer(&timers->timers[pb_TimerID_DRIVER]); + } + } + } + + timers->current = category; +#endif +#endif +} + +void +pb_PrintTimerSet(struct pb_TimerSet *timers) +{ +#if 0 +#ifndef DISABLE_PARBOIL_TIMER + pb_Timestamp wall_end = 0; //get_time(); + + struct pb_Timer *t = timers->timers; + struct pb_SubTimer* sub = NULL; + + int maxSubLength; + + const char *categories[] = { + "IO", "Kernel", "Copy", "Driver", "Copy Async", "Compute" + }; + + const int maxCategoryLength = 10; + + int i; + for(i = 1; i < pb_TimerID_LAST-1; ++i) { // exclude NONE and OVRELAP from this format + if(pb_GetElapsedTime(&t[i]) != 0) { + + // Print Category Timer + printf("%-*s: %f\n", maxCategoryLength, categories[i-1], pb_GetElapsedTime(&t[i])); + + if (timers->sub_timer_list[i] != NULL) { + sub = timers->sub_timer_list[i]->subtimer_list; + maxSubLength = 0; + while (sub != NULL) { + // Find longest SubTimer label + if (strlen(sub->label) > maxSubLength) { + maxSubLength = strlen(sub->label); + } + sub = sub->next; + } + + // Fit to Categories + if (maxSubLength <= maxCategoryLength) { + maxSubLength = maxCategoryLength; + } + + sub = timers->sub_timer_list[i]->subtimer_list; + + // Print SubTimers + while (sub != NULL) { + printf(" -%-*s: %f\n", maxSubLength, sub->label, pb_GetElapsedTime(&sub->timer)); + sub = sub->next; + } + } + } + } + + if(pb_GetElapsedTime(&t[pb_TimerID_OVERLAP]) != 0) + printf("CPU/Kernel Overlap: %f\n", pb_GetElapsedTime(&t[pb_TimerID_OVERLAP])); + + float walltime = (wall_end - timers->wall_begin)/ 1e6; + printf("Timer Wall Time: %f\n", walltime); + +#endif +#endif +} + +void pb_DestroyTimerSet(struct pb_TimerSet * timers) +{ +#ifndef DISABLE_PARBOIL_TIMER + /* clean up all of the async event markers */ + struct pb_async_time_marker_list* event = timers->async_markers; + while(event != NULL) { + + cl_int ciErrNum = CL_SUCCESS; + ciErrNum = clWaitForEvents(1, (cl_event *)(event)->marker); + if (ciErrNum != CL_SUCCESS) { + //fprintf(stderr, "Error Waiting for Events!\n"); + } + + ciErrNum = clReleaseEvent( *((cl_event *)(event)->marker) ); + if (ciErrNum != CL_SUCCESS) { + fprintf(stderr, "Error Release Events!\n"); + } + + free((event)->marker); + struct pb_async_time_marker_list* next = ((event)->next); + + free(event); + + // (*event) = NULL; + event = next; + } + + int i = 0; + for(i = 0; i < pb_TimerID_LAST; ++i) { + if (timers->sub_timer_list[i] != NULL) { + struct pb_SubTimer *subtimer = timers->sub_timer_list[i]->subtimer_list; + struct pb_SubTimer *prev = NULL; + while (subtimer != NULL) { + free(subtimer->label); + prev = subtimer; + subtimer = subtimer->next; + free(prev); + } + free(timers->sub_timer_list[i]); + } + } +#endif +} + +static pb_Platform** ptr = NULL; + +// verbosely print out list of platforms and their devices to the console. +pb_Platform** +pb_GetPlatforms() { + if (ptr == NULL) { + cl_uint num_platforms; + clGetPlatformIDs(0, NULL, &num_platforms); + if (num_platforms == 0) return NULL; + + ptr = (pb_Platform **) malloc(sizeof(pb_Platform *) * (num_platforms + 1)); + cl_platform_id* ids = (cl_platform_id *) malloc(num_platforms * sizeof(cl_platform_id)); + clGetPlatformIDs(num_platforms, ids, NULL); + + unsigned int i; + for (i = 0; i < num_platforms; i++) { + ptr[i] = (pb_Platform *) malloc(sizeof(pb_Platform)); + ptr[i]->clPlatform = ids[i]; + ptr[i]->contexts = NULL; + ptr[i]->in_use = 0; + ptr[i]->devices = NULL; + + size_t sz; + clGetPlatformInfo(ids[i], CL_PLATFORM_NAME, 0, NULL, &sz); + char* name = (char *) malloc(sz + 1); + clGetPlatformInfo(ids[i], CL_PLATFORM_NAME, sz, name, NULL); + name[sz] = '\0'; + ptr[i]->name = name; + + clGetPlatformInfo(ids[i], CL_PLATFORM_VERSION, 0, NULL, &sz); + char* version = (char *) malloc(sz + 1); + clGetPlatformInfo(ids[i], CL_PLATFORM_VERSION, sz, version, NULL); + version[sz] = '\0'; + ptr[i]->version = version; + } + ptr[i] = NULL; + + free(ids); + } + + return (pb_Platform**) ptr; +} + +pb_Context* +createContext(pb_Platform* pb_platform, pb_Device* pb_device) { + pb_Context* c = (pb_Context*) malloc(sizeof(pb_Context)); + cl_int clStatus; + cl_context_properties clCps[3] = { + CL_CONTEXT_PLATFORM, (cl_context_properties)(pb_platform->clPlatform), 0 + }; + c->clContext = + clCreateContext(clCps, 1, (cl_device_id*)&pb_device->clDevice, NULL, NULL, &clStatus); + c->clPlatformId = pb_platform->clPlatform; + c->clDeviceId = pb_device->clDevice; + c->pb_platform = pb_platform; + c->pb_device = pb_device; + pb_platform->in_use = 1; + pb_device->in_use = 1; + unsigned int i = 0; + if (pb_platform->contexts == NULL) { + pb_platform->contexts = (pb_Context**) malloc(2*sizeof(pb_Context*)); + } else { + for (i = 0; pb_platform->contexts[i] != NULL; i++) {}; + pb_platform->contexts = (pb_Context**) realloc(pb_platform->contexts, + (i+1)*sizeof(pb_Context*)); + } + pb_platform->contexts[i+1] = NULL; + pb_platform->contexts[i] = c; + return c; +} + +// choose a platform by name. +pb_Platform* +pb_GetPlatformByName(const char* name) { + pb_Platform** ps = (pb_Platform **) pb_GetPlatforms(); + if (ps == NULL) return NULL; + if (name == NULL) { + return *ps; + } + + while (*ps) { + if (strstr((*ps)->name, name)) break; + ps++; + } + return (pb_Platform*) *ps; +} + +pb_Device** +pb_GetDevices(pb_Platform* pb_platform) { + if (pb_platform->devices == NULL) { + cl_uint num_devs; + cl_device_id* dev_ids; + clGetDeviceIDs((cl_platform_id) pb_platform->clPlatform, + CL_DEVICE_TYPE_ALL, 0, NULL, &num_devs); + if (num_devs == 0) return NULL; + + pb_platform->devices = + (pb_Device **) malloc((num_devs + 1) * sizeof(pb_Device *)); + dev_ids = (cl_device_id *) malloc(sizeof(cl_device_id) * num_devs); + clGetDeviceIDs((cl_platform_id) pb_platform->clPlatform, + CL_DEVICE_TYPE_ALL, num_devs, dev_ids, NULL); + + unsigned int i; + for (i = 0; i < num_devs; i++) { + pb_platform->devices[i] = (pb_Device *) malloc(sizeof(pb_Device)); + + pb_platform->devices[i]->clDevice = dev_ids[i]; + pb_platform->devices[i]->id = i; + + size_t sz; + clGetDeviceInfo(dev_ids[i], CL_DEVICE_NAME, 0, NULL, &sz); + char* name = (char *) malloc(sz + 1); + clGetDeviceInfo(dev_ids[i], CL_DEVICE_NAME, sz, name, NULL); + name[sz] = '\0'; + pb_platform->devices[i]->name = (char *) name; + + cl_bool available; + clGetDeviceInfo(dev_ids[i], CL_DEVICE_AVAILABLE, sizeof(cl_bool), &available, NULL); + pb_platform->devices[i]->available = (int) available; + + pb_platform->devices[i]->in_use = 0; + } + pb_platform->devices[i] = NULL; + } + return (pb_Device **) pb_platform->devices; +} + +// choose a device by name. +static pb_Device* +pb_SelectDeviceByName(pb_Device **ds, const char* name) { + if (ds == NULL) return NULL; + if (name == NULL) return *ds; + while (*ds) { + if (strstr((*ds)->name, name)) break; + ds++; + } + + return *ds; +} + +// choose a device by name and set the device's 'in_use' flag. +pb_Device* +pb_GetDeviceByName(pb_Platform* pb_platform, const char* name) { + pb_Device** ds = (pb_Device **) pb_GetDevices(pb_platform); + pb_Device *d = pb_SelectDeviceByName(ds, name); + + if (d) d->in_use = 1; + + return d; +} + +void +pb_ReleasePlatforms() { + if (!ptr) return; + pb_Platform** cur_ptr = ptr; + while (*cur_ptr) { + pb_Platform* pfptr = *cur_ptr++; + if (pfptr->devices) { + pb_Device** dvptr = pfptr->devices; + while (*dvptr) { + pb_Device* d = *dvptr++; + free(d->name); + free(d); + } + free(pfptr->devices); + } + if (pfptr->contexts) { + pb_Context** cptr = pfptr->contexts; + while (*cptr) { + free(*cptr++); + } + free(pfptr->contexts); + } + free(pfptr->name); + free(pfptr); + } + free(ptr); + ptr = NULL; +} + +pb_Platform* +pb_GetPlatformByNameAndVersion(const char* name, const char* version) { + pb_Platform** ps = (pb_Platform **) pb_GetPlatforms(); + if (ps == NULL) return NULL; + if (name == NULL) return *ps; + while (*ps) { + if (strstr((*ps)->name, name) && strstr((*ps)->version, version)) break; + ps++; + } + return (pb_Platform*) *ps; +} + +/* Return a pointer to the device at the specified index, or NULL. + * Used by pb_GetDevice. */ +static pb_Device * +select_device_by_index(pb_Device** ds, int id) +{ + int i = 0; + pb_Device** p = ds; + while (*p && (i < id)) { p++; i++; } + return *p; +} + +/* Return a pointer to the device with the specified type, or NULL. + * Used by pb_GetDevice. */ +static pb_Device * +select_device_by_type(pb_Device** ds, + enum pb_DeviceSelectionCriterion criterion) +{ + cl_device_type sought_type; + + /* Determine the OpenCL device type to search for */ + switch(criterion) { + case pb_Device_CPU: + sought_type = CL_DEVICE_TYPE_CPU; + break; + case pb_Device_GPU: + sought_type = CL_DEVICE_TYPE_GPU; + break; + case pb_Device_ACCELERATOR: + sought_type = CL_DEVICE_TYPE_ACCELERATOR; + break; + default: + fprintf(stderr, "pb_GetDevice: Invalid device type"); + exit(-1); + } + + /* Find the device */ + { + pb_Device** p = ds; + cl_device_type type; + while (*p) { + clGetDeviceInfo(((cl_device_id) ((*p)->clDevice)), CL_DEVICE_TYPE, + sizeof(cl_device_type), &type, NULL); + if (type == sought_type) break; + } + + return *p; + } +} + +pb_Device* +pb_GetDevice(pb_Platform* pb_platform, struct pb_DeviceParam *device) +{ + pb_Device** ds = (pb_Device **) pb_GetDevices(pb_platform); + + // The list of devices must be nonempty + if (ds == NULL || *ds == NULL) { + fprintf(stderr, "Error: No device is found in platform: name = %s, version = %s\n.", pb_platform->name, pb_platform->version); + exit(-1); + } + + pb_Device *selected_device = NULL; + + if (device != NULL) { + /* Use 'device' to select and return a device. + * If unable to select a device, fall + * back on the default selection mechanism. */ + switch(device->criterion) { + case pb_Device_INDEX: + selected_device = select_device_by_index(ds, device->index); + break; + case pb_Device_GPU: + case pb_Device_CPU: + case pb_Device_ACCELERATOR: + selected_device = select_device_by_type(ds, device->criterion); + break; + case pb_Device_NAME: + selected_device = pb_SelectDeviceByName(ds, device->name); + break; + default: + fprintf(stderr, "pb_GetDevice: Invalid argument"); + exit(-1); + } + } + + /* By default or if user-specified selection failed, + * select the first device */ + if (selected_device == NULL) + selected_device = *ds; + + /* Set the in_use flag */ + selected_device->in_use = 1; + + return selected_device; +} + +pb_Device* +pb_GetDeviceByEnvVars(pb_Platform* pb_platform) { + + /* Convert environment variables to a 'pb_DeviceParam' */ + struct pb_DeviceParam *param = NULL; + + char* device_num = getenv("PARBOIL_DEVICE_NUMBER"); + if (device_num && strcmp(device_num, "")) { + int id = atoi(device_num); + param = pb_DeviceParam_index(id); + } + else { + char* device_name = getenv("PARBOIL_DEVICE_NAME"); + if (device_name && strcmp(device_name, "")) { + param = pb_DeviceParam_name(strdup(device_name)); + } + else { + char* device_type = getenv("PARBOIL_DEVICE_TYPE"); + if (device_type && strcmp(device_type, "")) { + if (strcmp(device_type, "CPU") == 0) + param = pb_DeviceParam_cpu(); + else if (strcmp(device_type, "GPU") == 0) + param = pb_DeviceParam_gpu(); + else if (strcmp(device_type, "ACCELERATOR") == 0) + param = pb_DeviceParam_accelerator(); + } + } + } + + /* Get a device */ + pb_Device *d = pb_GetDevice(pb_platform, param); + pb_FreeDeviceParam(param); + + return d; +} + +pb_Platform* +pb_GetPlatformByEnvVars() { + char* name = getenv("PARBOIL_PLATFORM_NAME"); + char* version = getenv("PARBOIL_PLATFORM_VERSION"); + + /* Create a pb_PlatformParam object (or NULL) representing the data from the + * environment variables */ + struct pb_PlatformParam *platform; + + if (name) { + if (version) { + platform = pb_PlatformParam(strdup(name), strdup(version)); + } + else { + platform = pb_PlatformParam(strdup(name), NULL); + } + } + else { + platform = NULL; + } + + /* Convert to a platform */ + pb_Platform *p = pb_GetPlatform(platform); + pb_FreePlatformParam(platform); + + return p; +} + +/* Choose an OpenCL platform based on the given command-line parameters. + * If NULL, use the default OpenCL platform. */ +pb_Platform* +pb_GetPlatform(struct pb_PlatformParam *platform) { + if (platform != NULL) { + /* Try to use command-line parameters to choose platform */ + char *name = platform->name; + char *version = platform->version; + + if (!name) { + fprintf(stderr, "Internal error: NULL pointer"); + exit(-1); + } + + if (version) { + pb_Platform* p = pb_GetPlatformByNameAndVersion(name, version); + if (p) return p; + } + + pb_Platform* p = pb_GetPlatformByName(name); + if (p) return p; + } + + pb_Platform* p = pb_GetPlatformByName(NULL); + if (p == NULL) { + fprintf(stderr, "Error: No OpenCL platform in this system. Exiting."); + exit(-1); + } + return p; +} + +//extern void perf_init(); +//extern void mxpa_scheduler_init(); + +pb_Context* +pb_InitOpenCLContext(struct pb_Parameters* parameters) { +#if 0 + pb_Platform* ps = pb_GetPlatform(parameters->platform); + if (!ps) return NULL; + pb_Device* ds = pb_GetDevice(ps, parameters->device); + if (!ds) return NULL; + + /* HERE INITIALIZE TIMER */ + //perf_init(); + //mxpa_scheduler_init(); + + pb_Context* c = createContext(ps, ds); + pb_PrintPlatformInfo(c); + return c; +#endif + cl_int _err; + cl_platform_id platform_id; + cl_device_id device_id; + cl_context context; + clGetPlatformIDs(1, &platform_id, NULL); + clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, NULL); + context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &_err); + + pb_Context* c = (pb_Context*)malloc(sizeof(pb_Context)); + c->clContext = context; + c->clDeviceId = device_id; + c->clPlatformId = platform_id; + c->pb_platform = (pb_Platform*)malloc(sizeof(pb_Platform)); + c->pb_device = (pb_Device*)malloc(sizeof(pb_Device)); + c->pb_platform->devices = (pb_Device**)malloc(sizeof(pb_Device*) * 2); + c->pb_platform->devices[0] = c->pb_device; + c->pb_platform->devices[1] = NULL; + c->pb_platform->contexts = (pb_Context**)malloc(sizeof(pb_Context*) * 2); + c->pb_platform->contexts[0] = c; + c->pb_platform->contexts[1] = NULL; + c->pb_platform->in_use = 1; + c->pb_device->in_use = 1; + return c; +} + +void +pb_ReleaseOpenCLContext(pb_Context* c) { + pb_ReleasePlatforms(); +} + +void +pb_PrintPlatformInfo(pb_Context* c) { + /*pb_Platform** ps = pb_GetPlatforms(); + if (!ps) { + fprintf (stderr, "No platform found"); + return; + } + + printf ("********************************************************\n"); + printf ("DETECTED OPENCL PLATFORMS AND DEVICES:\n"); + printf ("--------------------------------------------------------\n"); + + while (*ps) { + printf ("PLATFORM = %s, %s", (*ps)->name, (*ps)->version); + if (c->pb_platform == *ps) printf (" (SELECTED)"); + printf ("\n"); + + pb_Device** ds = (pb_Device **) pb_GetDevices((*ps)); + if (ds == NULL) { + printf (" + (No devices)\n"); + } else { + while (*ds) { + printf (" + %d: %s", (*ds)->id, (*ds)->name); + if (c->pb_device == *ds) printf (" (SELECTED)"); + printf ("\n"); + ds++; + } + } + + ps++; + } + printf ("********************************************************\n");*/ +} + +#ifdef MEASURE_KERNEL_TIME + +#undef clEnqueueNDRangeKernel + +//extern void pin_trace_enable(char*); +//extern void pin_trace_disable(char*); + +cl_int +pb_clEnqueueNDRangeKernel(cl_command_queue q/* command_queue */, + cl_kernel k/* kernel */, + cl_uint d/* work_dim */, + const size_t * o/* global_work_offset */, + const size_t * gws/* global_work_size */, + const size_t * lws/* local_work_size */, + cl_uint n/* num_events_in_wait_list */, + const cl_event * w/* event_wait_list */, + cl_event * e/* event */) { + + char buf[128]; + struct timeval begin, end; + clGetKernelInfo(k, CL_KERNEL_FUNCTION_NAME, 128, buf, NULL); + +#if 0 + int i; + for (i = 0; i < d; i++) { + printf ("%s: %d: %d / %d\n", buf, i, gws[i], (lws == NULL ? 0 : lws[i])); + } +#endif + + clFinish(q); clFlush(q); + //pin_trace_enable(buf); + //gettimeofday(&begin, NULL); + cl_int result = clEnqueueNDRangeKernel(q, k, d, o, gws, lws, n, w, e); + clFinish(q); clFlush(q); + //gettimeofday(&end, NULL); + //pin_trace_disable(buf); + //float t = (float)(end.tv_sec - begin.tv_sec) + (end.tv_usec - begin.tv_usec) / 1000000.0f; + fflush(stdout); + fflush(stderr); + //printf ("PBTIMER: %s: %f\n", buf, t); + return result; +} + +#endif + +void +pb_sig_float(char* c, float* p, int sz) { + int i; + double s = 0.0; + for (i = 0; i < sz; i++) s += p[i] * (float)(i+1); + printf ("[Signature] %s = %lf\n", c, s); +} + +void +pb_sig_double(char* c, double* p, int sz) { + int i; + double s = 0.0; + for (i = 0; i < sz; i++) s += p[i]; + printf ("[Signature] %s = %lf\n", c, s); +} + +void +pb_sig_short(char* c, short* p, int sz) { + int i; + long long int s = 0; + for (i = 0; i < sz; i++) s += p[i]; + printf ("[Signature] %s = %lld\n", c, s); +} + +void +pb_sig_int(char* c, int* p, int sz) { + int i; + long long int s = 0; + for (i = 0; i < sz; i++) s += p[i]; + printf ("[Signature] %s = %lld\n", c, s); +} + +void +pb_sig_uchar(char* c, unsigned char* p, unsigned int sz) { + int i; + unsigned long long int s = 0; + for (i = 0; i < sz; i++) s += p[i]; + printf ("[Signature] %s = %lld\n", c, s); +} + +void pb_sig_clmem(char* s, cl_command_queue command_queue, cl_mem memobj, int ty) { + size_t sz; + if (clGetMemObjectInfo(memobj, CL_MEM_SIZE, sizeof(size_t), &sz, NULL) != CL_SUCCESS) { + printf ("Something wrong.\n"); + assert(0); + } else { + printf ("size = %d\n", sz); + } + char* hp; // = (char*) malloc(sz); + //posix_memalign((void**)&hp, 64, sz); + hp = (char*)malloc(sz); + + clEnqueueReadBuffer (command_queue, + memobj, + CL_TRUE, + 0, + sz, + hp, + 0, + NULL, + NULL); + + if (ty == T_FLOAT) pb_sig_float(s, (float*)hp, sz/sizeof(float)); + if (ty == T_DOUBLE) pb_sig_double(s, (double*)hp, sz/sizeof(double)); + if (ty == T_INT) pb_sig_int(s, (int*)hp, sz/sizeof(int)); + if (ty == T_SHORT) pb_sig_short(s, (short*)hp, sz/sizeof(short)); + if (ty == T_UCHAR) pb_sig_uchar(s, (unsigned char*)hp, sz/sizeof(char)); + + free(hp); +} + diff --git a/benchmarks/opencl/sad/parboil.c b/benchmarks/opencl/sad/parboil.c deleted file mode 100644 index 54fca9d0..00000000 --- a/benchmarks/opencl/sad/parboil.c +++ /dev/null @@ -1,427 +0,0 @@ -/* - * (c) 2007 The Board of Trustees of the University of Illinois. - */ - -#include -#include -#include -#include - -#if _POSIX_VERSION >= 200112L -# include -#endif - - -/*****************************************************************************/ -/* Timer routines */ - -static void -accumulate_time(pb_Timestamp *accum, - pb_Timestamp start, - pb_Timestamp end) -{ -#if _POSIX_VERSION >= 200112L - *accum += end - start; -#else -# error "Timestamps not implemented for this system" -#endif -} - -#if _POSIX_VERSION >= 200112L -static pb_Timestamp get_time() -{ - struct timeval tv; - gettimeofday(&tv, NULL); - return (pb_Timestamp) (tv.tv_sec * 1000000LL + tv.tv_usec); -} -#else -# error "no supported time libraries are available on this platform" -#endif - -void -pb_ResetTimer(struct pb_Timer *timer) -{ - timer->state = pb_Timer_STOPPED; - -#if _POSIX_VERSION >= 200112L - timer->elapsed = 0; -#else -# error "pb_ResetTimer: not implemented for this system" -#endif -} - -void -pb_StartTimer(struct pb_Timer *timer) -{ - if (timer->state != pb_Timer_STOPPED) { - fputs("Ignoring attempt to start a running timer\n", stderr); - return; - } - - timer->state = pb_Timer_RUNNING; - -#if _POSIX_VERSION >= 200112L - { - struct timeval tv; - gettimeofday(&tv, NULL); - timer->init = tv.tv_sec * 1000000LL + tv.tv_usec; - } -#else -# error "pb_StartTimer: not implemented for this system" -#endif -} - -void -pb_StartTimerAndSubTimer(struct pb_Timer *timer, struct pb_Timer *subtimer) -{ - unsigned int numNotStopped = 0x3; // 11 - if (timer->state != pb_Timer_STOPPED) { - fputs("Warning: Timer was not stopped\n", stderr); - numNotStopped &= 0x1; // Zero out 2^1 - } - if (subtimer->state != pb_Timer_STOPPED) { - fputs("Warning: Subtimer was not stopped\n", stderr); - numNotStopped &= 0x2; // Zero out 2^0 - } - if (numNotStopped == 0x0) { - fputs("Ignoring attempt to start running timer and subtimer\n", stderr); - return; - } - - timer->state = pb_Timer_RUNNING; - subtimer->state = pb_Timer_RUNNING; - -#if _POSIX_VERSION >= 200112L - { - struct timeval tv; - gettimeofday(&tv, NULL); - - if (numNotStopped & 0x2) { - timer->init = tv.tv_sec * 1000000LL + tv.tv_usec; - } - - if (numNotStopped & 0x1) { - subtimer->init = tv.tv_sec * 1000000LL + tv.tv_usec; - } - } -#else -# error "pb_StartTimer: not implemented for this system" -#endif - -} - -void -pb_StopTimer(struct pb_Timer *timer) -{ - - pb_Timestamp fini; - - if (timer->state != pb_Timer_RUNNING) { - fputs("Ignoring attempt to stop a stopped timer\n", stderr); - return; - } - - timer->state = pb_Timer_STOPPED; - -#if _POSIX_VERSION >= 200112L - { - struct timeval tv; - gettimeofday(&tv, NULL); - fini = tv.tv_sec * 1000000LL + tv.tv_usec; - } -#else -# error "pb_StopTimer: not implemented for this system" -#endif - - accumulate_time(&timer->elapsed, timer->init, fini); - timer->init = fini; - -} - -void pb_StopTimerAndSubTimer(struct pb_Timer *timer, struct pb_Timer *subtimer) { - - pb_Timestamp fini; - - unsigned int numNotRunning = 0x3; // 0b11 - if (timer->state != pb_Timer_RUNNING) { - fputs("Warning: Timer was not running\n", stderr); - numNotRunning &= 0x1; // Zero out 2^1 - } - if (subtimer->state != pb_Timer_RUNNING) { - fputs("Warning: Subtimer was not running\n", stderr); - numNotRunning &= 0x2; // Zero out 2^0 - } - if (numNotRunning == 0x0) { - fputs("Ignoring attempt to stop stopped timer and subtimer\n", stderr); - return; - } - - - timer->state = pb_Timer_STOPPED; - subtimer->state = pb_Timer_STOPPED; - -#if _POSIX_VERSION >= 200112L - { - struct timeval tv; - gettimeofday(&tv, NULL); - fini = tv.tv_sec * 1000000LL + tv.tv_usec; - } -#else -# error "pb_StopTimer: not implemented for this system" -#endif - - if (numNotRunning & 0x2) { - accumulate_time(&timer->elapsed, timer->init, fini); - timer->init = fini; - } - - if (numNotRunning & 0x1) { - accumulate_time(&subtimer->elapsed, subtimer->init, fini); - subtimer->init = fini; - } - -} - -/* Get the elapsed time in seconds. */ -double -pb_GetElapsedTime(struct pb_Timer *timer) -{ - double ret; - - if (timer->state != pb_Timer_STOPPED) { - fputs("Elapsed time from a running timer is inaccurate\n", stderr); - } - -#if _POSIX_VERSION >= 200112L - ret = timer->elapsed / 1e6; -#else -# error "pb_GetElapsedTime: not implemented for this system" -#endif - return ret; -} - -void -pb_InitializeTimerSet(struct pb_TimerSet *timers) -{ - int n; - - timers->wall_begin = get_time(); - - timers->current = pb_TimerID_NONE; - - timers->async_markers = NULL; - - - for (n = 0; n < pb_TimerID_LAST; n++) { - pb_ResetTimer(&timers->timers[n]); - timers->sub_timer_list[n] = NULL; // free first? - } -} - -void -pb_AddSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID pb_Category) { - - struct pb_SubTimer *subtimer = (struct pb_SubTimer *) malloc - (sizeof(struct pb_SubTimer)); - - int len = strlen(label); - - subtimer->label = (char *) malloc (sizeof(char)*(len+1)); - sprintf(subtimer->label, "%s\0", label); - - pb_ResetTimer(&subtimer->timer); - subtimer->next = NULL; - - struct pb_SubTimerList *subtimerlist = timers->sub_timer_list[pb_Category]; - if (subtimerlist == NULL) { - subtimerlist = (struct pb_SubTimerList *) malloc - (sizeof(struct pb_SubTimerList)); - subtimerlist->subtimer_list = subtimer; - timers->sub_timer_list[pb_Category] = subtimerlist; - } else { - // Append to list - struct pb_SubTimer *element = subtimerlist->subtimer_list; - while (element->next != NULL) { - element = element->next; - } - element->next = subtimer; - } - -} - -void -pb_SwitchToSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID category) -{ - -// switchToSub( NULL, NONE -// switchToSub( NULL, some -// switchToSub( some, some -// switchToSub( some, NONE -- tries to find "some" in NONE's sublist, which won't be printed - - struct pb_Timer *topLevelToStop = NULL; - if (timers->current != category && timers->current != pb_TimerID_NONE) { - // Switching to subtimer in a different category needs to stop the top-level current, different categoried timer. - // NONE shouldn't have a timer associated with it, so exclude from branch - topLevelToStop = &timers->timers[timers->current]; - } - - struct pb_SubTimerList *subtimerlist = timers->sub_timer_list[timers->current]; - struct pb_SubTimer *curr = (subtimerlist == NULL) ? NULL : subtimerlist->current; - - if (timers->current != pb_TimerID_NONE) { - if (curr != NULL && topLevelToStop != NULL) { - pb_StopTimerAndSubTimer(topLevelToStop, &curr->timer); - } else if (curr != NULL) { - pb_StopTimer(&curr->timer); - } else { - pb_StopTimer(topLevelToStop); - } - } - - subtimerlist = timers->sub_timer_list[category]; - struct pb_SubTimer *subtimer = NULL; - - if (label != NULL) { - subtimer = subtimerlist->subtimer_list; - while (subtimer != NULL) { - if (strcmp(subtimer->label, label) == 0) { - break; - } else { - subtimer = subtimer->next; - } - } - } - - if (category != pb_TimerID_NONE) { - - if (subtimerlist != NULL) { - subtimerlist->current = subtimer; - } - - if (category != timers->current && subtimer != NULL) { - pb_StartTimerAndSubTimer(&timers->timers[category], &subtimer->timer); - } else if (subtimer != NULL) { - // Same category, different non-NULL subtimer - pb_StartTimer(&subtimer->timer); - } else{ - // Different category, but no subtimer (not found or specified as NULL) -- unprefered way of setting topLevel timer - pb_StartTimer(&timers->timers[category]); - } - } - - timers->current = category; - -} - -void -pb_SwitchToTimer(struct pb_TimerSet *timers, enum pb_TimerID timer) -{ - /* Stop the currently running timer */ - /*if (timers->current != pb_TimerID_NONE) { - struct pb_SubTimer *currSubTimer = NULL; - struct pb_SubTimerList *subtimerlist = timers->sub_timer_list[timers->current]; - - if ( subtimerlist != NULL) { - currSubTimer = timers->sub_timer_list[timers->current]->current; - } - if ( currSubTimer!= NULL) { - pb_StopTimerAndSubTimer(&timers->timers[timers->current], &currSubTimer->timer); - } else { - pb_StopTimer(&timers->timers[timers->current]); - } - } - - timers->current = timer; - - if (timer != pb_TimerID_NONE) { - pb_StartTimer(&timers->timers[timer]); - }*/ -} - -void -pb_PrintTimerSet(struct pb_TimerSet *timers) -{ - - pb_Timestamp wall_end = get_time(); - - struct pb_Timer *t = timers->timers; - struct pb_SubTimer* sub = NULL; - - int maxSubLength; - - const char *categories[] = { - "IO", "Kernel", "Copy", "Driver", "Copy Async", "Compute" - }; - - const int maxCategoryLength = 10; - - int i; - for(i = 1; i < pb_TimerID_LAST-1; ++i) { // exclude NONE and OVRELAP from this format - if(pb_GetElapsedTime(&t[i]) != 0) { - - // Print Category Timer - printf("%-*s: %f\n", maxCategoryLength, categories[i-1], pb_GetElapsedTime(&t[i])); - - if (timers->sub_timer_list[i] != NULL) { - sub = timers->sub_timer_list[i]->subtimer_list; - maxSubLength = 0; - while (sub != NULL) { - // Find longest SubTimer label - if (strlen(sub->label) > maxSubLength) { - maxSubLength = strlen(sub->label); - } - sub = sub->next; - } - - // Fit to Categories - if (maxSubLength <= maxCategoryLength) { - maxSubLength = maxCategoryLength; - } - - sub = timers->sub_timer_list[i]->subtimer_list; - - // Print SubTimers - while (sub != NULL) { - printf(" -%-*s: %f\n", maxSubLength, sub->label, pb_GetElapsedTime(&sub->timer)); - sub = sub->next; - } - } - } - } - - if(pb_GetElapsedTime(&t[pb_TimerID_OVERLAP]) != 0) - printf("CPU/Kernel Overlap: %f\n", pb_GetElapsedTime(&t[pb_TimerID_OVERLAP])); - - float walltime = (wall_end - timers->wall_begin)/ 1e6; - printf("Timer Wall Time: %f\n", walltime); - -} - -void pb_DestroyTimerSet(struct pb_TimerSet * timers) -{ - /* clean up all of the async event markers */ - struct pb_async_time_marker_list ** event = &(timers->async_markers); - while( *event != NULL) { - struct pb_async_time_marker_list ** next = &((*event)->next); - free(*event); - (*event) = NULL; - event = next; - } - - int i = 0; - for(i = 0; i < pb_TimerID_LAST; ++i) { - if (timers->sub_timer_list[i] != NULL) { - struct pb_SubTimer *subtimer = timers->sub_timer_list[i]->subtimer_list; - struct pb_SubTimer *prev = NULL; - while (subtimer != NULL) { - free(subtimer->label); - prev = subtimer; - subtimer = subtimer->next; - free(prev); - } - free(timers->sub_timer_list[i]); - } - } -} - - diff --git a/benchmarks/opencl/spmv/parboil.c b/benchmarks/opencl/spmv/parboil.c deleted file mode 100644 index 54fca9d0..00000000 --- a/benchmarks/opencl/spmv/parboil.c +++ /dev/null @@ -1,427 +0,0 @@ -/* - * (c) 2007 The Board of Trustees of the University of Illinois. - */ - -#include -#include -#include -#include - -#if _POSIX_VERSION >= 200112L -# include -#endif - - -/*****************************************************************************/ -/* Timer routines */ - -static void -accumulate_time(pb_Timestamp *accum, - pb_Timestamp start, - pb_Timestamp end) -{ -#if _POSIX_VERSION >= 200112L - *accum += end - start; -#else -# error "Timestamps not implemented for this system" -#endif -} - -#if _POSIX_VERSION >= 200112L -static pb_Timestamp get_time() -{ - struct timeval tv; - gettimeofday(&tv, NULL); - return (pb_Timestamp) (tv.tv_sec * 1000000LL + tv.tv_usec); -} -#else -# error "no supported time libraries are available on this platform" -#endif - -void -pb_ResetTimer(struct pb_Timer *timer) -{ - timer->state = pb_Timer_STOPPED; - -#if _POSIX_VERSION >= 200112L - timer->elapsed = 0; -#else -# error "pb_ResetTimer: not implemented for this system" -#endif -} - -void -pb_StartTimer(struct pb_Timer *timer) -{ - if (timer->state != pb_Timer_STOPPED) { - fputs("Ignoring attempt to start a running timer\n", stderr); - return; - } - - timer->state = pb_Timer_RUNNING; - -#if _POSIX_VERSION >= 200112L - { - struct timeval tv; - gettimeofday(&tv, NULL); - timer->init = tv.tv_sec * 1000000LL + tv.tv_usec; - } -#else -# error "pb_StartTimer: not implemented for this system" -#endif -} - -void -pb_StartTimerAndSubTimer(struct pb_Timer *timer, struct pb_Timer *subtimer) -{ - unsigned int numNotStopped = 0x3; // 11 - if (timer->state != pb_Timer_STOPPED) { - fputs("Warning: Timer was not stopped\n", stderr); - numNotStopped &= 0x1; // Zero out 2^1 - } - if (subtimer->state != pb_Timer_STOPPED) { - fputs("Warning: Subtimer was not stopped\n", stderr); - numNotStopped &= 0x2; // Zero out 2^0 - } - if (numNotStopped == 0x0) { - fputs("Ignoring attempt to start running timer and subtimer\n", stderr); - return; - } - - timer->state = pb_Timer_RUNNING; - subtimer->state = pb_Timer_RUNNING; - -#if _POSIX_VERSION >= 200112L - { - struct timeval tv; - gettimeofday(&tv, NULL); - - if (numNotStopped & 0x2) { - timer->init = tv.tv_sec * 1000000LL + tv.tv_usec; - } - - if (numNotStopped & 0x1) { - subtimer->init = tv.tv_sec * 1000000LL + tv.tv_usec; - } - } -#else -# error "pb_StartTimer: not implemented for this system" -#endif - -} - -void -pb_StopTimer(struct pb_Timer *timer) -{ - - pb_Timestamp fini; - - if (timer->state != pb_Timer_RUNNING) { - fputs("Ignoring attempt to stop a stopped timer\n", stderr); - return; - } - - timer->state = pb_Timer_STOPPED; - -#if _POSIX_VERSION >= 200112L - { - struct timeval tv; - gettimeofday(&tv, NULL); - fini = tv.tv_sec * 1000000LL + tv.tv_usec; - } -#else -# error "pb_StopTimer: not implemented for this system" -#endif - - accumulate_time(&timer->elapsed, timer->init, fini); - timer->init = fini; - -} - -void pb_StopTimerAndSubTimer(struct pb_Timer *timer, struct pb_Timer *subtimer) { - - pb_Timestamp fini; - - unsigned int numNotRunning = 0x3; // 0b11 - if (timer->state != pb_Timer_RUNNING) { - fputs("Warning: Timer was not running\n", stderr); - numNotRunning &= 0x1; // Zero out 2^1 - } - if (subtimer->state != pb_Timer_RUNNING) { - fputs("Warning: Subtimer was not running\n", stderr); - numNotRunning &= 0x2; // Zero out 2^0 - } - if (numNotRunning == 0x0) { - fputs("Ignoring attempt to stop stopped timer and subtimer\n", stderr); - return; - } - - - timer->state = pb_Timer_STOPPED; - subtimer->state = pb_Timer_STOPPED; - -#if _POSIX_VERSION >= 200112L - { - struct timeval tv; - gettimeofday(&tv, NULL); - fini = tv.tv_sec * 1000000LL + tv.tv_usec; - } -#else -# error "pb_StopTimer: not implemented for this system" -#endif - - if (numNotRunning & 0x2) { - accumulate_time(&timer->elapsed, timer->init, fini); - timer->init = fini; - } - - if (numNotRunning & 0x1) { - accumulate_time(&subtimer->elapsed, subtimer->init, fini); - subtimer->init = fini; - } - -} - -/* Get the elapsed time in seconds. */ -double -pb_GetElapsedTime(struct pb_Timer *timer) -{ - double ret; - - if (timer->state != pb_Timer_STOPPED) { - fputs("Elapsed time from a running timer is inaccurate\n", stderr); - } - -#if _POSIX_VERSION >= 200112L - ret = timer->elapsed / 1e6; -#else -# error "pb_GetElapsedTime: not implemented for this system" -#endif - return ret; -} - -void -pb_InitializeTimerSet(struct pb_TimerSet *timers) -{ - int n; - - timers->wall_begin = get_time(); - - timers->current = pb_TimerID_NONE; - - timers->async_markers = NULL; - - - for (n = 0; n < pb_TimerID_LAST; n++) { - pb_ResetTimer(&timers->timers[n]); - timers->sub_timer_list[n] = NULL; // free first? - } -} - -void -pb_AddSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID pb_Category) { - - struct pb_SubTimer *subtimer = (struct pb_SubTimer *) malloc - (sizeof(struct pb_SubTimer)); - - int len = strlen(label); - - subtimer->label = (char *) malloc (sizeof(char)*(len+1)); - sprintf(subtimer->label, "%s\0", label); - - pb_ResetTimer(&subtimer->timer); - subtimer->next = NULL; - - struct pb_SubTimerList *subtimerlist = timers->sub_timer_list[pb_Category]; - if (subtimerlist == NULL) { - subtimerlist = (struct pb_SubTimerList *) malloc - (sizeof(struct pb_SubTimerList)); - subtimerlist->subtimer_list = subtimer; - timers->sub_timer_list[pb_Category] = subtimerlist; - } else { - // Append to list - struct pb_SubTimer *element = subtimerlist->subtimer_list; - while (element->next != NULL) { - element = element->next; - } - element->next = subtimer; - } - -} - -void -pb_SwitchToSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID category) -{ - -// switchToSub( NULL, NONE -// switchToSub( NULL, some -// switchToSub( some, some -// switchToSub( some, NONE -- tries to find "some" in NONE's sublist, which won't be printed - - struct pb_Timer *topLevelToStop = NULL; - if (timers->current != category && timers->current != pb_TimerID_NONE) { - // Switching to subtimer in a different category needs to stop the top-level current, different categoried timer. - // NONE shouldn't have a timer associated with it, so exclude from branch - topLevelToStop = &timers->timers[timers->current]; - } - - struct pb_SubTimerList *subtimerlist = timers->sub_timer_list[timers->current]; - struct pb_SubTimer *curr = (subtimerlist == NULL) ? NULL : subtimerlist->current; - - if (timers->current != pb_TimerID_NONE) { - if (curr != NULL && topLevelToStop != NULL) { - pb_StopTimerAndSubTimer(topLevelToStop, &curr->timer); - } else if (curr != NULL) { - pb_StopTimer(&curr->timer); - } else { - pb_StopTimer(topLevelToStop); - } - } - - subtimerlist = timers->sub_timer_list[category]; - struct pb_SubTimer *subtimer = NULL; - - if (label != NULL) { - subtimer = subtimerlist->subtimer_list; - while (subtimer != NULL) { - if (strcmp(subtimer->label, label) == 0) { - break; - } else { - subtimer = subtimer->next; - } - } - } - - if (category != pb_TimerID_NONE) { - - if (subtimerlist != NULL) { - subtimerlist->current = subtimer; - } - - if (category != timers->current && subtimer != NULL) { - pb_StartTimerAndSubTimer(&timers->timers[category], &subtimer->timer); - } else if (subtimer != NULL) { - // Same category, different non-NULL subtimer - pb_StartTimer(&subtimer->timer); - } else{ - // Different category, but no subtimer (not found or specified as NULL) -- unprefered way of setting topLevel timer - pb_StartTimer(&timers->timers[category]); - } - } - - timers->current = category; - -} - -void -pb_SwitchToTimer(struct pb_TimerSet *timers, enum pb_TimerID timer) -{ - /* Stop the currently running timer */ - /*if (timers->current != pb_TimerID_NONE) { - struct pb_SubTimer *currSubTimer = NULL; - struct pb_SubTimerList *subtimerlist = timers->sub_timer_list[timers->current]; - - if ( subtimerlist != NULL) { - currSubTimer = timers->sub_timer_list[timers->current]->current; - } - if ( currSubTimer!= NULL) { - pb_StopTimerAndSubTimer(&timers->timers[timers->current], &currSubTimer->timer); - } else { - pb_StopTimer(&timers->timers[timers->current]); - } - } - - timers->current = timer; - - if (timer != pb_TimerID_NONE) { - pb_StartTimer(&timers->timers[timer]); - }*/ -} - -void -pb_PrintTimerSet(struct pb_TimerSet *timers) -{ - - pb_Timestamp wall_end = get_time(); - - struct pb_Timer *t = timers->timers; - struct pb_SubTimer* sub = NULL; - - int maxSubLength; - - const char *categories[] = { - "IO", "Kernel", "Copy", "Driver", "Copy Async", "Compute" - }; - - const int maxCategoryLength = 10; - - int i; - for(i = 1; i < pb_TimerID_LAST-1; ++i) { // exclude NONE and OVRELAP from this format - if(pb_GetElapsedTime(&t[i]) != 0) { - - // Print Category Timer - printf("%-*s: %f\n", maxCategoryLength, categories[i-1], pb_GetElapsedTime(&t[i])); - - if (timers->sub_timer_list[i] != NULL) { - sub = timers->sub_timer_list[i]->subtimer_list; - maxSubLength = 0; - while (sub != NULL) { - // Find longest SubTimer label - if (strlen(sub->label) > maxSubLength) { - maxSubLength = strlen(sub->label); - } - sub = sub->next; - } - - // Fit to Categories - if (maxSubLength <= maxCategoryLength) { - maxSubLength = maxCategoryLength; - } - - sub = timers->sub_timer_list[i]->subtimer_list; - - // Print SubTimers - while (sub != NULL) { - printf(" -%-*s: %f\n", maxSubLength, sub->label, pb_GetElapsedTime(&sub->timer)); - sub = sub->next; - } - } - } - } - - if(pb_GetElapsedTime(&t[pb_TimerID_OVERLAP]) != 0) - printf("CPU/Kernel Overlap: %f\n", pb_GetElapsedTime(&t[pb_TimerID_OVERLAP])); - - float walltime = (wall_end - timers->wall_begin)/ 1e6; - printf("Timer Wall Time: %f\n", walltime); - -} - -void pb_DestroyTimerSet(struct pb_TimerSet * timers) -{ - /* clean up all of the async event markers */ - struct pb_async_time_marker_list ** event = &(timers->async_markers); - while( *event != NULL) { - struct pb_async_time_marker_list ** next = &((*event)->next); - free(*event); - (*event) = NULL; - event = next; - } - - int i = 0; - for(i = 0; i < pb_TimerID_LAST; ++i) { - if (timers->sub_timer_list[i] != NULL) { - struct pb_SubTimer *subtimer = timers->sub_timer_list[i]->subtimer_list; - struct pb_SubTimer *prev = NULL; - while (subtimer != NULL) { - free(subtimer->label); - prev = subtimer; - subtimer = subtimer->next; - free(prev); - } - free(timers->sub_timer_list[i]); - } - } -} - - diff --git a/benchmarks/opencl/stencil/parboil.c b/benchmarks/opencl/stencil/parboil.c deleted file mode 100644 index 54fca9d0..00000000 --- a/benchmarks/opencl/stencil/parboil.c +++ /dev/null @@ -1,427 +0,0 @@ -/* - * (c) 2007 The Board of Trustees of the University of Illinois. - */ - -#include -#include -#include -#include - -#if _POSIX_VERSION >= 200112L -# include -#endif - - -/*****************************************************************************/ -/* Timer routines */ - -static void -accumulate_time(pb_Timestamp *accum, - pb_Timestamp start, - pb_Timestamp end) -{ -#if _POSIX_VERSION >= 200112L - *accum += end - start; -#else -# error "Timestamps not implemented for this system" -#endif -} - -#if _POSIX_VERSION >= 200112L -static pb_Timestamp get_time() -{ - struct timeval tv; - gettimeofday(&tv, NULL); - return (pb_Timestamp) (tv.tv_sec * 1000000LL + tv.tv_usec); -} -#else -# error "no supported time libraries are available on this platform" -#endif - -void -pb_ResetTimer(struct pb_Timer *timer) -{ - timer->state = pb_Timer_STOPPED; - -#if _POSIX_VERSION >= 200112L - timer->elapsed = 0; -#else -# error "pb_ResetTimer: not implemented for this system" -#endif -} - -void -pb_StartTimer(struct pb_Timer *timer) -{ - if (timer->state != pb_Timer_STOPPED) { - fputs("Ignoring attempt to start a running timer\n", stderr); - return; - } - - timer->state = pb_Timer_RUNNING; - -#if _POSIX_VERSION >= 200112L - { - struct timeval tv; - gettimeofday(&tv, NULL); - timer->init = tv.tv_sec * 1000000LL + tv.tv_usec; - } -#else -# error "pb_StartTimer: not implemented for this system" -#endif -} - -void -pb_StartTimerAndSubTimer(struct pb_Timer *timer, struct pb_Timer *subtimer) -{ - unsigned int numNotStopped = 0x3; // 11 - if (timer->state != pb_Timer_STOPPED) { - fputs("Warning: Timer was not stopped\n", stderr); - numNotStopped &= 0x1; // Zero out 2^1 - } - if (subtimer->state != pb_Timer_STOPPED) { - fputs("Warning: Subtimer was not stopped\n", stderr); - numNotStopped &= 0x2; // Zero out 2^0 - } - if (numNotStopped == 0x0) { - fputs("Ignoring attempt to start running timer and subtimer\n", stderr); - return; - } - - timer->state = pb_Timer_RUNNING; - subtimer->state = pb_Timer_RUNNING; - -#if _POSIX_VERSION >= 200112L - { - struct timeval tv; - gettimeofday(&tv, NULL); - - if (numNotStopped & 0x2) { - timer->init = tv.tv_sec * 1000000LL + tv.tv_usec; - } - - if (numNotStopped & 0x1) { - subtimer->init = tv.tv_sec * 1000000LL + tv.tv_usec; - } - } -#else -# error "pb_StartTimer: not implemented for this system" -#endif - -} - -void -pb_StopTimer(struct pb_Timer *timer) -{ - - pb_Timestamp fini; - - if (timer->state != pb_Timer_RUNNING) { - fputs("Ignoring attempt to stop a stopped timer\n", stderr); - return; - } - - timer->state = pb_Timer_STOPPED; - -#if _POSIX_VERSION >= 200112L - { - struct timeval tv; - gettimeofday(&tv, NULL); - fini = tv.tv_sec * 1000000LL + tv.tv_usec; - } -#else -# error "pb_StopTimer: not implemented for this system" -#endif - - accumulate_time(&timer->elapsed, timer->init, fini); - timer->init = fini; - -} - -void pb_StopTimerAndSubTimer(struct pb_Timer *timer, struct pb_Timer *subtimer) { - - pb_Timestamp fini; - - unsigned int numNotRunning = 0x3; // 0b11 - if (timer->state != pb_Timer_RUNNING) { - fputs("Warning: Timer was not running\n", stderr); - numNotRunning &= 0x1; // Zero out 2^1 - } - if (subtimer->state != pb_Timer_RUNNING) { - fputs("Warning: Subtimer was not running\n", stderr); - numNotRunning &= 0x2; // Zero out 2^0 - } - if (numNotRunning == 0x0) { - fputs("Ignoring attempt to stop stopped timer and subtimer\n", stderr); - return; - } - - - timer->state = pb_Timer_STOPPED; - subtimer->state = pb_Timer_STOPPED; - -#if _POSIX_VERSION >= 200112L - { - struct timeval tv; - gettimeofday(&tv, NULL); - fini = tv.tv_sec * 1000000LL + tv.tv_usec; - } -#else -# error "pb_StopTimer: not implemented for this system" -#endif - - if (numNotRunning & 0x2) { - accumulate_time(&timer->elapsed, timer->init, fini); - timer->init = fini; - } - - if (numNotRunning & 0x1) { - accumulate_time(&subtimer->elapsed, subtimer->init, fini); - subtimer->init = fini; - } - -} - -/* Get the elapsed time in seconds. */ -double -pb_GetElapsedTime(struct pb_Timer *timer) -{ - double ret; - - if (timer->state != pb_Timer_STOPPED) { - fputs("Elapsed time from a running timer is inaccurate\n", stderr); - } - -#if _POSIX_VERSION >= 200112L - ret = timer->elapsed / 1e6; -#else -# error "pb_GetElapsedTime: not implemented for this system" -#endif - return ret; -} - -void -pb_InitializeTimerSet(struct pb_TimerSet *timers) -{ - int n; - - timers->wall_begin = get_time(); - - timers->current = pb_TimerID_NONE; - - timers->async_markers = NULL; - - - for (n = 0; n < pb_TimerID_LAST; n++) { - pb_ResetTimer(&timers->timers[n]); - timers->sub_timer_list[n] = NULL; // free first? - } -} - -void -pb_AddSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID pb_Category) { - - struct pb_SubTimer *subtimer = (struct pb_SubTimer *) malloc - (sizeof(struct pb_SubTimer)); - - int len = strlen(label); - - subtimer->label = (char *) malloc (sizeof(char)*(len+1)); - sprintf(subtimer->label, "%s\0", label); - - pb_ResetTimer(&subtimer->timer); - subtimer->next = NULL; - - struct pb_SubTimerList *subtimerlist = timers->sub_timer_list[pb_Category]; - if (subtimerlist == NULL) { - subtimerlist = (struct pb_SubTimerList *) malloc - (sizeof(struct pb_SubTimerList)); - subtimerlist->subtimer_list = subtimer; - timers->sub_timer_list[pb_Category] = subtimerlist; - } else { - // Append to list - struct pb_SubTimer *element = subtimerlist->subtimer_list; - while (element->next != NULL) { - element = element->next; - } - element->next = subtimer; - } - -} - -void -pb_SwitchToSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID category) -{ - -// switchToSub( NULL, NONE -// switchToSub( NULL, some -// switchToSub( some, some -// switchToSub( some, NONE -- tries to find "some" in NONE's sublist, which won't be printed - - struct pb_Timer *topLevelToStop = NULL; - if (timers->current != category && timers->current != pb_TimerID_NONE) { - // Switching to subtimer in a different category needs to stop the top-level current, different categoried timer. - // NONE shouldn't have a timer associated with it, so exclude from branch - topLevelToStop = &timers->timers[timers->current]; - } - - struct pb_SubTimerList *subtimerlist = timers->sub_timer_list[timers->current]; - struct pb_SubTimer *curr = (subtimerlist == NULL) ? NULL : subtimerlist->current; - - if (timers->current != pb_TimerID_NONE) { - if (curr != NULL && topLevelToStop != NULL) { - pb_StopTimerAndSubTimer(topLevelToStop, &curr->timer); - } else if (curr != NULL) { - pb_StopTimer(&curr->timer); - } else { - pb_StopTimer(topLevelToStop); - } - } - - subtimerlist = timers->sub_timer_list[category]; - struct pb_SubTimer *subtimer = NULL; - - if (label != NULL) { - subtimer = subtimerlist->subtimer_list; - while (subtimer != NULL) { - if (strcmp(subtimer->label, label) == 0) { - break; - } else { - subtimer = subtimer->next; - } - } - } - - if (category != pb_TimerID_NONE) { - - if (subtimerlist != NULL) { - subtimerlist->current = subtimer; - } - - if (category != timers->current && subtimer != NULL) { - pb_StartTimerAndSubTimer(&timers->timers[category], &subtimer->timer); - } else if (subtimer != NULL) { - // Same category, different non-NULL subtimer - pb_StartTimer(&subtimer->timer); - } else{ - // Different category, but no subtimer (not found or specified as NULL) -- unprefered way of setting topLevel timer - pb_StartTimer(&timers->timers[category]); - } - } - - timers->current = category; - -} - -void -pb_SwitchToTimer(struct pb_TimerSet *timers, enum pb_TimerID timer) -{ - /* Stop the currently running timer */ - /*if (timers->current != pb_TimerID_NONE) { - struct pb_SubTimer *currSubTimer = NULL; - struct pb_SubTimerList *subtimerlist = timers->sub_timer_list[timers->current]; - - if ( subtimerlist != NULL) { - currSubTimer = timers->sub_timer_list[timers->current]->current; - } - if ( currSubTimer!= NULL) { - pb_StopTimerAndSubTimer(&timers->timers[timers->current], &currSubTimer->timer); - } else { - pb_StopTimer(&timers->timers[timers->current]); - } - } - - timers->current = timer; - - if (timer != pb_TimerID_NONE) { - pb_StartTimer(&timers->timers[timer]); - }*/ -} - -void -pb_PrintTimerSet(struct pb_TimerSet *timers) -{ - - pb_Timestamp wall_end = get_time(); - - struct pb_Timer *t = timers->timers; - struct pb_SubTimer* sub = NULL; - - int maxSubLength; - - const char *categories[] = { - "IO", "Kernel", "Copy", "Driver", "Copy Async", "Compute" - }; - - const int maxCategoryLength = 10; - - int i; - for(i = 1; i < pb_TimerID_LAST-1; ++i) { // exclude NONE and OVRELAP from this format - if(pb_GetElapsedTime(&t[i]) != 0) { - - // Print Category Timer - printf("%-*s: %f\n", maxCategoryLength, categories[i-1], pb_GetElapsedTime(&t[i])); - - if (timers->sub_timer_list[i] != NULL) { - sub = timers->sub_timer_list[i]->subtimer_list; - maxSubLength = 0; - while (sub != NULL) { - // Find longest SubTimer label - if (strlen(sub->label) > maxSubLength) { - maxSubLength = strlen(sub->label); - } - sub = sub->next; - } - - // Fit to Categories - if (maxSubLength <= maxCategoryLength) { - maxSubLength = maxCategoryLength; - } - - sub = timers->sub_timer_list[i]->subtimer_list; - - // Print SubTimers - while (sub != NULL) { - printf(" -%-*s: %f\n", maxSubLength, sub->label, pb_GetElapsedTime(&sub->timer)); - sub = sub->next; - } - } - } - } - - if(pb_GetElapsedTime(&t[pb_TimerID_OVERLAP]) != 0) - printf("CPU/Kernel Overlap: %f\n", pb_GetElapsedTime(&t[pb_TimerID_OVERLAP])); - - float walltime = (wall_end - timers->wall_begin)/ 1e6; - printf("Timer Wall Time: %f\n", walltime); - -} - -void pb_DestroyTimerSet(struct pb_TimerSet * timers) -{ - /* clean up all of the async event markers */ - struct pb_async_time_marker_list ** event = &(timers->async_markers); - while( *event != NULL) { - struct pb_async_time_marker_list ** next = &((*event)->next); - free(*event); - (*event) = NULL; - event = next; - } - - int i = 0; - for(i = 0; i < pb_TimerID_LAST; ++i) { - if (timers->sub_timer_list[i] != NULL) { - struct pb_SubTimer *subtimer = timers->sub_timer_list[i]->subtimer_list; - struct pb_SubTimer *prev = NULL; - while (subtimer != NULL) { - free(subtimer->label); - prev = subtimer; - subtimer = subtimer->next; - free(prev); - } - free(timers->sub_timer_list[i]); - } - } -} - -