From f2dd6120786d392e287f94b1b9a337a5e6c3915a Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 25 Nov 2019 04:49:55 -0500 Subject: [PATCH] update --- benchmarks/opencl/bfs/Makefile | 14 +- benchmarks/opencl/guassian/Makefile | 68 + benchmarks/opencl/guassian/OriginalParallel.c | 241 +++ benchmarks/opencl/guassian/README.txt | 60 + benchmarks/opencl/guassian/clutils.cpp | 1413 +++++++++++++++++ benchmarks/opencl/guassian/clutils.h | 281 ++++ benchmarks/opencl/guassian/gaussianElim.h | 40 + benchmarks/opencl/guassian/gettimeofday.cpp | 74 + benchmarks/opencl/guassian/gettimeofday.h | 17 + benchmarks/opencl/guassian/kernel.cl | 49 + benchmarks/opencl/guassian/libgaussian.a | Bin 0 -> 9572 bytes benchmarks/opencl/guassian/main.cc | 412 +++++ benchmarks/opencl/guassian/matrix4.txt | 11 + benchmarks/opencl/guassian/run | 1 + benchmarks/opencl/guassian/utils.cpp | 204 +++ benchmarks/opencl/guassian/utils.h | 84 + benchmarks/opencl/kmeans/Makefile | 13 +- benchmarks/opencl/kmeans/cluster.c | 155 ++ benchmarks/opencl/kmeans/getopt.c | 1184 ++++++++++++++ benchmarks/opencl/kmeans/getopt.h | 191 +++ benchmarks/opencl/kmeans/kernel.cl | 61 + benchmarks/opencl/kmeans/kmeans | Bin 0 -> 73253 bytes benchmarks/opencl/kmeans/kmeans.h | 65 + benchmarks/opencl/kmeans/kmeans_clustering.c | 176 ++ benchmarks/opencl/kmeans/libkmeans.a | Bin 0 -> 9346 bytes benchmarks/opencl/kmeans/main.cc | 359 +++++ benchmarks/opencl/kmeans/read_input.c | 338 ++++ benchmarks/opencl/kmeans/rmse.c | 94 ++ benchmarks/opencl/kmeans/run | 1 + benchmarks/opencl/saxpy/Makefile | 14 +- benchmarks/opencl/saxpy/main.cc | 2 +- benchmarks/opencl/sfilter/Makefile | 14 +- benchmarks/opencl/sgemm/Makefile | 14 +- benchmarks/opencl/vecadd/Makefile | 14 +- 34 files changed, 5627 insertions(+), 37 deletions(-) create mode 100644 benchmarks/opencl/guassian/Makefile create mode 100755 benchmarks/opencl/guassian/OriginalParallel.c create mode 100755 benchmarks/opencl/guassian/README.txt create mode 100755 benchmarks/opencl/guassian/clutils.cpp create mode 100755 benchmarks/opencl/guassian/clutils.h create mode 100755 benchmarks/opencl/guassian/gaussianElim.h create mode 100755 benchmarks/opencl/guassian/gettimeofday.cpp create mode 100755 benchmarks/opencl/guassian/gettimeofday.h create mode 100755 benchmarks/opencl/guassian/kernel.cl create mode 100644 benchmarks/opencl/guassian/libgaussian.a create mode 100755 benchmarks/opencl/guassian/main.cc create mode 100755 benchmarks/opencl/guassian/matrix4.txt create mode 100755 benchmarks/opencl/guassian/run create mode 100755 benchmarks/opencl/guassian/utils.cpp create mode 100755 benchmarks/opencl/guassian/utils.h create mode 100755 benchmarks/opencl/kmeans/cluster.c create mode 100755 benchmarks/opencl/kmeans/getopt.c create mode 100755 benchmarks/opencl/kmeans/getopt.h create mode 100755 benchmarks/opencl/kmeans/kernel.cl create mode 100755 benchmarks/opencl/kmeans/kmeans create mode 100755 benchmarks/opencl/kmeans/kmeans.h create mode 100755 benchmarks/opencl/kmeans/kmeans_clustering.c create mode 100644 benchmarks/opencl/kmeans/libkmeans.a create mode 100755 benchmarks/opencl/kmeans/main.cc create mode 100755 benchmarks/opencl/kmeans/read_input.c create mode 100755 benchmarks/opencl/kmeans/rmse.c create mode 100755 benchmarks/opencl/kmeans/run diff --git a/benchmarks/opencl/bfs/Makefile b/benchmarks/opencl/bfs/Makefile index b085b35f..98594a82 100644 --- a/benchmarks/opencl/bfs/Makefile +++ b/benchmarks/opencl/bfs/Makefile @@ -31,18 +31,20 @@ CXXFLAGS += -I$(POCL_INC_PATH) VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a -PROJECT=bfs +PROJECT = bfs + +SRCS = main.cc all: $(PROJECT).dump $(PROJECT).hex lib$(PROJECT).a: kernel.cl POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl -$(PROJECT).elf: main.cc lib$(PROJECT).a - $(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) main.cc $(VX_LIBS) -o $(PROJECT).elf +$(PROJECT).elf: $(SRCS) lib$(PROJECT).a + $(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) $(SRCS) $(VX_LIBS) -o $(PROJECT).elf -$(PROJECT).qemu: main.cc lib$(PROJECT).a - $(CXX) $(CXXFLAGS) main.cc $(QEMU_LIBS) -o $(PROJECT).qemu +$(PROJECT).qemu: $(SRCS) lib$(PROJECT).a + $(CXX) $(CXXFLAGS) $(SRCS) $(QEMU_LIBS) -o $(PROJECT).qemu $(PROJECT).hex: $(PROJECT).elf $(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex @@ -63,4 +65,4 @@ gdb-c: $(PROJECT).qemu $(GDB) $(PROJECT).qemu clean: - rm -rf *.o *.elf *.dump *.hex *.a *.pocl *.qemu \ No newline at end of file + rm -rf *.o *.elf *.dump *.hex *.qemu *.log *.debug \ No newline at end of file diff --git a/benchmarks/opencl/guassian/Makefile b/benchmarks/opencl/guassian/Makefile new file mode 100644 index 00000000..5e5d6b79 --- /dev/null +++ b/benchmarks/opencl/guassian/Makefile @@ -0,0 +1,68 @@ +RISCV_TOOL_PATH = $(wildcard ~/dev/riscv-gnu-toolchain/drops) +POCL_CC_PATH = $(wildcard ~/dev/pocl/drops_riscv_cc) +POCL_INC_PATH = $(wildcard ../include) +POCL_LIB_PATH = $(wildcard ../lib) +VX_RT_PATH = $(wildcard ../../../runtime) +VX_SIMX_PATH = $(wildcard ../../../simX/obj_dir) + +CC = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc +CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++ +DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump +HEX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy +GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb + +VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c +VX_SRCS += $(VX_RT_PATH)/startup/vx_start.s +VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s +VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c +VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s +VX_SRCS += $(VX_RT_PATH)/tests/tests.c +VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c +VX_SRCS += $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) + +VX_CFLAGS = -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld + +CXXFLAGS = -g -O0 -march=rv32im -mabi=ilp32 +CXXFLAGS += -ffreestanding # program may not begin at main() +CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections +CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions +CXXFLAGS += -I$(POCL_INC_PATH) + +VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a +QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a + +PROJECT = gaussian + +SRCS = main.cc clutils.cpp utils.cpp + +all: $(PROJECT).dump $(PROJECT).hex + +lib$(PROJECT).a: kernel.cl + POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl + +$(PROJECT).elf: $(SRCS) lib$(PROJECT).a + $(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) $(SRCS) $(VX_LIBS) -o $(PROJECT).elf + +$(PROJECT).qemu: $(SRCS) lib$(PROJECT).a + $(CXX) $(CXXFLAGS) $(SRCS) $(QEMU_LIBS) -o $(PROJECT).qemu + +$(PROJECT).hex: $(PROJECT).elf + $(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex + +$(PROJECT).dump: $(PROJECT).elf + $(DMP) -D $(PROJECT).elf > $(PROJECT).dump + +run: $(PROJECT).hex + POCL_DEBUG=all $(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug + +qemu: $(PROJECT).qemu + POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -d in_asm -D debug.log $(PROJECT).qemu + +gdb-s: $(PROJECT).qemu + POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -g 1234 -d in_asm -D debug.log $(PROJECT).qemu + +gdb-c: $(PROJECT).qemu + $(GDB) $(PROJECT).qemu + +clean: + rm -rf *.o *.elf *.dump *.hex *.qemu *.log *.debug diff --git a/benchmarks/opencl/guassian/OriginalParallel.c b/benchmarks/opencl/guassian/OriginalParallel.c new file mode 100755 index 00000000..6a899b68 --- /dev/null +++ b/benchmarks/opencl/guassian/OriginalParallel.c @@ -0,0 +1,241 @@ +/*----------------------------------------------------------- +** ge_p.c -- The program is to solve a linear system Ax = b +** by using Gaussian Elimination. The algorithm on page 101 +** ("Foundations of Parallel Programming") is used. +** The sequential version is ge_s.c. This parallel +** implementation converts three independent for() loops +** into three Fans. Use the data file ge_3.dat to verify +** the correction of the output. +** +** Written by Andreas Kura, 02/15/95 +** Modified by Chong-wei Xu, /04/20/95 +**----------------------------------------------------------- +*/ +#include +#include + +int Size, t; +float **a, *b; +BEGIN_SHARED_DECL + float **m; +END_SHARED_DECL; +FILE *fp; + +void InitProblemOnce(); +void InitPerRun(); +void ForwardSub(); +void Fan1(); +void Fan2(); +void Fan3(); +void InitMat(); +void InitAry(); +void PrintMat(); +void PrintAry(); + +main () +{ + InitializeUs(); + MakeSharedVariables; /* to make SHARED m */ + + InitProblemOnce(); + InitPerRun(); + ForwardSub(); + + printf("The result of matrix m is: \n"); + PrintMat(SHARED m, Size, Size); + printf("The result of matrix a is: \n"); + PrintMat(a, Size, Size); + printf("The result of array b is: \n"); + PrintAry(b, Size); +} + +/*------------------------------------------------------ +** InitProblemOnce -- Initialize all of matrices and +** vectors by opening a data file specified by the user. +** +** We used dynamic array **a, *b, and **m to allocate +** the memory storages. +**------------------------------------------------------ +*/ +void InitProblemOnce() +{ + char filename[30]; + + printf("Enter the data file name: "); + scanf("%s", filename); + printf("The file name is: %s\n", filename); + + fp = fopen(filename, "r"); + + fscanf(fp, "%d", &Size); + a = (float **) UsAllocScatterMatrix(Size, Size, sizeof(float)); + /* + a = (float **) malloc(Size * sizeof(float *)); + for (i=0; i +#include +#include +#include + +#include + +#include "clutils.h" +#include "utils.h" + + +// The following variables have file scope to simplify +// the utility functions + +//! All discoverable OpenCL platforms +static cl_platform_id* platforms = NULL; +static cl_uint numPlatforms; + +//! All discoverable OpenCL devices (one pointer per platform) +static cl_device_id* devices = NULL; +static cl_uint* numDevices; + +//! The chosen OpenCL platform +static cl_platform_id platform = NULL; + +//! The chosen OpenCL device +static cl_device_id device = NULL; + +//! OpenCL context +static cl_context context = NULL; + +//! OpenCL command queue +static cl_command_queue commandQueue = NULL; +static cl_command_queue commandQueueProf = NULL; +static cl_command_queue commandQueueNoProf = NULL; + +//! Global status of events +static bool eventsEnabled = false; + + +//------------------------------------------------------- +// Initialization and Cleanup +//------------------------------------------------------- + +//! Initialize OpenCl environment on one device +/*! + Init function for one device. Looks for supported devices and creates a context + \return returns a context initialized +*/ +/*cl_context cl_init(char devicePreference) +{ + cl_int status; + + // Discover and populate the platforms + status = clGetPlatformIDs(0, NULL, &numPlatforms); + cl_errChk(status, "Getting platform IDs", true); + if (numPlatforms > 0) + { + // Get all the platforms + platforms = (cl_platform_id*)alloc(numPlatforms * + sizeof(cl_platform_id)); + + status = clGetPlatformIDs(numPlatforms, platforms, NULL); + cl_errChk(status, "Getting platform IDs", true); + } + else + { + // If no platforms are available, we shouldn't continue + printf("No OpenCL platforms found\n"); + exit(-1); + } + + // Allocate space for the device lists and lengths + numDevices = (cl_uint*)alloc(sizeof(cl_uint)*numPlatforms); + devices = (cl_device_id**)alloc(sizeof(cl_device_id*)*numPlatforms); + + // If a device preference was supplied, we'll limit the search of devices + // based on type + cl_device_type deviceType = CL_DEVICE_TYPE_ALL; + if(devicePreference == 'c') { + deviceType = CL_DEVICE_TYPE_CPU; + } + if(devicePreference == 'g') { + deviceType = CL_DEVICE_TYPE_GPU; + } + + // Traverse the platforms array printing information and + // populating devices + for(unsigned int i = 0; i < numPlatforms ; i++) + { + // Print out some basic info about the platform + char* platformName = NULL; + char* platformVendor = NULL; + + platformName = cl_getPlatformName(platforms[i]); + platformVendor = cl_getPlatformVendor(platforms[i]); + + status = clGetDeviceIDs(platforms[i], deviceType, 0, NULL, &numDevices[i]); + cl_errChk(status, "Getting device IDs", false); + if(status != CL_SUCCESS) { + printf("This is a known NVIDIA bug (if platform == AMD then die)\n"); + printf("Setting number of devices to 0 and continuing\n"); + numDevices[i] = 0; + } + + printf("Platform %d (%d devices):\n", i, numDevices[i]); + printf("\tName: %s\n", platformName); + printf("\tVendor: %s\n", platformVendor); + + free(platformName); + free(platformVendor); + + // Populate OpenCL devices if any exist + if(numDevices[i] != 0) + { + // Allocate an array of devices of size "numDevices" + devices[i] = (cl_device_id*)alloc(sizeof(cl_device_id)*numDevices[i]); + + // Populate Arrray with devices + status = clGetDeviceIDs(platforms[i], deviceType, numDevices[i], + devices[i], NULL); + cl_errChk(status, "Getting device IDs", true); + } + + // Print some information about each device + for( unsigned int j = 0; j < numDevices[i]; j++) + { + char* deviceName = NULL; + char* deviceVendor = NULL; + + printf("\tDevice %d:\n", j); + + deviceName = cl_getDeviceName(devices[i][j]); + deviceVendor = cl_getDeviceVendor(devices[i][j]); + + printf("\t\tName: %s\n", deviceName); + printf("\t\tVendor: %s\n", deviceVendor); + + free(deviceName); + free(deviceVendor); + } + } + + // Hard-code in the platform/device to use, or uncomment 'scanf' + // to decide at runtime + cl_uint chosen_platform, chosen_device; + // UNCOMMENT the following two lines to manually select device each time + //printf("Enter Platform and Device No (Seperated by Space) \n"); + //scanf("%d %d", &chosen_platform, &chosen_device); + chosen_platform = 0; + chosen_device = 0; + printf("Using Platform %d, Device %d \n", chosen_platform, chosen_device); + + // Do a sanity check of platform/device selection + if(chosen_platform >= numPlatforms || + chosen_device >= numDevices[chosen_platform]) { + printf("Invalid platform/device combination\n"); + exit(-1); + } + + // Set the selected platform and device + platform = platforms[chosen_platform]; + device = devices[chosen_platform][chosen_device]; + + // Create the context + cl_context_properties cps[3] = {CL_CONTEXT_PLATFORM, + (cl_context_properties)(platform), 0}; + context = clCreateContext(cps, 1, &device, NULL, NULL, &status); + cl_errChk(status, "Creating context", true); + + // Create the command queue + commandQueueProf = clCreateCommandQueue(context, device, + CL_QUEUE_PROFILING_ENABLE, &status); + cl_errChk(status, "creating command queue", true); + + commandQueueNoProf = clCreateCommandQueue(context, device, 0, &status); + cl_errChk(status, "creating command queue", true); + + if(eventsEnabled) { + printf("Profiling enabled\n"); + commandQueue = commandQueueProf; + } + else { + printf("Profiling disabled\n"); + commandQueue = commandQueueNoProf; + } + + return context; +}*/ + +cl_context cl_init_context(int platform, int dev,int quiet) { + int printInfo=1; + if (platform >= 0 && dev >= 0) printInfo = 0; + cl_int status; + // Used to iterate through the platforms and devices, respectively + cl_uint numPlatforms; + cl_uint numDevices; + + // These will hold the platform and device we select (can potentially be + // multiple, but we're just doing one for now) + // cl_platform_id platform = NULL; + + /*status = clGetPlatformIDs(0, NULL, &numPlatforms); + if (printInfo) printf("Number of platforms detected:%d\n", numPlatforms); + + // Print some information about the available platforms + cl_platform_id *platforms = NULL; + cl_device_id * devices = NULL; + if (numPlatforms > 0) + { + // get all the platforms + platforms = (cl_platform_id*)malloc(numPlatforms * + sizeof(cl_platform_id)); + status = clGetPlatformIDs(numPlatforms, platforms, NULL); + + // Traverse the platforms array + if (printInfo) printf("Checking For OpenCl Compatible Devices\n"); + for(unsigned int i = 0; i < numPlatforms ; i++) + { + char pbuf[100]; + if (printInfo) printf("Platform %d:\t", i); + status = clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, + sizeof(pbuf), pbuf, NULL); + if (printInfo) printf("Vendor: %s\n", pbuf); + + //unsigned int numDevices; + + status = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, 0, NULL, &numDevices); + if(cl_errChk(status, "checking for devices",true)) + exit(1); + if(numDevices == 0) { + printf("There are no devices for Platform %d\n",i); + exit(0); + } + else + { + if (printInfo) printf("\tNo of devices for Platform %d is %u\n",i, numDevices); + //! Allocate an array of devices of size "numDevices" + devices = (cl_device_id*)malloc(sizeof(cl_device_id)*numDevices); + //! Populate Arrray with devices + status = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, numDevices, + devices, NULL); + if(cl_errChk(status, "getting device IDs",true)) { + exit(1); + } + } + for( unsigned int j = 0; j < numDevices; j++) + { + char dbuf[100]; + char deviceStr[100]; + if (printInfo) printf("\tDevice: %d\t", j); + status = clGetDeviceInfo(devices[j], CL_DEVICE_VENDOR, sizeof(dbuf), + deviceStr, NULL); + cl_errChk(status, "Getting Device Info\n",true); + if (printInfo) printf("Vendor: %s", deviceStr); + status = clGetDeviceInfo(devices[j], CL_DEVICE_NAME, sizeof(dbuf), + dbuf, NULL); + if (printInfo) printf("\n\t\tName: %s\n", dbuf); + } + } + } + else + { + // If no platforms are available, we're sunk! + printf("No OpenCL platforms found\n"); + exit(0); + } + + int platform_touse; + unsigned int device_touse; + if (printInfo) printf("Enter Platform and Device No (Seperated by Space) \n"); + if (printInfo) scanf("%d %d", &platform_touse, &device_touse); + else { + platform_touse = platform; + device_touse = dev; + } + if (!quiet) printf("Using Platform %d \t Device No %d \n",platform_touse, device_touse); + + //! Recheck how many devices does our chosen platform have + status = clGetDeviceIDs(platforms[platform_touse], CL_DEVICE_TYPE_ALL, 0, NULL, &numDevices); + + if(device_touse > numDevices) + { + printf("Invalid Device Number\n"); + exit(1); + } + + //! Populate devices array with all the visible devices of our chosen platform + devices = (cl_device_id *)malloc(sizeof(cl_device_id)*numDevices); + status = clGetDeviceIDs(platforms[platform_touse], + CL_DEVICE_TYPE_ALL, numDevices, + devices, NULL); + if(cl_errChk(status,"Error in Getting Devices\n",true)) exit(1); + + + //!Check if Device requested is a CPU or a GPU + cl_device_type dtype; + device = devices[device_touse]; + status = clGetDeviceInfo(devices[device_touse], + CL_DEVICE_TYPE, + sizeof(dtype), + (void *)&dtype, + NULL); + if(cl_errChk(status,"Error in Getting Device Info\n",true)) exit(1); + if(dtype == CL_DEVICE_TYPE_GPU) { + if (!quiet) printf("Creating GPU Context\n\n"); + } + else if (dtype == CL_DEVICE_TYPE_CPU) { + if (!quiet) printf("Creating CPU Context\n\n"); + } + else perror("This Context Type Not Supported\n"); + + cl_context_properties cps[3] = {CL_CONTEXT_PLATFORM, + (cl_context_properties)(platforms[platform_touse]), 0}; + + cl_context_properties *cprops = cps; + + context = clCreateContextFromType( + cprops, (cl_device_type)dtype, + NULL, NULL, &status); + if(cl_errChk(status, "creating Context",true)) { + exit(1); + }*/ + + // Getting platform and device information + + numPlatforms = 1; + numDevices = 1; + int platform_touse = 0; + int device_touse = 0; + platforms = (cl_platform_id*)malloc(numPlatforms * sizeof(cl_platform_id)); + devices = (cl_device_id*)malloc(sizeof(cl_device_id)*numDevices); + + status = clGetPlatformIDs(1, platforms, NULL); + cl_errChk(status, "Oops!", true); + status = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_DEFAULT, 1, devices, NULL); + cl_errChk(status, "Oops!", true); + context = clCreateContext(NULL, 1, devices, NULL, NULL, &status); + cl_errChk(status, "Oops!", true); + + device=devices[device_touse]; + +#define PROFILING + +#ifdef PROFILING + + commandQueue = clCreateCommandQueue(context, + devices[device_touse], CL_QUEUE_PROFILING_ENABLE, &status); + +#else + + clCommandQueue = clCreateCommandQueue(clGPUContext, + devices[device_touse], NULL, &status); + +#endif // PROFILING + + if(cl_errChk(status, "creating command queue",true)) { + exit(1); + } + return context; +} +/*! + Release all resources that the user doesn't have access to. +*/ +void cl_cleanup() +{ + // Free the command queue + if(commandQueue) { + clReleaseCommandQueue(commandQueue); + } + + // Free the context + if(context) { + clReleaseContext(context); + } + + free(devices); + free(numDevices); + + // Free the platforms + free(platforms); +} + +//! Release a kernel object +/*! + \param mem The kernel object to release +*/ +void cl_freeKernel(cl_kernel kernel) +{ + cl_int status; + + if(kernel != NULL) { + status = clReleaseKernel(kernel); + cl_errChk(status, "Releasing kernel object", true); + } +} + +//! Release memory allocated on the device +/*! + \param mem The device pointer to release +*/ +void cl_freeMem(cl_mem mem) +{ + cl_int status; + + if(mem != NULL) { + status = clReleaseMemObject(mem); + cl_errChk(status, "Releasing mem object", true); + } +} + +//! Release a program object +/*! + \param mem The program object to release +*/ +void cl_freeProgram(cl_program program) +{ + cl_int status; + + if(program != NULL) { + status = clReleaseProgram(program); + cl_errChk(status, "Releasing program object", true); + } +} + +//! Returns a reference to the command queue +/*! + Returns a reference to the command queue \n + Used for any OpenCl call that needs the command queue declared in clutils.cpp +*/ +cl_command_queue cl_getCommandQueue() +{ + return commandQueue; +} + +//------------------------------------------------------- +// Synchronization functions +//------------------------------------------------------- + +/*! + Wait till all pending commands in queue are finished +*/ +void cl_sync() +{ + clFinish(commandQueue); +} + + +//------------------------------------------------------- +// Memory allocation +//------------------------------------------------------- + +//! Allocate a buffer on a device +/*! + \param mem_size Size of memory in bytes + \param flags Optional cl_mem_flags + \return Returns a cl_mem object that points to device memory +*/ +cl_mem cl_allocBuffer(size_t mem_size, cl_mem_flags flags) +{ + cl_mem mem; + cl_int status; + + /*! + Logging information for keeping track of device memory + */ + static int allocationCount = 1; + static size_t allocationSize = 0; + + allocationCount++; + allocationSize += mem_size; + + mem = clCreateBuffer(context, flags, mem_size, NULL, &status); + + cl_errChk(status, "creating buffer", true); + + return mem; +} + +//! Allocate constant memory on device +/*! + \param mem_size Size of memory in bytes + \param host_ptr Host pointer that contains the data + \return Returns a cl_mem object that points to device memory +*/ +cl_mem cl_allocBufferConst(size_t mem_size, void* host_ptr) +{ + cl_mem mem; + cl_int status; + + mem = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + mem_size, host_ptr, &status); + cl_errChk(status, "Error creating const mem buffer", true); + + return mem; +} + +//! Allocate a buffer on device pinning the host memory at host_ptr +/*! + \param mem_size Size of memory in bytes + \return Returns a cl_mem object that points to pinned memory on the host +*/ +cl_mem cl_allocBufferPinned(size_t mem_size) +{ + cl_mem mem; + cl_int status; + + mem = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, + mem_size, NULL, &status); + cl_errChk(status, "Error allocating pinned memory", true); + + return mem; +} + +//! Allocate an image on a device +/*! + \param height Number of rows in the image + \param width Number of columns in the image + \param elemSize Size of the elements in the image + \param flags Optional cl_mem_flags + \return Returns a cl_mem object that points to device memory +*/ +cl_mem cl_allocImage(size_t height, size_t width, char type, cl_mem_flags flags) +{ + cl_mem mem; + cl_int status; + + size_t elemSize = 0; + + cl_image_format format; + format.image_channel_order = CL_R; + + switch(type) { + case 'f': + elemSize = sizeof(float); + format.image_channel_data_type = CL_FLOAT; + break; + case 'i': + elemSize = sizeof(int); + format.image_channel_data_type = CL_SIGNED_INT32; + break; + default: + printf("Error creating image: Unsupported image type.\n"); + exit(-1); + } + + /*! + Logging information for keeping track of device memory + */ + static int allocationCount = 1; + static size_t allocationSize = 0; + + allocationCount++; + allocationSize += height*width*elemSize; + + // Create the image + mem = clCreateImage2D(context, flags, &format, width, height, 0, NULL, &status); + + //cl_errChk(status, "creating image", true); + if(status != CL_SUCCESS) { + printf("Error creating image: Images may not be supported for this device.\n"); + printSupportedImageFormats(); + getchar(); + exit(-1); + } + + return mem; +} + + +//------------------------------------------------------- +// Data transfers +//------------------------------------------------------- + + +// Copy and map a buffer +void* cl_copyAndMapBuffer(cl_mem dst, cl_mem src, size_t size) { + + void* ptr; // Pointer to the pinned memory that will be returned + + cl_copyBufferToBuffer(dst, src, size); + + ptr = cl_mapBuffer(dst, size, CL_MAP_READ); + + return ptr; +} + +// Copy a buffer +void cl_copyBufferToBuffer(cl_mem dst, cl_mem src, size_t size) +{ + cl_int status; + status = clEnqueueCopyBuffer(commandQueue, src, dst, 0, 0, size, 0, NULL, + NULL); + cl_errChk(status, "Copying buffer", true); + +} + +//! Copy a buffer to the device +/*! + \param dst Valid device pointer + \param src Host pointer that contains the data + \param mem_size Size of data to copy + \param blocking Blocking or non-blocking operation +*/ +void cl_copyBufferToDevice(cl_mem dst, void* src, size_t mem_size, cl_bool blocking) +{ + cl_int status; + status = clEnqueueWriteBuffer(commandQueue, dst, blocking, 0, + mem_size, src, 0, NULL, NULL); + cl_errChk(status, "Writing buffer", true); + +} + +//! Copy a buffer to the host +/*! + \param dst Valid host pointer + \param src Device pointer that contains the data + \param mem_size Size of data to copy + \param blocking Blocking or non-blocking operation +*/ +void cl_copyBufferToHost(void* dst, cl_mem src, size_t mem_size, cl_bool blocking) +{ + cl_int status; + status = clEnqueueReadBuffer(commandQueue, src, blocking, 0, + mem_size, dst, 0, NULL, NULL); + cl_errChk(status, "Reading buffer", true); + +} + +//! Copy a buffer to a 2D image +/*! + \param src Valid device buffer + \param dst Empty device image + \param mem_size Size of data to copy +*/ +void cl_copyBufferToImage(cl_mem buffer, cl_mem image, int height, int width) +{ + size_t origin[3] = {0, 0, 0}; + size_t region[3] = {width, height, 1}; + + cl_int status; + status = clEnqueueCopyBufferToImage(commandQueue, buffer, image, 0, + origin, region, 0, NULL, NULL); + cl_errChk(status, "Copying buffer to image", true); + +} + +// Copy data to an image on the device +/*! + \param dst Valid device pointer + \param src Host pointer that contains the data + \param height Height of the image + \param width Width of the image +*/ +void cl_copyImageToDevice(cl_mem dst, void* src, size_t height, size_t width) +{ + cl_int status; + size_t origin[3] = {0, 0, 0}; + size_t region[3] = {width, height, 1}; + + status = clEnqueueWriteImage(commandQueue, dst, CL_TRUE, origin, + region, 0, 0, src, 0, NULL, NULL); + cl_errChk(status, "Writing image", true); +} + +//! Copy an image to the host +/*! + \param dst Valid host pointer + \param src Device pointer that contains the data + \param height Height of the image + \param width Width of the image +*/ +void cl_copyImageToHost(void* dst, cl_mem src, size_t height, size_t width) +{ + cl_int status; + size_t origin[3] = {0, 0, 0}; + size_t region[3] = {width, height, 1}; + + status = clEnqueueReadImage(commandQueue, src, CL_TRUE, origin, + region, 0, 0, dst, 0, NULL, NULL); + cl_errChk(status, "Reading image", true); +} + +//! Map a buffer into a host address +/*! + \param mem cl_mem object + \param mem_size Size of memory in bytes + \param flags Optional cl_mem_flags + \return Returns a host pointer that points to the mapped region +*/ +void *cl_mapBuffer(cl_mem mem, size_t mem_size, cl_mem_flags flags) +{ + cl_int status; + void *ptr; + + ptr = (void *)clEnqueueMapBuffer(commandQueue, mem, CL_TRUE, flags, + 0, mem_size, 0, NULL, NULL, &status); + + cl_errChk(status, "Error mapping a buffer", true); + + return ptr; +} + +//! Unmap a buffer or image +/*! + \param mem cl_mem object + \param ptr A host pointer that points to the mapped region +*/ +void cl_unmapBuffer(cl_mem mem, void *ptr) +{ + + // TODO It looks like AMD doesn't support profiling unmapping yet. Leaving the + // commented code here until it's supported + + cl_int status; + + status = clEnqueueUnmapMemObject(commandQueue, mem, ptr, 0, NULL, NULL); + + cl_errChk(status, "Error unmapping a buffer or image", true); +} + +void cl_writeToZCBuffer(cl_mem mem, void* data, size_t size) +{ + + void* ptr; + + ptr = cl_mapBuffer(mem, size, CL_MAP_WRITE); + + memcpy(ptr, data, size); + + cl_unmapBuffer(mem, ptr); +} + +//------------------------------------------------------- +// Program and kernels +//------------------------------------------------------- + +//! Convert source code file into cl_program +/*! +Compile Opencl source file into a cl_program. The cl_program will be made into a kernel in PrecompileKernels() + +\param kernelPath Filename of OpenCl code +\param compileoptions Compilation options +\param verbosebuild Switch to enable verbose Output +*/ +cl_program cl_compileProgram(char* kernelPath, char* compileoptions, bool verbosebuild ) +{ + cl_int status; + FILE *fp = NULL; + char *source = NULL; + long int size; + + /*printf("\t%s\n", kernelPath); + + // Determine the size of the source file +#ifdef _WIN32 + fopen_s(&fp, kernelPath, "rb"); +#else + fp = fopen(kernelPath, "rb"); +#endif + if(!fp) { + printf("Could not open kernel file\n"); + exit(-1); + } + status = fseek(fp, 0, SEEK_END); + if(status != 0) { + printf("Error seeking to end of file\n"); + exit(-1); + } + size = ftell(fp); + if(size < 0) { + printf("Error getting file position\n"); + exit(-1); + } + rewind(fp); + + // Allocate enough space for the source code + source = (char *)alloc(size + 1); + + // fill with NULLs (just for fun) + for (int i = 0; i < size+1; i++) { + source[i] = '\0'; + } + + // Read in the source code + fread(source, 1, size, fp); + source[size] = '\0';*/ + + // Create the program object + //cl_program clProgramReturn = clCreateProgramWithSource(context, 1, (const char **)&source, NULL, &status); + cl_program clProgramReturn = clCreateProgramWithBuiltInKernels(context, 1, &device, "Fan1;Fan2", &status); + cl_errChk(status, "Creating program", true); + + free(source); + fclose(fp); + + // Try to compile the program + status = clBuildProgram(clProgramReturn, 0, NULL, compileoptions, NULL, NULL); + if(cl_errChk(status, "Building program", false) || verbosebuild == 1) + { + + cl_build_status build_status; + + clGetProgramBuildInfo(clProgramReturn, device, CL_PROGRAM_BUILD_STATUS, + sizeof(cl_build_status), &build_status, NULL); + + if(build_status == CL_SUCCESS && verbosebuild == 0) { + return clProgramReturn; + } + + //char *build_log; + size_t ret_val_size; + printf("Device: %p",device); + clGetProgramBuildInfo(clProgramReturn, device, CL_PROGRAM_BUILD_LOG, 0, + NULL, &ret_val_size); + + char *build_log = (char*)alloc(ret_val_size+1); + + clGetProgramBuildInfo(clProgramReturn, device, CL_PROGRAM_BUILD_LOG, + ret_val_size+1, build_log, NULL); + + // to be careful, terminate with \0 + // there's no information in the reference whether the string is 0 + // terminated or not + build_log[ret_val_size] = '\0'; + + printf("Build log:\n %s...\n", build_log); + if(build_status != CL_SUCCESS) { + getchar(); + exit(-1); + } + else + return clProgramReturn; + } + + // print the ptx information + // printBinaries(clProgram); + + return clProgramReturn; +} + +//! Create a kernel from compiled source +/*! +Create a kernel from compiled source + +\param program Compiled OpenCL program +\param kernel_name Name of the kernel in the program +\return Returns a cl_kernel object for the specified kernel +*/ +cl_kernel cl_createKernel(cl_program program, const char* kernel_name) { + + cl_kernel kernel; + cl_int status; + + kernel = clCreateKernel(program, kernel_name, &status); + cl_errChk(status, "Creating kernel", true); + + return kernel; +} + +//! Set an argument for a OpenCL kernel +/*! +Set an argument for a OpenCL kernel + +\param kernel The kernel for which the argument is being set +\param index The argument index +\param size The size of the argument +\param data A pointer to the argument +*/ +void cl_setKernelArg(cl_kernel kernel, unsigned int index, size_t size, + void* data) +{ + cl_int status; + status = clSetKernelArg(kernel, index, size, data); + + cl_errChk(status, "Setting kernel arg", true); +} + + +//------------------------------------------------------- +// Profiling/events +//------------------------------------------------------- + + +//! Time kernel execution using cl_event +/*! + Prints out the time taken between the start and end of an event + \param event_time +*/ +double cl_computeExecTime(cl_event event_time) +{ + cl_int status; + cl_ulong starttime; + cl_ulong endtime; + + double elapsed; + + status = clGetEventProfilingInfo(event_time, CL_PROFILING_COMMAND_START, + sizeof(cl_ulong), &starttime, NULL); + cl_errChk(status, "profiling start", true); + + status = clGetEventProfilingInfo(event_time, CL_PROFILING_COMMAND_END, + sizeof(cl_ulong), &endtime, NULL); + cl_errChk(status, "profiling end", true); + + // Convert to ms + elapsed = (double)(endtime-starttime)/1000000.0; + + return elapsed; +} + +//! Compute the elapsed time between two timer values +double cl_computeTime(cl_time start, cl_time end) +{ +#ifdef _WIN32 + __int64 freq; + int status; + + status = QueryPerformanceFrequency((LARGE_INTEGER*)&freq); + if(status == 0) { + perror("QueryPerformanceFrequency"); + exit(-1); + } + + // Return time in ms + return double(end-start)/(double(freq)/1000.0); +#else + + return end-start; +#endif +} + +//! Grab the current time using a system-specific timer +void cl_getTime(cl_time* time) +{ + +#ifdef _WIN32 + int status = QueryPerformanceCounter((LARGE_INTEGER*)time); + if(status == 0) { + perror("QueryPerformanceCounter"); + exit(-1); + } +#else + // Use gettimeofday to get the current time + struct timeval curTime; + gettimeofday(&curTime, NULL); + + // Convert timeval into double + *time = curTime.tv_sec * 1000 + (double)curTime.tv_usec/1000; +#endif +} + + + +//------------------------------------------------------- +// Error handling +//------------------------------------------------------- + +//! OpenCl error code list +/*! + An array of character strings used to give the error corresponding to the error code \n + + The error code is the index within this array +*/ +char *cl_errs[MAX_ERR_VAL] = { + (char *)"CL_SUCCESS", // 0 + (char *)"CL_DEVICE_NOT_FOUND", //-1 + (char *)"CL_DEVICE_NOT_AVAILABLE", //-2 + (char *)"CL_COMPILER_NOT_AVAILABLE", //-3 + (char *)"CL_MEM_OBJECT_ALLOCATION_FAILURE", //-4 + (char *)"CL_OUT_OF_RESOURCES", //-5 + (char *)"CL_OUT_OF_HOST_MEMORY", //-6 + (char *)"CL_PROFILING_INFO_NOT_AVAILABLE", //-7 + (char *)"CL_MEM_COPY_OVERLAP", //-8 + (char *)"CL_IMAGE_FORMAT_MISMATCH", //-9 + (char *)"CL_IMAGE_FORMAT_NOT_SUPPORTED", //-10 + (char *)"CL_BUILD_PROGRAM_FAILURE", //-11 + (char *)"CL_MAP_FAILURE", //-12 + (char *)"", //-13 + (char *)"", //-14 + (char *)"", //-15 + (char *)"", //-16 + (char *)"", //-17 + (char *)"", //-18 + (char *)"", //-19 + (char *)"", //-20 + (char *)"", //-21 + (char *)"", //-22 + (char *)"", //-23 + (char *)"", //-24 + (char *)"", //-25 + (char *)"", //-26 + (char *)"", //-27 + (char *)"", //-28 + (char *)"", //-29 + (char *)"CL_INVALID_VALUE", //-30 + (char *)"CL_INVALID_DEVICE_TYPE", //-31 + (char *)"CL_INVALID_PLATFORM", //-32 + (char *)"CL_INVALID_DEVICE", //-33 + (char *)"CL_INVALID_CONTEXT", //-34 + (char *)"CL_INVALID_QUEUE_PROPERTIES", //-35 + (char *)"CL_INVALID_COMMAND_QUEUE", //-36 + (char *)"CL_INVALID_HOST_PTR", //-37 + (char *)"CL_INVALID_MEM_OBJECT", //-38 + (char *)"CL_INVALID_IMAGE_FORMAT_DESCRIPTOR", //-39 + (char *)"CL_INVALID_IMAGE_SIZE", //-40 + (char *)"CL_INVALID_SAMPLER", //-41 + (char *)"CL_INVALID_BINARY", //-42 + (char *)"CL_INVALID_BUILD_OPTIONS", //-43 + (char *)"CL_INVALID_PROGRAM", //-44 + (char *)"CL_INVALID_PROGRAM_EXECUTABLE", //-45 + (char *)"CL_INVALID_KERNEL_NAME", //-46 + (char *)"CL_INVALID_KERNEL_DEFINITION", //-47 + (char *)"CL_INVALID_KERNEL", //-48 + (char *)"CL_INVALID_ARG_INDEX", //-49 + (char *)"CL_INVALID_ARG_VALUE", //-50 + (char *)"CL_INVALID_ARG_SIZE", //-51 + (char *)"CL_INVALID_KERNEL_ARGS", //-52 + (char *)"CL_INVALID_WORK_DIMENSION ", //-53 + (char *)"CL_INVALID_WORK_GROUP_SIZE", //-54 + (char *)"CL_INVALID_WORK_ITEM_SIZE", //-55 + (char *)"CL_INVALID_GLOBAL_OFFSET", //-56 + (char *)"CL_INVALID_EVENT_WAIT_LIST", //-57 + (char *)"CL_INVALID_EVENT", //-58 + (char *)"CL_INVALID_OPERATION", //-59 + (char *)"CL_INVALID_GL_OBJECT", //-60 + (char *)"CL_INVALID_BUFFER_SIZE", //-61 + (char *)"CL_INVALID_MIP_LEVEL", //-62 + (char *)"CL_INVALID_GLOBAL_WORK_SIZE"}; //-63 + +//! OpenCl Error checker +/*! +Checks for error code as per cl_int returned by OpenCl +\param status Error value as cl_int +\param msg User provided error message +\return True if Error Seen, False if no error +*/ +int cl_errChk(const cl_int status, const char * msg, bool exitOnErr) +{ + + if(status != CL_SUCCESS) { + printf("OpenCL Error: %d %s %s\n", status, cl_errs[-status], msg); + + if(exitOnErr) { + exit(-1); + } + + return true; + } + return false; +} + +// Queries the supported image formats for the device and prints +// them to the screen + void printSupportedImageFormats() +{ + cl_uint numFormats; + cl_int status; + + status = clGetSupportedImageFormats(context, 0, CL_MEM_OBJECT_IMAGE2D, + 0, NULL, &numFormats); + cl_errChk(status, "getting supported image formats", true); + + cl_image_format* imageFormats = NULL; + imageFormats = (cl_image_format*)alloc(sizeof(cl_image_format)*numFormats); + + status = clGetSupportedImageFormats(context, 0, CL_MEM_OBJECT_IMAGE2D, + numFormats, imageFormats, NULL); + + printf("There are %d supported image formats\n", numFormats); + + cl_uint orders[]={CL_R, CL_A, CL_INTENSITY, CL_LUMINANCE, CL_RG, + CL_RA, CL_RGB, CL_RGBA, CL_ARGB, CL_BGRA}; + char *orderstr[]={(char *)"CL_R", (char *)"CL_A",(char *)"CL_INTENSITY", (char *)"CL_LUMINANCE", (char *)"CL_RG", + (char *)"CL_RA", (char *)"CL_RGB", (char *)"CL_RGBA", (char *)"CL_ARGB", (char *)"CL_BGRA"}; + + cl_uint types[]={ + CL_SNORM_INT8 , CL_SNORM_INT16, CL_UNORM_INT8, CL_UNORM_INT16, + CL_UNORM_SHORT_565, CL_UNORM_SHORT_555, CL_UNORM_INT_101010,CL_SIGNED_INT8, + CL_SIGNED_INT16, CL_SIGNED_INT32, CL_UNSIGNED_INT8, CL_UNSIGNED_INT16, + CL_UNSIGNED_INT32, CL_HALF_FLOAT, CL_FLOAT}; + + char * typesstr[]={ + (char *)"CL_SNORM_INT8" ,(char *)"CL_SNORM_INT16",(char *)"CL_UNORM_INT8",(char *)"CL_UNORM_INT16", + (char *)"CL_UNORM_SHORT_565",(char *)"CL_UNORM_SHORT_555",(char *)"CL_UNORM_INT_101010", + (char *)"CL_SIGNED_INT8",(char *)"CL_SIGNED_INT16",(char *)"CL_SIGNED_INT32",(char *)"CL_UNSIGNED_INT8", + (char *)"CL_UNSIGNED_INT16",(char *)"CL_UNSIGNED_INT32",(char *)"CL_HALF_FLOAT",(char *)"CL_FLOAT"}; + + printf("Supported Formats:\n"); + for(int i = 0; i < (int)numFormats; i++) { + printf("\tFormat %d: ", i); + + for(int j = 0; j < (int)(sizeof(orders)/sizeof(cl_int)); j++) { + if(imageFormats[i].image_channel_order == orders[j]) { + printf("%s, ", orderstr[j]); + } + } + for(int j = 0; j < (int)(sizeof(types)/sizeof(cl_int)); j++) { + if(imageFormats[i].image_channel_data_type == types[j]) { + printf("%s, ", typesstr[j]); + } + } + printf("\n"); + } + + free(imageFormats); +} + + +//------------------------------------------------------- +// Platform and device information +//------------------------------------------------------- + +//! Returns true if AMD is the device vendor +bool cl_deviceIsAMD(cl_device_id dev) { + + bool retval = false; + + char* vendor = cl_getDeviceVendor(dev); + + if(strncmp(vendor, "Advanced", 8) == 0) { + retval = true; + } + + free(vendor); + + return retval; +} + +//! Returns true if NVIDIA is the device vendor +bool cl_deviceIsNVIDIA(cl_device_id dev) { + + bool retval = false; + + char* vendor = cl_getDeviceVendor(dev); + + if(strncmp(vendor, "NVIDIA", 6) == 0) { + retval = true; + } + + free(vendor); + + return retval; +} + +//! Returns true if NVIDIA is the device vendor +bool cl_platformIsNVIDIA(cl_platform_id plat) { + + bool retval = false; + + char* vendor = cl_getPlatformVendor(plat); + + if(strncmp(vendor, "NVIDIA", 6) == 0) { + retval = true; + } + + free(vendor); + + return retval; +} + +//! Get the name of the vendor for a device +char* cl_getDeviceDriverVersion(cl_device_id dev) +{ + cl_int status; + size_t devInfoSize; + char* devInfoStr = NULL; + + // If dev is NULL, set it to the default device + if(dev == NULL) { + dev = device; + } + + // Print the vendor + status = clGetDeviceInfo(dev, CL_DRIVER_VERSION, 0, + NULL, &devInfoSize); + cl_errChk(status, "Getting vendor name", true); + + devInfoStr = (char*)alloc(devInfoSize); + + status = clGetDeviceInfo(dev, CL_DRIVER_VERSION, devInfoSize, + devInfoStr, NULL); + cl_errChk(status, "Getting vendor name", true); + + return devInfoStr; +} + +//! The the name of the device as supplied by the OpenCL implementation +char* cl_getDeviceName(cl_device_id dev) +{ + cl_int status; + size_t devInfoSize; + char* devInfoStr = NULL; + + // If dev is NULL, set it to the default device + if(dev == NULL) { + dev = device; + } + + // Print the name + status = clGetDeviceInfo(dev, CL_DEVICE_NAME, 0, + NULL, &devInfoSize); + cl_errChk(status, "Getting device name", true); + + devInfoStr = (char*)alloc(devInfoSize); + + status = clGetDeviceInfo(dev, CL_DEVICE_NAME, devInfoSize, + devInfoStr, NULL); + cl_errChk(status, "Getting device name", true); + + return(devInfoStr); +} + +//! Get the name of the vendor for a device +char* cl_getDeviceVendor(cl_device_id dev) +{ + cl_int status; + size_t devInfoSize; + char* devInfoStr = NULL; + + // If dev is NULL, set it to the default device + if(dev == NULL) { + dev = device; + } + + // Print the vendor + status = clGetDeviceInfo(dev, CL_DEVICE_VENDOR, 0, + NULL, &devInfoSize); + cl_errChk(status, "Getting vendor name", true); + + devInfoStr = (char*)alloc(devInfoSize); + + status = clGetDeviceInfo(dev, CL_DEVICE_VENDOR, devInfoSize, + devInfoStr, NULL); + cl_errChk(status, "Getting vendor name", true); + + return devInfoStr; +} + +//! Get the name of the vendor for a device +char* cl_getDeviceVersion(cl_device_id dev) +{ + cl_int status; + size_t devInfoSize; + char* devInfoStr = NULL; + + // If dev is NULL, set it to the default device + if(dev == NULL) { + dev = device; + } + + // Print the vendor + status = clGetDeviceInfo(dev, CL_DEVICE_VERSION, 0, + NULL, &devInfoSize); + cl_errChk(status, "Getting vendor name", true); + + devInfoStr = (char*)alloc(devInfoSize); + + status = clGetDeviceInfo(dev, CL_DEVICE_VERSION, devInfoSize, + devInfoStr, NULL); + cl_errChk(status, "Getting vendor name", true); + + return devInfoStr; +} + +//! The the name of the device as supplied by the OpenCL implementation +char* cl_getPlatformName(cl_platform_id platform) +{ + cl_int status; + size_t platformInfoSize; + char* platformInfoStr = NULL; + + // Print the name + status = clGetPlatformInfo(platform, CL_PLATFORM_NAME, 0, + NULL, &platformInfoSize); + cl_errChk(status, "Getting platform name", true); + + platformInfoStr = (char*)alloc(platformInfoSize); + + status = clGetPlatformInfo(platform, CL_PLATFORM_NAME, platformInfoSize, + platformInfoStr, NULL); + cl_errChk(status, "Getting platform name", true); + + return(platformInfoStr); +} + +//! The the name of the device as supplied by the OpenCL implementation +char* cl_getPlatformVendor(cl_platform_id platform) +{ + cl_int status; + size_t platformInfoSize; + char* platformInfoStr = NULL; + + // Print the name + status = clGetPlatformInfo(platform, CL_PLATFORM_VENDOR, 0, + NULL, &platformInfoSize); + cl_errChk(status, "Getting platform name", true); + + platformInfoStr = (char*)alloc(platformInfoSize); + + status = clGetPlatformInfo(platform, CL_PLATFORM_VENDOR, platformInfoSize, + platformInfoStr, NULL); + cl_errChk(status, "Getting platform name", true); + + return(platformInfoStr); +} + +//------------------------------------------------------- +// Utility functions +//------------------------------------------------------- + +//! Take a string and an int, and return a string +char* catStringWithInt(const char* string, int integer) { + + if(integer > 99999) { + printf("Can't handle event identifiers with 6 digits\n"); + exit(-1); + } + + // 5 characters for the identifier, 1 for the null terminator + int strLen = strlen(string)+5+1; + char* eventStr = (char*)alloc(sizeof(char)*strLen); + + char tmp[6]; + + strcpy(eventStr, string); + strncat(eventStr, itoa_portable(integer, tmp, 10), 5); + + return eventStr; +} + +/** + ** C++ version 0.4 char* style "itoa": + ** Written by Lukás Chmela + ** Released under GPLv3. + **/ +//portable itoa function +char* itoa_portable(int value, char* result, int base) { + // check that the base if valid + if (base < 2 || base > 36) { *result = '\0'; return result; } + + char* ptr = result, *ptr1 = result, tmp_char; + int tmp_value; + + do { + tmp_value = value; + value /= base; + *ptr++ = "zyxwvutsrqponmlkjihgfedcba9876543210123456789abcdefghijklmnopqrstuvwxyz" [35 + (tmp_value - value * base)]; + } while ( value ); + + //Apply negative sign + if (tmp_value < 0) *ptr++ = '-'; + *ptr-- = '\0'; + + while(ptr1 < ptr) { + tmp_char = *ptr; + *ptr--= *ptr1; + *ptr1++ = tmp_char; + } + + return result; +} diff --git a/benchmarks/opencl/guassian/clutils.h b/benchmarks/opencl/guassian/clutils.h new file mode 100755 index 00000000..51177d07 --- /dev/null +++ b/benchmarks/opencl/guassian/clutils.h @@ -0,0 +1,281 @@ +/****************************************************************************\ + * Copyright (c) 2011, Advanced Micro Devices, Inc. * + * All rights reserved. * + * * + * Redistribution and use in source and binary forms, with or without * + * modification, are permitted provided that the following conditions * + * are met: * + * * + * Redistributions of source code must retain the above copyright notice, * + * this list of conditions and the following disclaimer. * + * * + * Redistributions in binary form must reproduce the above copyright notice, * + * this list of conditions and the following disclaimer in the documentation * + * and/or other materials provided with the distribution. * + * * + * Neither the name of the copyright holder nor the names of its contributors * + * may be used to endorse or promote products derived from this software * + * without specific prior written permission. * + * * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR * + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * + * * + * If you use the software (in whole or in part), you shall adhere to all * + * applicable U.S., European, and other export laws, including but not * + * limited to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. * + * Sections 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 * + * of 22 June 2000. Further, pursuant to Section 740.6 of the EAR, you * + * hereby certify that, except pursuant to a license granted by the United * + * States Department of Commerce Bureau of Industry and Security or as * + * otherwise permitted pursuant to a License Exception under the U.S. Export * + * Administration Regulations ("EAR"), you will not (1) export, re-export or * + * release to a national of a country in Country Groups D:1, E:1 or E:2 any * + * restricted technology, software, or source code you receive hereunder, * + * or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such * + * technology or software, if such foreign produced direct product is subject * + * to national security controls as identified on the Commerce Control List * + *(currently found in Supplement 1 to Part 774 of EAR). For the most current * + * Country Group listings, or for additional information about the EAR or * + * your obligations under those regulations, please refer to the U.S. Bureau * + * of Industry and Security’s website at http://www.bis.doc.gov/. * + \****************************************************************************/ + +#ifndef __CL_UTILS_H__ +#define __CL_UTILS_H__ + +#include + +// The cl_time type is OS specific +#ifdef _WIN32 +#include +#include +typedef __int64 cl_time; +#else +#include +typedef double cl_time; +#endif + +//------------------------------------------------------- +// Initialization and Cleanup +//------------------------------------------------------- + +// Detects platforms and devices, creates context and command queue +cl_context cl_init(char devicePreference='\0'); + +// Creates a context given a platform and a device +cl_context cl_init_context(int platform,int dev,int quiet=0); + +// Releases resources used by clutils +void cl_cleanup(); + +// Releases a kernel object +void cl_freeKernel(cl_kernel kernel); + +// Releases a memory object +void cl_freeMem(cl_mem mem); + +// Releases a program object +void cl_freeProgram(cl_program program); + +// Returns the global command queue +cl_command_queue cl_getCommandQueue(); + + +//------------------------------------------------------- +// Synchronization functions +//------------------------------------------------------- + +// Performs a clFinish on the command queue +void cl_sync(); + + +//------------------------------------------------------- +// Memory allocation +//------------------------------------------------------- + +// Allocates a regular buffer on the device +cl_mem cl_allocBuffer(size_t mem_size, + cl_mem_flags flags = CL_MEM_READ_WRITE); + +// XXX I don't think this does exactly what we want it to do +// Allocates a read-only buffer and transfers the data +cl_mem cl_allocBufferConst(size_t mem_size, void* host_ptr); + +// Allocates pinned memory on the host +cl_mem cl_allocBufferPinned(size_t mem_size); + +// Allocates an image on the device +cl_mem cl_allocImage(size_t height, size_t width, char type, + cl_mem_flags flags = CL_MEM_READ_WRITE); + + + +//------------------------------------------------------- +// Data transfers +//------------------------------------------------------- + +// Copies a buffer from the device to pinned memory on the host and +// maps it so it can be read +void* cl_copyAndMapBuffer(cl_mem dst, cl_mem src, size_t size); + +// Copies from one buffer to another +void cl_copyBufferToBuffer(cl_mem dst, cl_mem src, size_t size); + +// Copies data to a buffer on the device +void cl_copyBufferToDevice(cl_mem dst, void *src, size_t mem_size, + cl_bool blocking = CL_TRUE); + +// Copies data to an image on the device +void cl_copyImageToDevice(cl_mem dst, void* src, size_t height, size_t width); + +// Copies an image from the device to the host +void cl_copyImageToHost(void* dst, cl_mem src, size_t height, size_t width); + +// Copies data from a device buffer to the host +void cl_copyBufferToHost(void *dst, cl_mem src, size_t mem_size, + cl_bool blocking = CL_TRUE); + +// Copies data from a buffer on the device to an image on the device +void cl_copyBufferToImage(cl_mem src, cl_mem dst, int height, int width); + +// Maps a buffer +void* cl_mapBuffer(cl_mem mem, size_t mem_size, cl_mem_flags flags); + +// Unmaps a buffer +void cl_unmapBuffer(cl_mem mem, void *ptr); + +// Writes data to a zero-copy buffer on the device +void cl_writeToZCBuffer(cl_mem mem, void* data, size_t size); + +//------------------------------------------------------- +// Program and kernels +//------------------------------------------------------- + +// Compiles a program +cl_program cl_compileProgram(char* kernelPath, char* compileoptions, + bool verboseoptions = 0); + +// Creates a kernel +cl_kernel cl_createKernel(cl_program program, const char* kernelName); + + +// Sets a kernel argument +void cl_setKernelArg(cl_kernel kernel, unsigned int index, size_t size, + void* data); + + +//------------------------------------------------------- +// Profiling/events +//------------------------------------------------------- + +// Computes the execution time (start to end) for an event +double cl_computeExecTime(cl_event); + +// Compute the elapsed time between two CPU timer values +double cl_computeTime(cl_time start, cl_time end); + +// Creates an event from CPU timers +void cl_createUserEvent(cl_time start, cl_time end, char* desc); + +// Disable logging of events +void cl_disableEvents(); + +// Enable logging of events +void cl_enableEvents(); + +// Query the current system time +void cl_getTime(cl_time* time); + +// Calls a function which prints events to the terminal +void cl_printEvents(); + +// Calls a function which writes the events to a file +void cl_writeEventsToFile(char* path); + + +//------------------------------------------------------- +// Error handling +//------------------------------------------------------- + +// Compare a status value to CL_SUCCESS and optionally exit on error +int cl_errChk(const cl_int status, const char *msg, bool exitOnErr); + +// Queries the supported image formats for the device and prints +// them to the screen +void printSupportedImageFormats(); + +//------------------------------------------------------- +// Platform and device information +//------------------------------------------------------- + +bool cl_deviceIsAMD(cl_device_id dev=NULL); +bool cl_deviceIsNVIDIA(cl_device_id dev=NULL); +bool cl_platformIsNVIDIA(cl_platform_id plat=NULL); +char* cl_getDeviceDriverVersion(cl_device_id dev=NULL); +char* cl_getDeviceName(cl_device_id dev=NULL); +char* cl_getDeviceVendor(cl_device_id dev=NULL); +char* cl_getDeviceVersion(cl_device_id dev=NULL); +char* cl_getPlatformName(cl_platform_id platform); +char* cl_getPlatformVendor(cl_platform_id platform); + +//------------------------------------------------------- +// Utility functions +//------------------------------------------------------- + +char* catStringWithInt(const char* str, int integer); + +char* itoa_portable(int value, char* result, int base); + +//------------------------------------------------------- +// Data types +//------------------------------------------------------- +typedef struct{ + int x; + int y; +} int2; + +typedef struct{ + float x; + float y; +}float2; + +typedef struct{ + float x; + float y; + float z; + float w; +}float4; + +//------------------------------------------------------- +// Defines +//------------------------------------------------------- + +#define MAX_ERR_VAL 64 + +#define NUM_PROGRAMS 7 + +#define NUM_KERNELS 13 +#define KERNEL_INIT_DET 0 +#define KERNEL_BUILD_DET 1 +#define KERNEL_SURF_DESC 2 +#define KERNEL_NORM_DESC 3 +#define KERNEL_NON_MAX_SUP 4 +#define KERNEL_GET_ORIENT1 5 +#define KERNEL_GET_ORIENT2 6 +#define KERNEL_NN 7 +#define KERNEL_SCAN 8 +#define KERNEL_SCAN4 9 +#define KERNEL_TRANSPOSE 10 +#define KERNEL_SCANIMAGE 11 +#define KERNEL_TRANSPOSEIMAGE 12 + +#endif diff --git a/benchmarks/opencl/guassian/gaussianElim.h b/benchmarks/opencl/guassian/gaussianElim.h new file mode 100755 index 00000000..5d905d7e --- /dev/null +++ b/benchmarks/opencl/guassian/gaussianElim.h @@ -0,0 +1,40 @@ +#ifndef _GAUSSIANELIM +#define _GAUSSIANELIM + +#include +#include +#include +#include +#include +#include +#include + +#include "clutils.h" + +// All OpenCL headers +#if defined (__APPLE__) || defined(MACOSX) + #include +#else + #include +#endif + +float *OpenClGaussianElimination( + cl_context context, + int timing); + +void printUsage(); +int parseCommandline(int argc, char *argv[], char* filename, + int *q, int *t, int *p, int *d); + +void InitPerRun(int size,float *m); +void ForwardSub(cl_context context, float *a, float *b, float *m, int size,int timing); +void BackSub(float *a, float *b, float *finalVec, int size); +void Fan1(float *m, float *a, int Size, int t); +void Fan2(float *m, float *a, float *b,int Size, int j1, int t); +//void Fan3(float *m, float *b, int Size, int t); +void InitMat(FILE *fp, int size, float *ary, int nrow, int ncol); +void InitAry(FILE *fp, float *ary, int ary_size); +void PrintMat(float *ary, int size, int nrow, int ncolumn); +void PrintAry(float *ary, int ary_size); +float eventTime(cl_event event,cl_command_queue command_queue); +#endif diff --git a/benchmarks/opencl/guassian/gettimeofday.cpp b/benchmarks/opencl/guassian/gettimeofday.cpp new file mode 100755 index 00000000..a0486593 --- /dev/null +++ b/benchmarks/opencl/guassian/gettimeofday.cpp @@ -0,0 +1,74 @@ +#include "stdio.h" +#include +#include +#include +//using namespace System; +using namespace std; + +#if defined(_MSC_VER) || defined(_MSC_EXTENSIONS) + #define DELTA_EPOCH_IN_MICROSECS 11644473600000000Ui64 +#else + #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL +#endif + +struct timezone +{ + int tz_minuteswest; /* minutes W of Greenwich */ + int tz_dsttime; /* type of dst correction */ +}; + + +// Definition of a gettimeofday function + int gettimeofday(struct timeval *tv, struct timezone *tz) +{ +// Define a structure to receive the current Windows filetime + FILETIME ft; + +// Initialize the present time to 0 and the timezone to UTC + unsigned __int64 tmpres = 0; + static int tzflag = 0; + + if (NULL != tv) + { + GetSystemTimeAsFileTime(&ft); + +// The GetSystemTimeAsFileTime returns the number of 100 nanosecond +// intervals since Jan 1, 1601 in a structure. Copy the high bits to +// the 64 bit tmpres, shift it left by 32 then or in the low 32 bits. + tmpres |= ft.dwHighDateTime; + tmpres <<= 32; + tmpres |= ft.dwLowDateTime; + +// Convert to microseconds by dividing by 10 + tmpres /= 10; + +// The Unix epoch starts on Jan 1 1970. Need to subtract the difference +// in seconds from Jan 1 1601. + tmpres -= DELTA_EPOCH_IN_MICROSECS; + +// Finally change microseconds to seconds and place in the seconds value. +// The modulus picks up the microseconds. + tv->tv_sec = (long)(tmpres / 1000000UL); + tv->tv_usec = (long)(tmpres % 1000000UL); + } + + if (NULL != tz) + { + if (!tzflag) + { + _tzset(); + tzflag++; + } + +// Adjust for the timezone west of Greenwich + long seconds_diff; + _get_timezone(&seconds_diff); + tz->tz_minuteswest = seconds_diff / 60; + int hours_offset; + _get_daylight(&hours_offset); + tz->tz_dsttime = hours_offset; + } + + return 0; +} + diff --git a/benchmarks/opencl/guassian/gettimeofday.h b/benchmarks/opencl/guassian/gettimeofday.h new file mode 100755 index 00000000..8db1f7a9 --- /dev/null +++ b/benchmarks/opencl/guassian/gettimeofday.h @@ -0,0 +1,17 @@ + +#ifdef _WIN32 +#include +/** +Based on code seen at. + +http://www.winehq.org/pipermail/wine-devel/2003-June/018082.html + +http://msdn.microsoft.com/en-us/library/ms740560 + +*/ +int gettimeofday(struct timeval *tv, struct timezone *tz); +#else +#include +#endif + + diff --git a/benchmarks/opencl/guassian/kernel.cl b/benchmarks/opencl/guassian/kernel.cl new file mode 100755 index 00000000..c370e9b2 --- /dev/null +++ b/benchmarks/opencl/guassian/kernel.cl @@ -0,0 +1,49 @@ +//#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable + +typedef struct latLong + { + float lat; + float lng; + } LatLong; + +__kernel void Fan1(__global float *m_dev, + __global float *a_dev, + __global float *b_dev, + const int size, + const int t) { + int globalId = get_global_id(0); + + if (globalId < size-1-t) { + *(m_dev + size * (globalId + t + 1)+t) = *(a_dev + size * (globalId + t + 1) + t) / *(a_dev + size * t + t); + } +} + + +__kernel void Fan2(__global float *m_dev, + __global float *a_dev, + __global float *b_dev, + const int size, + const int t) { + int globalId = get_global_id(0); + + int globalIdx = get_global_id(0); + int globalIdy = get_global_id(1); + if (globalIdx < size-1-t && globalIdy < size-t) { + a_dev[size*(globalIdx+1+t)+(globalIdy+t)] -= m_dev[size*(globalIdx+1+t)+t] * a_dev[size*t+(globalIdy+t)]; + + if(globalIdy == 0){ + b_dev[globalIdx+1+t] -= m_dev[size*(globalIdx+1+t)+(globalIdy+t)] * b_dev[t]; + } + } +// One dimensional +// int globalIdx = globalId % size; +// int globalIdy = globalId / size; +// +// if (globalIdx < size-1-t && globalIdy < size-t) { +// a_dev[size*(globalIdx+1+t)+(globalIdy+t)] -= m_dev[size*(globalIdx+1+t)+t] * a_dev[size*t+(globalIdy+t)]; +// } +// if(globalIdy == 0){ +// b_dev[globalIdx+1+t] -= m_dev[size*(globalIdx+1+t)+(globalIdy+t)] * b_dev[t]; +// } + +} diff --git a/benchmarks/opencl/guassian/libgaussian.a b/benchmarks/opencl/guassian/libgaussian.a new file mode 100644 index 0000000000000000000000000000000000000000..959207283b97f6059a496e8517ea88b71c7f4030 GIT binary patch literal 9572 zcmeHNYiu0V6+Sbwv+GCFLe@N*&}zx}6_Z)-c)V-7LKQ9;N^#Rr0SYM6WM6hn)?TyT zxS^HWSnG94NF<`kg8&i)*{MoQMY@DmDnGD<>p%SA2TgxeYN#bZZB>OPZAleyzcY{B z@q;8#eo)A@?%Z?FIrrYV=YDhVJ!gIMt?^uP`?qRZLyJ?yQ?+UDGZfM~V~)nQ8Y1!& zRqw0huiZpsXJa^<%osb;xuJB%xHCSa(K3$l!)$Ixe=fUo_);>XFFulArqH&WbBXTS zDl)sf)}_<-%z7nK)7s0k&Zh^5`vx*;HQAwdCe&m~jYQRsSUB5SQx+w3Rqs+el4>HU zrsCm|>>_EhQ*Cco+ci~dS7V)vM5#8lE21VeH5IEgZ1pY*-osn7oeoL(@>MYW> znxFRc-YEzImdZ{7ouf*PC@k0j+FV$CCCO>l9{2Fb|qD&n?N-*HJN)u(!znM(%uC&jO({tx$LCl&19?496 zs6RBC&W#LYheBQ9NLULsZ_nq4N50wG+CPxrzB3U{W(Qj{nbARYa3Gh<=31>bVF*Ks zbV~0?wI{UBWIEQ>rS~PahcqATAYaM!2kG6NEx;mqYXd)7g>Cp}D49ex8XR&yGnL{vek}R+z0n}&XIF*4f0o={@gnA)^cMc zrge)VszbTX;WO>>Zv?%@XDc!7^6vn>R>w}M^^d~#yi?eyOESKRa4wyRhX;lR@0NnZbPuM7ux}d4UAv6_Og0hE7^Cr=!M#&>Br!5V zVSX47UGW%N&uEXPWwJ?hH8Sv{^e1lI70>k>`A3J*GTa*;$>)}B;#y^+c4SMYO5vew zJ{`Vm)BWm5KAzk`VgAO9cq)Z&y`Rdq8rhjJ?l#>0*hnOeXxfPP8Ht3^iElqiu84Q& zXj+Z;sfh%(a~AQN<>LvIj}F>pDxQxoUyP_>>@tiV*geY%+)N8=_L5%F^Jn;$90*wY zCs3L5d|xiv11ZxE?5G{|wAI3XIG+>UhN?@mWE;MlPBdH%nm-|ytwMennIY+peFn5U z7RWOFB$&+i>N%e@tCjVJL3{gq&m&_!_nm&@^w-8rB-UHnACJfL{0g!=?pe@oA0W## z>w5W5q5;p3=gaTo&i5YBtQ$e*bRPLti?SaqEA<>cx;0m9k3EI#-5k|XCWir?=VRX)$wbbO9VzIUiJ{xX^4btn&u=D~W> zi<7ul9HUb0D-`Vc_kzCaEiw-_k~9?%mC!MgUYrw5fFet`35AAk+#RkX2@8|FIOSK? zpCl6tTxc016UNqyzZH_3ixL(>GWXp?^B+uqxJcGp6iR!aB6I)OY5wfs0_)FjDK6;p zgM!o`Q=y@zeE%pF9~6}5rl_p{m%$ISXaDq{py0b$P-jR|1);F9TaaLr!Ywm`bc-Ms zTDnE4WlSt=m=UE7f>eP0a5s{0HOIKSorxY>ErK$|2mcS&Ebw!NBYF?thFiC946_w zEGp|UmlL!0{BOY;@*0dEx7MIyci7jmd$(=(de|NI4(_d?pk?=Iw!3KAoo)NU;D?K3 zy+zV%o`No9!~0?PX}0@Ex_q}i?o_qM?)O-BFY7!3Ap8wh2g)gYBzesoO zgtbT7558;v#8c;edXH1jU>{$Dojry+(|M=PXU`dI_^x~v_(A6Y7)R;SXK`M*wD;C_ z2@lzJTPxSPrQWsY-n8_FG#t;xGZ}c{iRD)t!$=K`j`ZovFJjJAuc#P;7N%5AuVMc_ zTCP>jLq3XNl12BJUe5vUlC0y>+|4uSuik0L=42V$T194E_gvo%!Aj@)4;(!dR=U-D z=>UiO;53TtESFVhy=|aa?v87n*U?XUUVkb5arc^c)gK3b?z1o?(p7&PhCpI_T=mC4 z@BX+aaQd10T! zbAPJ>oXFygz>4i4iXG$1fn>?nk^GKJEd(61q8{gf&^&*exq9_sU9h zTvi$%ks+5Mm*FSNO8BG%e>}kcxb)&4L9bdZ6`qIBJ^p88pAan6!Dmt6gC8dN@V5Wr zXBxih@j3YOGh(UgdP(|aKq&eCPNhPfP#W(O^$C;MmoNE_z$dInd5Dya2Si<-C8hBg zVO;p#py9*okBfpXe?tHb`Dw%xz$?CUo3j2GeEGLU%eSm~i-JcT|9<^3s`!|KQ1&tZ zF?GGte3IZt6Z~sO=QY86>~0FK>V=;!NVe{bIDdZXqSP>kn8itqc?z;_aenKmi_j5E zs7sAwLJ99@9%=wRBa~LZLP?+OH(_@Z_d{KNnv%Uehzqm`9G95?Xz~K{=C|}Llxnxa zKOK{F`F_VwU0wE5P5k~LP1Tm`%5x_WD{%TaGYOy7_@^D4=oZXc9_PnGaudecw^1mgDvTC5HZk#r$Z?E9eSjpyC`y4NRN4cq*3Ka&F>7H__y1lj9G3-s&tcL7a~!t- z#%9Fe#0^wvELyP;^9JB@&%c1-_sKj0Y)&x!rU-k8h))3vh?&4P`oyeQ^3RcZ7%{1d z*DX9tO^BNyj>2&t&bl8M-Z%pcy=7rp3BQdsJ59hYZ2hu>DCuBW+AszTcb737to{rzTy0}G2>eX}9tCTG;ayLk zWB&XD7|Yk|V7SM@a3!u|kLAH|2pD$bKPzFlq4KRMp%)Q9@{GCtX$!;uulO~wpB9>m zz$@_8@+NS}*tO`7#jb@{;M9xZtC%JFGGX@17+d4p9miOnaISfi2)wwlHl|;5>Ky+` zAwKi6Q-21RK{uv0&9oDcs)F3*-^jGXZ};%;@bI@`k9GBjnRfKw@bLfW;eX)aSK)nJ z{W|<0Udia-$4|=?_%ftsgn#+Dcy>xtVaA(Gf4;uJIb)`UI zCCK9}*d>akal*|wYo7P$yZ8B(;)FNy4|-05$*sitF3pTx_)+Z-D7ihB)RL_fC+tOE eR}v?za{iA1U0KO}zol_P)*FTf%iVDq!~X^BDLI({ literal 0 HcmV?d00001 diff --git a/benchmarks/opencl/guassian/main.cc b/benchmarks/opencl/guassian/main.cc new file mode 100755 index 00000000..1b852908 --- /dev/null +++ b/benchmarks/opencl/guassian/main.cc @@ -0,0 +1,412 @@ +#ifndef __GAUSSIAN_ELIMINATION__ +#define __GAUSSIAN_ELIMINATION__ + +#include "gaussianElim.h" + +cl_context context = NULL; + +int main(int argc, char *argv[]) { + printf("enter demo main\n"); + float *a = NULL, *b = NULL, *finalVec = NULL; + float *m = NULL; + int size; + + FILE *fp; + + // args + char filename[100]; + int quiet = 0, timing = 0, platform = -1, device = -1; + + // parse command line + if (parseCommandline(argc, argv, filename, &quiet, &timing, &platform, + &device)) { + printUsage(); + return 0; + } + + context = cl_init_context(platform, device, quiet); + + fp = fopen(filename, "r"); + fscanf(fp, "%d", &size); + + a = (float *)malloc(size * size * sizeof(float)); + + printf("OK\n"); + + InitMat(fp, size, a, size, size); + // printf("The input matrix a is:\n"); + // PrintMat(a, size, size, size); + b = (float *)malloc(size * sizeof(float)); + + InitAry(fp, b, size); + // printf("The input array b is:\n"); + // PrintAry(b, size); + + // create the solution matrix + m = (float *)malloc(size * size * sizeof(float)); + + // create a new vector to hold the final answer + finalVec = (float *)malloc(size * sizeof(float)); + + InitPerRun(size, m); + + // begin timing + + // run kernels + ForwardSub(context, a, b, m, size, timing); + + // end timing + if (!quiet) { + printf("The result of matrix m is: \n"); + + PrintMat(m, size, size, size); + printf("The result of matrix a is: \n"); + PrintMat(a, size, size, size); + printf("The result of array b is: \n"); + PrintAry(b, size); + + BackSub(a, b, finalVec, size); + printf("The final solution is: \n"); + PrintAry(finalVec, size); + } + + fclose(fp); + free(m); + free(a); + free(b); + free(finalVec); + // OpenClGaussianElimination(context,timing); + + return 0; +} + +/*------------------------------------------------------ + ** ForwardSub() -- Forward substitution of Gaussian + ** elimination. + **------------------------------------------------------ + */ +void ForwardSub(cl_context context, float *a, float *b, float *m, int size, + int timing) { + // 1. set up kernels + cl_kernel fan1_kernel, fan2_kernel; + cl_int status = 0; + cl_program gaussianElim_program; + cl_event writeEvent, kernelEvent, readEvent; + float writeTime = 0, readTime = 0, kernelTime = 0; + float writeMB = 0, readMB = 0; + + gaussianElim_program = + cl_compileProgram((char *)"gaussianElim_kernels.cl", NULL); + + fan1_kernel = clCreateKernel(gaussianElim_program, "Fan1", &status); + status = cl_errChk(status, (char *)"Error Creating Fan1 kernel", true); + if (status) + exit(1); + + fan2_kernel = clCreateKernel(gaussianElim_program, "Fan2", &status); + status = cl_errChk(status, (char *)"Error Creating Fan2 kernel", true); + if (status) + exit(1); + + // 2. set up memory on device and send ipts data to device + + cl_mem a_dev, b_dev, m_dev; + + cl_int error = 0; + + a_dev = clCreateBuffer(context, CL_MEM_READ_WRITE, + sizeof(float) * size * size, NULL, &error); + + b_dev = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(float) * size, NULL, + &error); + + m_dev = clCreateBuffer(context, CL_MEM_READ_WRITE, + sizeof(float) * size * size, NULL, &error); + + cl_command_queue command_queue = cl_getCommandQueue(); + + error = clEnqueueWriteBuffer(command_queue, a_dev, + 1, // change to 0 for nonblocking write + 0, // offset + sizeof(float) * size * size, a, 0, NULL, + &writeEvent); + + if (timing) + writeTime += eventTime(writeEvent, command_queue); + clReleaseEvent(writeEvent); + + error = clEnqueueWriteBuffer(command_queue, b_dev, + 1, // change to 0 for nonblocking write + 0, // offset + sizeof(float) * size, b, 0, NULL, &writeEvent); + if (timing) + writeTime += eventTime(writeEvent, command_queue); + clReleaseEvent(writeEvent); + + error = clEnqueueWriteBuffer(command_queue, m_dev, + 1, // change to 0 for nonblocking write + 0, // offset + sizeof(float) * size * size, m, 0, NULL, + &writeEvent); + if (timing) + writeTime += eventTime(writeEvent, command_queue); + clReleaseEvent(writeEvent); + writeMB = (float)(sizeof(float) * size * (size + size + 1) / 1e6); + + // 3. Determine block sizes + size_t globalWorksizeFan1[1]; + size_t globalWorksizeFan2[2]; + + globalWorksizeFan1[0] = size; + globalWorksizeFan2[0] = size; + globalWorksizeFan2[1] = size; + + int t; + // 4. Setup and Run kernels + for (t = 0; t < (size - 1); t++) { + // kernel args + cl_int argchk; + argchk = clSetKernelArg(fan1_kernel, 0, sizeof(cl_mem), (void *)&m_dev); + argchk |= clSetKernelArg(fan1_kernel, 1, sizeof(cl_mem), (void *)&a_dev); + argchk |= clSetKernelArg(fan1_kernel, 2, sizeof(cl_mem), (void *)&b_dev); + argchk |= clSetKernelArg(fan1_kernel, 3, sizeof(int), (void *)&size); + argchk |= clSetKernelArg(fan1_kernel, 4, sizeof(int), (void *)&t); + + cl_errChk(argchk, "ERROR in Setting Fan1 kernel args", true); + + // launch kernel + error = + clEnqueueNDRangeKernel(command_queue, fan1_kernel, 1, 0, + globalWorksizeFan1, NULL, 0, NULL, &kernelEvent); + + cl_errChk(error, "ERROR in Executing Fan1 Kernel", true); + if (timing) { + // printf("here1a\n"); + kernelTime += eventTime(kernelEvent, command_queue); + // printf("here1b\n"); + } + clReleaseEvent(kernelEvent); + // Fan1<<>>(m_cuda,a_cuda,Size,t); + // cudaThreadSynchronize(); + + // kernel args + argchk = clSetKernelArg(fan2_kernel, 0, sizeof(cl_mem), (void *)&m_dev); + argchk |= clSetKernelArg(fan2_kernel, 1, sizeof(cl_mem), (void *)&a_dev); + argchk |= clSetKernelArg(fan2_kernel, 2, sizeof(cl_mem), (void *)&b_dev); + argchk |= clSetKernelArg(fan2_kernel, 3, sizeof(int), (void *)&size); + argchk |= clSetKernelArg(fan2_kernel, 4, sizeof(int), (void *)&t); + + cl_errChk(argchk, "ERROR in Setting Fan2 kernel args", true); + + // launch kernel + error = + clEnqueueNDRangeKernel(command_queue, fan2_kernel, 2, 0, + globalWorksizeFan2, NULL, 0, NULL, &kernelEvent); + + cl_errChk(error, "ERROR in Executing Fan1 Kernel", true); + if (timing) { + // printf("here2a\n"); + kernelTime += eventTime(kernelEvent, command_queue); + // printf("here2b\n"); + } + clReleaseEvent(kernelEvent); + // Fan2<<>>(m_cuda,a_cuda,b_cuda,Size,Size-t,t); + // cudaThreadSynchronize(); + } + // 5. transfer data off of device + error = + clEnqueueReadBuffer(command_queue, a_dev, + 1, // change to 0 for nonblocking write + 0, // offset + sizeof(float) * size * size, a, 0, NULL, &readEvent); + + cl_errChk(error, "ERROR with clEnqueueReadBuffer", true); + if (timing) + readTime += eventTime(readEvent, command_queue); + clReleaseEvent(readEvent); + + error = clEnqueueReadBuffer(command_queue, b_dev, + 1, // change to 0 for nonblocking write + 0, // offset + sizeof(float) * size, b, 0, NULL, &readEvent); + cl_errChk(error, "ERROR with clEnqueueReadBuffer", true); + if (timing) + readTime += eventTime(readEvent, command_queue); + clReleaseEvent(readEvent); + + error = + clEnqueueReadBuffer(command_queue, m_dev, + 1, // change to 0 for nonblocking write + 0, // offset + sizeof(float) * size * size, m, 0, NULL, &readEvent); + + cl_errChk(error, "ERROR with clEnqueueReadBuffer", true); + if (timing) + readTime += eventTime(readEvent, command_queue); + clReleaseEvent(readEvent); + readMB = (float)(sizeof(float) * size * (size + size + 1) / 1e6); + + if (timing) { + printf("Matrix Size\tWrite(s) [size]\t\tKernel(s)\tRead(s) " + "[size]\t\tTotal(s)\n"); + printf("%dx%d \t", size, size); + + printf("%f [%.2fMB]\t", writeTime, writeMB); + + printf("%f\t", kernelTime); + + printf("%f [%.2fMB]\t", readTime, readMB); + + printf("%f\n\n", writeTime + kernelTime + readTime); + } +} + +float eventTime(cl_event event, cl_command_queue command_queue) { + cl_int error = 0; + cl_ulong eventStart, eventEnd; + clFinish(command_queue); + error = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START, + sizeof(cl_ulong), &eventStart, NULL); + cl_errChk(error, "ERROR in Event Profiling.", true); + error = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END, + sizeof(cl_ulong), &eventEnd, NULL); + cl_errChk(error, "ERROR in Event Profiling.", true); + + return (float)((eventEnd - eventStart) / 1e9); +} + +int parseCommandline(int argc, char *argv[], char *filename, int *q, int *t, + int *p, int *d) { + int i; + // if (argc < 2) return 1; // error + strncpy(filename, "matrix4.txt", 100); + char flag; + + for (i = 1; i < argc; i++) { + if (argv[i][0] == '-') { // flag + flag = argv[i][1]; + switch (flag) { + case 'h': // help + return 1; + break; + case 'q': // quiet + *q = 1; + break; + case 't': // timing + *t = 1; + break; + case 'p': // platform + i++; + *p = atoi(argv[i]); + break; + case 'd': // device + i++; + *d = atoi(argv[i]); + break; + } + } + } + if ((*d >= 0 && *p < 0) || + (*p >= 0 && + *d < 0)) // both p and d must be specified if either are specified + return 1; + return 0; +} + +void printUsage() { + printf("Gaussian Elimination Usage\n"); + printf("\n"); + printf("gaussianElimination [filename] [-hqt] [-p [int] -d [int]]\n"); + printf("\n"); + printf("example:\n"); + printf("$ ./gaussianElimination matrix4.txt\n"); + printf("\n"); + printf("filename the filename that holds the matrix data\n"); + printf("\n"); + printf("-h Display the help file\n"); + printf("-q Quiet mode. Suppress all text output.\n"); + printf("-t Print timing information.\n"); + printf("\n"); + printf("-p [int] Choose the platform (must choose both platform and " + "device)\n"); + printf("-d [int] Choose the device (must choose both platform and " + "device)\n"); + printf("\n"); + printf("\n"); + printf("Notes: 1. The filename is required as the first parameter.\n"); + printf(" 2. If you declare either the device or the platform,\n"); + printf(" you must declare both.\n\n"); +} + +/*------------------------------------------------------ + ** InitPerRun() -- Initialize the contents of the + ** multipier matrix **m + **------------------------------------------------------ + */ +void InitPerRun(int size, float *m) { + int i; + for (i = 0; i < size * size; i++) + *(m + i) = 0.0; +} +void BackSub(float *a, float *b, float *finalVec, int size) { + // solve "bottom up" + int i, j; + for (i = 0; i < size; i++) { + finalVec[size - i - 1] = b[size - i - 1]; + for (j = 0; j < i; j++) { + finalVec[size - i - 1] -= *(a + size * (size - i - 1) + (size - j - 1)) * + finalVec[size - j - 1]; + } + finalVec[size - i - 1] = + finalVec[size - i - 1] / *(a + size * (size - i - 1) + (size - i - 1)); + } +} +void InitMat(FILE *fp, int size, float *ary, int nrow, int ncol) { + int i, j; + + for (i = 0; i < nrow; i++) { + for (j = 0; j < ncol; j++) { + fscanf(fp, "%f", ary + size * i + j); + } + } +} +/*------------------------------------------------------ + ** InitAry() -- Initialize the array (vector) by reading + ** data from the data file + **------------------------------------------------------ + */ +void InitAry(FILE *fp, float *ary, int ary_size) { + int i; + + for (i = 0; i < ary_size; i++) { + fscanf(fp, "%f", &ary[i]); + } +} +/*------------------------------------------------------ + ** PrintMat() -- Print the contents of the matrix + **------------------------------------------------------ + */ +void PrintMat(float *ary, int size, int nrow, int ncol) { + int i, j; + + for (i = 0; i < nrow; i++) { + for (j = 0; j < ncol; j++) { + printf("%8.2f ", *(ary + size * i + j)); + } + printf("\n"); + } + printf("\n"); +} + +/*------------------------------------------------------ + ** PrintAry() -- Print the contents of the array (vector) + **------------------------------------------------------ + */ +void PrintAry(float *ary, int ary_size) { + int i; + for (i = 0; i < ary_size; i++) { + printf("%.2f ", ary[i]); + } + printf("\n\n"); +} +#endif diff --git a/benchmarks/opencl/guassian/matrix4.txt b/benchmarks/opencl/guassian/matrix4.txt new file mode 100755 index 00000000..abf30b49 --- /dev/null +++ b/benchmarks/opencl/guassian/matrix4.txt @@ -0,0 +1,11 @@ +4 + +-0.6 -0.5 0.7 0.3 +-0.3 -0.9 0.3 0.7 +-0.4 -0.5 -0.3 -0.8 +0.0 -0.1 0.2 0.9 + +-0.85 -0.68 0.24 -0.53 + +0.7 0.0 -0.4 -0.5 + diff --git a/benchmarks/opencl/guassian/run b/benchmarks/opencl/guassian/run new file mode 100755 index 00000000..31683b1b --- /dev/null +++ b/benchmarks/opencl/guassian/run @@ -0,0 +1 @@ +./gaussian ../../data/gaussian/matrix4.txt \ No newline at end of file diff --git a/benchmarks/opencl/guassian/utils.cpp b/benchmarks/opencl/guassian/utils.cpp new file mode 100755 index 00000000..b0f9115f --- /dev/null +++ b/benchmarks/opencl/guassian/utils.cpp @@ -0,0 +1,204 @@ +/****************************************************************************\ + * Copyright (c) 2011, Advanced Micro Devices, Inc. * + * All rights reserved. * + * * + * Redistribution and use in source and binary forms, with or without * + * modification, are permitted provided that the following conditions * + * are met: * + * * + * Redistributions of source code must retain the above copyright notice, * + * this list of conditions and the following disclaimer. * + * * + * Redistributions in binary form must reproduce the above copyright notice, * + * this list of conditions and the following disclaimer in the documentation * + * and/or other materials provided with the distribution. * + * * + * Neither the name of the copyright holder nor the names of its contributors * + * may be used to endorse or promote products derived from this software * + * without specific prior written permission. * + * * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR * + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * + * * + * If you use the software (in whole or in part), you shall adhere to all * + * applicable U.S., European, and other export laws, including but not * + * limited to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. * + * Sections 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 * + * of 22 June 2000. Further, pursuant to Section 740.6 of the EAR, you * + * hereby certify that, except pursuant to a license granted by the United * + * States Department of Commerce Bureau of Industry and Security or as * + * otherwise permitted pursuant to a License Exception under the U.S. Export * + * Administration Regulations ("EAR"), you will not (1) export, re-export or * + * release to a national of a country in Country Groups D:1, E:1 or E:2 any * + * restricted technology, software, or source code you receive hereunder, * + * or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such * + * technology or software, if such foreign produced direct product is subject * + * to national security controls as identified on the Commerce Control List * + *(currently found in Supplement 1 to Part 774 of EAR). For the most current * + * Country Group listings, or for additional information about the EAR or * + * your obligations under those regulations, please refer to the U.S. Bureau * + * of Industry and Security’s website at http://www.bis.doc.gov/. * + \****************************************************************************/ + +#include +#include +#include +#include + +#include "utils.h" + +static bool usingImages = true; + +//! A wrapper for malloc that checks the return value +void* alloc(size_t size) { + + void* ptr = NULL; + ptr = malloc(size); + if(ptr == NULL) { + perror("malloc"); + exit(-1); + } + + return ptr; +} + +// This function checks to make sure a file exists before we open it +void checkFile(char* filename) +{ + + struct stat fileStatus; + if(stat(filename, &fileStatus) != 0) { + printf("Error opening file: %s\n", filename); + exit(-1); + } + else { + if(!(S_IFREG & fileStatus.st_mode)) { + printf("File %s is not a regular file\n", filename); + exit(-1); + } + } +} + + +// This function checks to make sure a directory exists +void checkDir(char* dirpath) +{ + + struct stat fileStatus; + if(stat(dirpath, &fileStatus) != 0) { + printf("Directory does not exist: %s\n", dirpath); + exit(-1); + } + else { + if(!(S_IFDIR & fileStatus.st_mode)) { + printf("Directory was not provided: %s\n", dirpath); + exit(-1); + } + } +} + +// Parse the command line arguments +void parseArguments(int argc, char** argv, char** input, char** events, + char** ipts, char* devicePref, bool* verifyResults) +{ + + for(int i = 2; i < argc; i++) { + if(strcmp(argv[i], "-d") == 0) { // Event dump found + if(i == argc-1) { + printf("Usage: -e Needs directory path\n"); + exit(-1); + } + devicePref[0] = argv[i+1][0]; + i++; + continue; + } + if(strcmp(argv[i], "-e") == 0) { // Event dump found + if(i == argc-1) { + printf("Usage: -e Needs directory path\n"); + exit(-1); + } + *events = argv[i+1]; + i++; + continue; + } + if(strcmp(argv[i], "-i") == 0) { // Input found + if(i == argc-1) { + printf("Usage: -i Needs directory path\n"); + exit(-1); + } + *input = argv[i+1]; + i++; + continue; + } + if(strcmp(argv[i], "-l") == 0) { // Ipts dump found + if(i == argc-1) { + printf("Usage: -l Needs directory path\n"); + exit(-1); + } + *ipts = argv[i+1]; + i++; + continue; + } + if(strcmp(argv[i], "-n") == 0) { // Don't use OpenCL images + setUsingImages(false); + continue; + } + if(strcmp(argv[i], "-v") == 0) { // Verify results + *verifyResults = true; + continue; + } + } +} + + +// This function that takes a positive integer 'value' and returns +// the nearest multiple of 'multiple' (used for padding columns) +unsigned int roundUp(unsigned int value, unsigned int multiple) { + + unsigned int remainder = value % multiple; + + // Make the value a multiple of multiple + if(remainder != 0) { + value += (multiple-remainder); + } + + return value; +} + + +// Concatenate two strings and return a pointer to the new string +char* smartStrcat(char* str1, char* str2) +{ + char* newStr = NULL; + + newStr = (char*)alloc((strlen(str1)+strlen(str2)+1)*sizeof(char)); + + strcpy(newStr, str1); + strcat(newStr, str2); + + return newStr; +} + + +// Set the value of using images to true if they are being +// used, or false if they are not +void setUsingImages(bool val) +{ + usingImages = val; +} + + +// Return whether or not images are being used +bool isUsingImages() +{ + return usingImages; +} diff --git a/benchmarks/opencl/guassian/utils.h b/benchmarks/opencl/guassian/utils.h new file mode 100755 index 00000000..1e901ced --- /dev/null +++ b/benchmarks/opencl/guassian/utils.h @@ -0,0 +1,84 @@ +/****************************************************************************\ + * Copyright (c) 2011, Advanced Micro Devices, Inc. * + * All rights reserved. * + * * + * Redistribution and use in source and binary forms, with or without * + * modification, are permitted provided that the following conditions * + * are met: * + * * + * Redistributions of source code must retain the above copyright notice, * + * this list of conditions and the following disclaimer. * + * * + * Redistributions in binary form must reproduce the above copyright notice, * + * this list of conditions and the following disclaimer in the documentation * + * and/or other materials provided with the distribution. * + * * + * Neither the name of the copyright holder nor the names of its contributors * + * may be used to endorse or promote products derived from this software * + * without specific prior written permission. * + * * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR * + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * + * * + * If you use the software (in whole or in part), you shall adhere to all * + * applicable U.S., European, and other export laws, including but not * + * limited to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. * + * Sections 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 * + * of 22 June 2000. Further, pursuant to Section 740.6 of the EAR, you * + * hereby certify that, except pursuant to a license granted by the United * + * States Department of Commerce Bureau of Industry and Security or as * + * otherwise permitted pursuant to a License Exception under the U.S. Export * + * Administration Regulations ("EAR"), you will not (1) export, re-export or * + * release to a national of a country in Country Groups D:1, E:1 or E:2 any * + * restricted technology, software, or source code you receive hereunder, * + * or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such * + * technology or software, if such foreign produced direct product is subject * + * to national security controls as identified on the Commerce Control List * + *(currently found in Supplement 1 to Part 774 of EAR). For the most current * + * Country Group listings, or for additional information about the EAR or * + * your obligations under those regulations, please refer to the U.S. Bureau * + * of Industry and Security’s website at http://www.bis.doc.gov/. * + \****************************************************************************/ + +#ifndef _UTILS_ +#define _UTILS_ + +// Wrapper for malloc +void* alloc(size_t size); + +// Checks for existence of directory +void checkDir(char* dirpath); + +// Check for existence of file +void checkFile(char* filename); + +// Parse the input command line options to the program +void parseArguments(int argc, char** argv, char** input, char** events, + char** ipts, char* devicePref, bool* verifyResults); + + +// Print the program usage information +void printUsage(); + +// Rounds up size to the nearest multiple of multiple +unsigned int roundUp(unsigned int value, unsigned int multiple); + +// Concatenate two strings, creating a new one +char* smartStrcat(char* str1, char* str2); + +// Set the value of usingImages +void setUsingImages(bool val); + +// Return whether or not images are being used +bool isUsingImages(); + +#endif diff --git a/benchmarks/opencl/kmeans/Makefile b/benchmarks/opencl/kmeans/Makefile index b60de117..f3d35062 100644 --- a/benchmarks/opencl/kmeans/Makefile +++ b/benchmarks/opencl/kmeans/Makefile @@ -31,7 +31,8 @@ CXXFLAGS += -I$(POCL_INC_PATH) VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a -PROJECT=kmeans +PROJECT = kmeans +SRCS = main.cc read_input.c rmse.c cluster.c kmeans_clustering.c all: $(PROJECT).dump $(PROJECT).hex @@ -50,11 +51,11 @@ read_input.o: read_input.c rmse.o: rmse.c $(CC) $(CXXFLAGS) -c rmse.c -$(PROJECT).elf: main.cc lib$(PROJECT).a read_input.o rmse.o cluster.o kmeans_clustering.o - $(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) main.cc read_input.o rmse.o cluster.o kmeans_clustering.o $(VX_LIBS) -o $(PROJECT).elf +$(PROJECT).elf: $(SRCS) lib$(PROJECT).a + $(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) $(SRCS) $(VX_LIBS) -o $(PROJECT).elf -$(PROJECT).qemu: main.cc lib$(PROJECT).a read_input.o rmse.o cluster.o kmeans_clustering.o - $(CXX) $(CXXFLAGS) main.cc read_input.o rmse.o cluster.o kmeans_clustering.o $(QEMU_LIBS) -o $(PROJECT).qemu +$(PROJECT).qemu: $(SRCS) lib$(PROJECT).a + $(CXX) $(CXXFLAGS) $(SRCS) $(QEMU_LIBS) -o $(PROJECT).qemu $(PROJECT).hex: $(PROJECT).elf $(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex @@ -75,4 +76,4 @@ gdb-c: $(PROJECT).qemu $(GDB) $(PROJECT).qemu clean: - rm -rf *.o *.elf *.dump *.hex *.a *.pocl *.qemu \ No newline at end of file + rm -rf *.o *.elf *.dump *.hex *.qemu *.log *.debug \ No newline at end of file diff --git a/benchmarks/opencl/kmeans/cluster.c b/benchmarks/opencl/kmeans/cluster.c new file mode 100755 index 00000000..bc3c7c59 --- /dev/null +++ b/benchmarks/opencl/kmeans/cluster.c @@ -0,0 +1,155 @@ +/*****************************************************************************/ +/*IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. */ +/*By downloading, copying, installing or using the software you agree */ +/*to this license. If you do not agree to this license, do not download, */ +/*install, copy or use the software. */ +/* */ +/* */ +/*Copyright (c) 2005 Northwestern University */ +/*All rights reserved. */ + +/*Redistribution of the software in source and binary forms, */ +/*with or without modification, is permitted provided that the */ +/*following conditions are met: */ +/* */ +/*1 Redistributions of source code must retain the above copyright */ +/* notice, this list of conditions and the following disclaimer. */ +/* */ +/*2 Redistributions in binary form must reproduce the above copyright */ +/* notice, this list of conditions and the following disclaimer in the */ +/* documentation and/or other materials provided with the distribution.*/ +/* */ +/*3 Neither the name of Northwestern University nor the names of its */ +/* contributors may be used to endorse or promote products derived */ +/* from this software without specific prior written permission. */ +/* */ +/*THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS */ +/*IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ +/*TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT AND */ +/*FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL */ +/*NORTHWESTERN UNIVERSITY OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, */ +/*INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/*(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR */ +/*SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */ +/*HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, */ +/*STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN */ +/*ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/*POSSIBILITY OF SUCH DAMAGE. */ +/******************************************************************************/ + +/*************************************************************************/ +/** File: cluster.c **/ +/** Description: Takes as input a file, containing 1 data point per **/ +/** per line, and performs a fuzzy c-means clustering **/ +/** on the data. Fuzzy clustering is performed using **/ +/** min to max clusters and the clustering that gets **/ +/** the best score according to a compactness and **/ +/** separation criterion are returned. **/ +/** Author: Brendan McCane **/ +/** James Cook University of North Queensland. **/ +/** Australia. email: mccane@cs.jcu.edu.au **/ +/** **/ +/** Edited by: Jay Pisharath, Wei-keng Liao **/ +/** Northwestern University. **/ +/** **/ +/** ================================================================ **/ +/** **/ +/** Edited by: Shuai Che, David Tarjan, Sang-Ha Lee **/ +/** University of Virginia **/ +/** **/ +/** Description: No longer supports fuzzy c-means clustering; **/ +/** only regular k-means clustering. **/ +/** No longer performs "validity" function to analyze **/ +/** compactness and separation crietria; instead **/ +/** calculate root mean squared error. **/ +/** **/ +/*************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include "kmeans.h" + +float min_rmse_ref = FLT_MAX; +extern double wtime(void); + /* reference min_rmse value */ + +/*---< cluster() >-----------------------------------------------------------*/ +int cluster(int npoints, /* number of data points */ + int nfeatures, /* number of attributes for each point */ + float **features, /* array: [npoints][nfeatures] */ + int min_nclusters, /* range of min to max number of clusters */ + int max_nclusters, + float threshold, /* loop terminating factor */ + int *best_nclusters, /* out: number between min and max with lowest RMSE */ + float ***cluster_centres, /* out: [best_nclusters][nfeatures] */ + float *min_rmse, /* out: minimum RMSE */ + int isRMSE, /* calculate RMSE */ + int nloops /* number of iteration for each number of clusters */ + ) +{ + int nclusters; /* number of clusters k */ + int index =0; /* number of iteration to reach the best RMSE */ + int rmse; /* RMSE for each clustering */ + int *membership; /* which cluster a data point belongs to */ + float **tmp_cluster_centres; /* hold coordinates of cluster centers */ + int i; + + /* allocate memory for membership */ + membership = (int*) malloc(npoints * sizeof(int)); + + /* sweep k from min to max_nclusters to find the best number of clusters */ + for(nclusters = min_nclusters; nclusters <= max_nclusters; nclusters++) + { + if (nclusters > npoints) break; /* cannot have more clusters than points */ + + /* allocate device memory, invert data array (@ kmeans_cuda.cu) */ + allocate(npoints, nfeatures, nclusters, features); + + /* iterate nloops times for each number of clusters */ + for(i = 0; i < nloops; i++) + { + /* initialize initial cluster centers, CUDA calls (@ kmeans_cuda.cu) */ + tmp_cluster_centres = kmeans_clustering(features, + nfeatures, + npoints, + nclusters, + threshold, + membership); + + if (*cluster_centres) { + free((*cluster_centres)[0]); + free(*cluster_centres); + } + *cluster_centres = tmp_cluster_centres; + + + /* find the number of clusters with the best RMSE */ + if(isRMSE) + { + rmse = rms_err(features, + nfeatures, + npoints, + tmp_cluster_centres, + nclusters); + + if(rmse < min_rmse_ref){ + min_rmse_ref = rmse; //update reference min RMSE + *min_rmse = min_rmse_ref; //update return min RMSE + *best_nclusters = nclusters; //update optimum number of clusters + index = i; //update number of iteration to reach best RMSE + } + } + } + + deallocateMemory(); /* free device memory (@ kmeans_cuda.cu) */ + } + + free(membership); + + return index; +} + diff --git a/benchmarks/opencl/kmeans/getopt.c b/benchmarks/opencl/kmeans/getopt.c new file mode 100755 index 00000000..fa2f3137 --- /dev/null +++ b/benchmarks/opencl/kmeans/getopt.c @@ -0,0 +1,1184 @@ +/* Getopt for GNU. + NOTE: getopt is now part of the C library, so if you don't know what + "Keep this file name-space clean" means, talk to drepper@gnu.org + before changing it! + Copyright (C) 1987,88,89,90,91,92,93,94,95,96,98,99,2000,2001 + Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +/* This tells Alpha OSF/1 not to define a getopt prototype in . + Ditto for AIX 3.2 and . */ +#ifndef _NO_PROTO +# define _NO_PROTO +#endif + +#ifdef HAVE_CONFIG_H +# include +#endif + +#if !defined __STDC__ || !__STDC__ +/* This is a separate conditional since some stdc systems + reject `defined (const)'. */ +# ifndef const +# define const +# endif +#endif + +#include + +/* Comment out all this code if we are using the GNU C Library, and are not + actually compiling the library itself. This code is part of the GNU C + Library, but also included in many other GNU distributions. Compiling + and linking in this code is a waste when using the GNU C library + (especially if it is a shared library). Rather than having every GNU + program understand `configure --with-gnu-libc' and omit the object files, + it is simpler to just do this in the source for each such file. */ + +#define GETOPT_INTERFACE_VERSION 2 +#if !defined _LIBC && defined __GLIBC__ && __GLIBC__ >= 2 +# include +# if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION +# define ELIDE_CODE +# endif +#endif + +#ifndef ELIDE_CODE + + +/* This needs to come after some library #include + to get __GNU_LIBRARY__ defined. */ +#ifdef __GNU_LIBRARY__ +/* Don't include stdlib.h for non-GNU C libraries because some of them + contain conflicting prototypes for getopt. */ +# include +# include +#endif /* GNU C library. */ + +#ifdef VMS +# include +# if HAVE_STRING_H - 0 +# include +# endif +#endif + +#ifndef _ +/* This is for other GNU distributions with internationalized messages. */ +# if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC +# include +# ifndef _ +# define _(msgid) gettext (msgid) +# endif +# else +# define _(msgid) (msgid) +# endif +# if defined _LIBC && defined USE_IN_LIBIO +# include +# endif +#endif + +/* This version of `getopt' appears to the caller like standard Unix `getopt' + but it behaves differently for the user, since it allows the user + to intersperse the options with the other arguments. + + As `getopt' works, it permutes the elements of ARGV so that, + when it is done, all the options precede everything else. Thus + all application programs are extended to handle flexible argument order. + + Setting the environment variable POSIXLY_CORRECT disables permutation. + Then the behavior is completely standard. + + GNU application programs can use a third alternative mode in which + they can distinguish the relative order of options and other arguments. */ + +#include "getopt.h" + +/* For communication from `getopt' to the caller. + When `getopt' finds an option that takes an argument, + the argument value is returned here. + Also, when `ordering' is RETURN_IN_ORDER, + each non-option ARGV-element is returned here. */ + +char *optarg; + +/* Index in ARGV of the next element to be scanned. + This is used for communication to and from the caller + and for communication between successive calls to `getopt'. + + On entry to `getopt', zero means this is the first call; initialize. + + When `getopt' returns -1, this is the index of the first of the + non-option elements that the caller should itself scan. + + Otherwise, `optind' communicates from one call to the next + how much of ARGV has been scanned so far. */ + +/* 1003.2 says this must be 1 before any call. */ +int optind = 1; + +/* Formerly, initialization of getopt depended on optind==0, which + causes problems with re-calling getopt as programs generally don't + know that. */ + +int __getopt_initialized; + +/* The next char to be scanned in the option-element + in which the last option character we returned was found. + This allows us to pick up the scan where we left off. + + If this is zero, or a null string, it means resume the scan + by advancing to the next ARGV-element. */ + +static char *nextchar; + +/* Callers store zero here to inhibit the error message + for unrecognized options. */ + +int opterr = 1; + +/* Set to an option character which was unrecognized. + This must be initialized on some systems to avoid linking in the + system's own getopt implementation. */ + +int optopt = '?'; + +/* Describe how to deal with options that follow non-option ARGV-elements. + + If the caller did not specify anything, + the default is REQUIRE_ORDER if the environment variable + POSIXLY_CORRECT is defined, PERMUTE otherwise. + + REQUIRE_ORDER means don't recognize them as options; + stop option processing when the first non-option is seen. + This is what Unix does. + This mode of operation is selected by either setting the environment + variable POSIXLY_CORRECT, or using `+' as the first character + of the list of option characters. + + PERMUTE is the default. We permute the contents of ARGV as we scan, + so that eventually all the non-options are at the end. This allows options + to be given in any order, even with programs that were not written to + expect this. + + RETURN_IN_ORDER is an option available to programs that were written + to expect options and other ARGV-elements in any order and that care about + the ordering of the two. We describe each non-option ARGV-element + as if it were the argument of an option with character code 1. + Using `-' as the first character of the list of option characters + selects this mode of operation. + + The special argument `--' forces an end of option-scanning regardless + of the value of `ordering'. In the case of RETURN_IN_ORDER, only + `--' can cause `getopt' to return -1 with `optind' != ARGC. */ + +static enum +{ + REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER +} ordering; + +/* Value of POSIXLY_CORRECT environment variable. */ +static char *posixly_correct; + +#ifdef __GNU_LIBRARY__ +/* We want to avoid inclusion of string.h with non-GNU libraries + because there are many ways it can cause trouble. + On some systems, it contains special magic macros that don't work + in GCC. */ +# include +# define my_index strchr +#else + +//# if HAVE_STRING_H || WIN32 /* Pete Wilson mod 7/28/02 */ +# include +//# else +//# include +//# endif + +/* Avoid depending on library functions or files + whose names are inconsistent. */ + +#ifndef getenv +extern char *getenv (); +#endif + +static char * +my_index (str, chr) + const char *str; + int chr; +{ + while (*str) + { + if (*str == chr) + return (char *) str; + str++; + } + return 0; +} + +/* If using GCC, we can safely declare strlen this way. + If not using GCC, it is ok not to declare it. */ +#ifdef __GNUC__ +/* Note that Motorola Delta 68k R3V7 comes with GCC but not stddef.h. + That was relevant to code that was here before. */ +# if (!defined __STDC__ || !__STDC__) && !defined strlen +/* gcc with -traditional declares the built-in strlen to return int, + and has done so at least since version 2.4.5. -- rms. */ +extern int strlen (const char *); +# endif /* not __STDC__ */ +#endif /* __GNUC__ */ + +#endif /* not __GNU_LIBRARY__ */ + +/* Handle permutation of arguments. */ + +/* Describe the part of ARGV that contains non-options that have + been skipped. `first_nonopt' is the index in ARGV of the first of them; + `last_nonopt' is the index after the last of them. */ + +static int first_nonopt; +static int last_nonopt; + +#ifdef _LIBC +/* Stored original parameters. + XXX This is no good solution. We should rather copy the args so + that we can compare them later. But we must not use malloc(3). */ +extern int __libc_argc; +extern char **__libc_argv; + +/* Bash 2.0 gives us an environment variable containing flags + indicating ARGV elements that should not be considered arguments. */ + +# ifdef USE_NONOPTION_FLAGS +/* Defined in getopt_init.c */ +extern char *__getopt_nonoption_flags; + +static int nonoption_flags_max_len; +static int nonoption_flags_len; +# endif + +# ifdef USE_NONOPTION_FLAGS +# define SWAP_FLAGS(ch1, ch2) \ + if (nonoption_flags_len > 0) \ + { \ + char __tmp = __getopt_nonoption_flags[ch1]; \ + __getopt_nonoption_flags[ch1] = __getopt_nonoption_flags[ch2]; \ + __getopt_nonoption_flags[ch2] = __tmp; \ + } +# else +# define SWAP_FLAGS(ch1, ch2) +# endif +#else /* !_LIBC */ +# define SWAP_FLAGS(ch1, ch2) +#endif /* _LIBC */ + +/* Exchange two adjacent subsequences of ARGV. + One subsequence is elements [first_nonopt,last_nonopt) + which contains all the non-options that have been skipped so far. + The other is elements [last_nonopt,optind), which contains all + the options processed since those non-options were skipped. + + `first_nonopt' and `last_nonopt' are relocated so that they describe + the new indices of the non-options in ARGV after they are moved. */ + +#if defined __STDC__ && __STDC__ +static void exchange (char **); +#endif + +static void +exchange (argv) + char **argv; +{ + int bottom = first_nonopt; + int middle = last_nonopt; + int top = optind; + char *tem; + + /* Exchange the shorter segment with the far end of the longer segment. + That puts the shorter segment into the right place. + It leaves the longer segment in the right place overall, + but it consists of two parts that need to be swapped next. */ + +#if defined _LIBC && defined USE_NONOPTION_FLAGS + /* First make sure the handling of the `__getopt_nonoption_flags' + string can work normally. Our top argument must be in the range + of the string. */ + if (nonoption_flags_len > 0 && top >= nonoption_flags_max_len) + { + /* We must extend the array. The user plays games with us and + presents new arguments. */ + char *new_str = malloc (top + 1); + if (new_str == NULL) + nonoption_flags_len = nonoption_flags_max_len = 0; + else + { + memset (__mempcpy (new_str, __getopt_nonoption_flags, + nonoption_flags_max_len), + '\0', top + 1 - nonoption_flags_max_len); + nonoption_flags_max_len = top + 1; + __getopt_nonoption_flags = new_str; + } + } +#endif + + while (top > middle && middle > bottom) + { + if (top - middle > middle - bottom) + { + /* Bottom segment is the short one. */ + int len = middle - bottom; + register int i; + + /* Swap it with the top part of the top segment. */ + for (i = 0; i < len; i++) + { + tem = argv[bottom + i]; + argv[bottom + i] = argv[top - (middle - bottom) + i]; + argv[top - (middle - bottom) + i] = tem; + SWAP_FLAGS (bottom + i, top - (middle - bottom) + i); + } + /* Exclude the moved bottom segment from further swapping. */ + top -= len; + } + else + { + /* Top segment is the short one. */ + int len = top - middle; + register int i; + + /* Swap it with the bottom part of the bottom segment. */ + for (i = 0; i < len; i++) + { + tem = argv[bottom + i]; + argv[bottom + i] = argv[middle + i]; + argv[middle + i] = tem; + SWAP_FLAGS (bottom + i, middle + i); + } + /* Exclude the moved top segment from further swapping. */ + bottom += len; + } + } + + /* Update records for the slots the non-options now occupy. */ + + first_nonopt += (optind - last_nonopt); + last_nonopt = optind; +} + +/* Initialize the internal data when the first call is made. */ + +#if defined __STDC__ && __STDC__ +static const char *_getopt_initialize (int, char *const *, const char *); +#endif +static const char * +_getopt_initialize (argc, argv, optstring) + int argc; + char *const *argv; + const char *optstring; +{ + /* Start processing options with ARGV-element 1 (since ARGV-element 0 + is the program name); the sequence of previously skipped + non-option ARGV-elements is empty. */ + + first_nonopt = last_nonopt = optind; + + nextchar = NULL; + + posixly_correct = getenv ("POSIXLY_CORRECT"); + + /* Determine how to handle the ordering of options and nonoptions. */ + + if (optstring[0] == '-') + { + ordering = RETURN_IN_ORDER; + ++optstring; + } + else if (optstring[0] == '+') + { + ordering = REQUIRE_ORDER; + ++optstring; + } + else if (posixly_correct != NULL) + ordering = REQUIRE_ORDER; + else + ordering = PERMUTE; + +#if defined _LIBC && defined USE_NONOPTION_FLAGS + if (posixly_correct == NULL + && argc == __libc_argc && argv == __libc_argv) + { + if (nonoption_flags_max_len == 0) + { + if (__getopt_nonoption_flags == NULL + || __getopt_nonoption_flags[0] == '\0') + nonoption_flags_max_len = -1; + else + { + const char *orig_str = __getopt_nonoption_flags; + int len = nonoption_flags_max_len = strlen (orig_str); + if (nonoption_flags_max_len < argc) + nonoption_flags_max_len = argc; + __getopt_nonoption_flags = + (char *) malloc (nonoption_flags_max_len); + if (__getopt_nonoption_flags == NULL) + nonoption_flags_max_len = -1; + else + memset (__mempcpy (__getopt_nonoption_flags, orig_str, len), + '\0', nonoption_flags_max_len - len); + } + } + nonoption_flags_len = nonoption_flags_max_len; + } + else + nonoption_flags_len = 0; +#endif + + return optstring; +} + +/* Scan elements of ARGV (whose length is ARGC) for option characters + given in OPTSTRING. + + If an element of ARGV starts with '-', and is not exactly "-" or "--", + then it is an option element. The characters of this element + (aside from the initial '-') are option characters. If `getopt' + is called repeatedly, it returns successively each of the option characters + from each of the option elements. + + If `getopt' finds another option character, it returns that character, + updating `optind' and `nextchar' so that the next call to `getopt' can + resume the scan with the following option character or ARGV-element. + + If there are no more option characters, `getopt' returns -1. + Then `optind' is the index in ARGV of the first ARGV-element + that is not an option. (The ARGV-elements have been permuted + so that those that are not options now come last.) + + OPTSTRING is a string containing the legitimate option characters. + If an option character is seen that is not listed in OPTSTRING, + return '?' after printing an error message. If you set `opterr' to + zero, the error message is suppressed but we still return '?'. + + If a char in OPTSTRING is followed by a colon, that means it wants an arg, + so the following text in the same ARGV-element, or the text of the following + ARGV-element, is returned in `optarg'. Two colons mean an option that + wants an optional arg; if there is text in the current ARGV-element, + it is returned in `optarg', otherwise `optarg' is set to zero. + + If OPTSTRING starts with `-' or `+', it requests different methods of + handling the non-option ARGV-elements. + See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above. + + Long-named options begin with `--' instead of `-'. + Their names may be abbreviated as long as the abbreviation is unique + or is an exact match for some defined option. If they have an + argument, it follows the option name in the same ARGV-element, separated + from the option name by a `=', or else the in next ARGV-element. + When `getopt' finds a long-named option, it returns 0 if that option's + `flag' field is nonzero, the value of the option's `val' field + if the `flag' field is zero. + + The elements of ARGV aren't really const, because we permute them. + But we pretend they're const in the prototype to be compatible + with other systems. + + LONGOPTS is a vector of `struct option' terminated by an + element containing a name which is zero. + + LONGIND returns the index in LONGOPT of the long-named option found. + It is only valid when a long-named option has been found by the most + recent call. + + If LONG_ONLY is nonzero, '-' as well as '--' can introduce + long-named options. */ + +int +_getopt_internal (argc, argv, optstring, longopts, longind, long_only) + int argc; + char *const *argv; + const char *optstring; + const struct option *longopts; + int *longind; + int long_only; +{ + int print_errors = opterr; + if (optstring[0] == ':') + print_errors = 0; + + if (argc < 1) + return -1; + + optarg = NULL; + + if (optind == 0 || !__getopt_initialized) + { + if (optind == 0) + optind = 1; /* Don't scan ARGV[0], the program name. */ + optstring = _getopt_initialize (argc, argv, optstring); + __getopt_initialized = 1; + } + + /* Test whether ARGV[optind] points to a non-option argument. + Either it does not have option syntax, or there is an environment flag + from the shell indicating it is not an option. The later information + is only used when the used in the GNU libc. */ +#if defined _LIBC && defined USE_NONOPTION_FLAGS +# define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0' \ + || (optind < nonoption_flags_len \ + && __getopt_nonoption_flags[optind] == '1')) +#else +# define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0') +#endif + + if (nextchar == NULL || *nextchar == '\0') + { + /* Advance to the next ARGV-element. */ + + /* Give FIRST_NONOPT and LAST_NONOPT rational values if OPTIND has been + moved back by the user (who may also have changed the arguments). */ + if (last_nonopt > optind) + last_nonopt = optind; + if (first_nonopt > optind) + first_nonopt = optind; + + if (ordering == PERMUTE) + { + /* If we have just processed some options following some non-options, + exchange them so that the options come first. */ + + if (first_nonopt != last_nonopt && last_nonopt != optind) + exchange ((char **) argv); + else if (last_nonopt != optind) + first_nonopt = optind; + + /* Skip any additional non-options + and extend the range of non-options previously skipped. */ + + while (optind < argc && NONOPTION_P) + optind++; + last_nonopt = optind; + } + + /* The special ARGV-element `--' means premature end of options. + Skip it like a null option, + then exchange with previous non-options as if it were an option, + then skip everything else like a non-option. */ + + if (optind != argc && !strcmp (argv[optind], "--")) + { + optind++; + + if (first_nonopt != last_nonopt && last_nonopt != optind) + exchange ((char **) argv); + else if (first_nonopt == last_nonopt) + first_nonopt = optind; + last_nonopt = argc; + + optind = argc; + } + + /* If we have done all the ARGV-elements, stop the scan + and back over any non-options that we skipped and permuted. */ + + if (optind == argc) + { + /* Set the next-arg-index to point at the non-options + that we previously skipped, so the caller will digest them. */ + if (first_nonopt != last_nonopt) + optind = first_nonopt; + return -1; + } + + /* If we have come to a non-option and did not permute it, + either stop the scan or describe it to the caller and pass it by. */ + + if (NONOPTION_P) + { + if (ordering == REQUIRE_ORDER) + return -1; + optarg = argv[optind++]; + return 1; + } + + /* We have found another option-ARGV-element. + Skip the initial punctuation. */ + + nextchar = (argv[optind] + 1 + + (longopts != NULL && argv[optind][1] == '-')); + } + + /* Decode the current option-ARGV-element. */ + + /* Check whether the ARGV-element is a long option. + + If long_only and the ARGV-element has the form "-f", where f is + a valid short option, don't consider it an abbreviated form of + a long option that starts with f. Otherwise there would be no + way to give the -f short option. + + On the other hand, if there's a long option "fubar" and + the ARGV-element is "-fu", do consider that an abbreviation of + the long option, just like "--fu", and not "-f" with arg "u". + + This distinction seems to be the most useful approach. */ + + if (longopts != NULL + && (argv[optind][1] == '-' + || (long_only && (argv[optind][2] || !my_index (optstring, argv[optind][1]))))) + { + char *nameend; + const struct option *p; + const struct option *pfound = NULL; + int exact = 0; + int ambig = 0; + int indfound = -1; + int option_index; + + for (nameend = nextchar; *nameend && *nameend != '='; nameend++) + /* Do nothing. */ ; + + /* Test all long options for either exact match + or abbreviated matches. */ + for (p = longopts, option_index = 0; p->name; p++, option_index++) + if (!strncmp (p->name, nextchar, nameend - nextchar)) + { + if ((unsigned int) (nameend - nextchar) + == (unsigned int) strlen (p->name)) + { + /* Exact match found. */ + pfound = p; + indfound = option_index; + exact = 1; + break; + } + else if (pfound == NULL) + { + /* First nonexact match found. */ + pfound = p; + indfound = option_index; + } + else if (long_only + || pfound->has_arg != p->has_arg + || pfound->flag != p->flag + || pfound->val != p->val) + /* Second or later nonexact match found. */ + ambig = 1; + } + + if (ambig && !exact) + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, _("%s: option `%s' is ambiguous\n"), + argv[0], argv[optind]); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, _("%s: option `%s' is ambiguous\n"), + argv[0], argv[optind]); +#endif + } + nextchar += strlen (nextchar); + optind++; + optopt = 0; + return '?'; + } + + if (pfound != NULL) + { + option_index = indfound; + optind++; + if (*nameend) + { + /* Don't test has_arg with >, because some C compilers don't + allow it to be used on enums. */ + if (pfound->has_arg) + optarg = nameend + 1; + else + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; +#endif + + if (argv[optind - 1][1] == '-') + { + /* --option */ +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("\ +%s: option `--%s' doesn't allow an argument\n"), + argv[0], pfound->name); +#else + fprintf (stderr, _("\ +%s: option `--%s' doesn't allow an argument\n"), + argv[0], pfound->name); +#endif + } + else + { + /* +option or -option */ +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("\ +%s: option `%c%s' doesn't allow an argument\n"), + argv[0], argv[optind - 1][0], + pfound->name); +#else + fprintf (stderr, _("\ +%s: option `%c%s' doesn't allow an argument\n"), + argv[0], argv[optind - 1][0], pfound->name); +#endif + } + +#if defined _LIBC && defined USE_IN_LIBIO + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#endif + } + + nextchar += strlen (nextchar); + + optopt = pfound->val; + return '?'; + } + } + else if (pfound->has_arg == 1) + { + if (optind < argc) + optarg = argv[optind++]; + else + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, + _("%s: option `%s' requires an argument\n"), + argv[0], argv[optind - 1]); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, + _("%s: option `%s' requires an argument\n"), + argv[0], argv[optind - 1]); +#endif + } + nextchar += strlen (nextchar); + optopt = pfound->val; + return optstring[0] == ':' ? ':' : '?'; + } + } + nextchar += strlen (nextchar); + if (longind != NULL) + *longind = option_index; + if (pfound->flag) + { + *(pfound->flag) = pfound->val; + return 0; + } + return pfound->val; + } + + /* Can't find it as a long option. If this is not getopt_long_only, + or the option starts with '--' or is not a valid short + option, then it's an error. + Otherwise interpret it as a short option. */ + if (!long_only || argv[optind][1] == '-' + || my_index (optstring, *nextchar) == NULL) + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; +#endif + + if (argv[optind][1] == '-') + { + /* --option */ +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("%s: unrecognized option `--%s'\n"), + argv[0], nextchar); +#else + fprintf (stderr, _("%s: unrecognized option `--%s'\n"), + argv[0], nextchar); +#endif + } + else + { + /* +option or -option */ +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("%s: unrecognized option `%c%s'\n"), + argv[0], argv[optind][0], nextchar); +#else + fprintf (stderr, _("%s: unrecognized option `%c%s'\n"), + argv[0], argv[optind][0], nextchar); +#endif + } + +#if defined _LIBC && defined USE_IN_LIBIO + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#endif + } + nextchar = (char *) ""; + optind++; + optopt = 0; + return '?'; + } + } + + /* Look at and handle the next short option-character. */ + + { + char c = *nextchar++; + char *temp = my_index (optstring, c); + + /* Increment `optind' when we start to process its last character. */ + if (*nextchar == '\0') + ++optind; + + if (temp == NULL || c == ':') + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; +#endif + + if (posixly_correct) + { + /* 1003.2 specifies the format of this message. */ +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("%s: illegal option -- %c\n"), + argv[0], c); +#else + fprintf (stderr, _("%s: illegal option -- %c\n"), argv[0], c); +#endif + } + else + { +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("%s: invalid option -- %c\n"), + argv[0], c); +#else + fprintf (stderr, _("%s: invalid option -- %c\n"), argv[0], c); +#endif + } + +#if defined _LIBC && defined USE_IN_LIBIO + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#endif + } + optopt = c; + return '?'; + } + /* Convenience. Treat POSIX -W foo same as long option --foo */ + if (temp[0] == 'W' && temp[1] == ';') + { + char *nameend; + const struct option *p; + const struct option *pfound = NULL; + int exact = 0; + int ambig = 0; + int indfound = 0; + int option_index; + + /* This is an option that requires an argument. */ + if (*nextchar != '\0') + { + optarg = nextchar; + /* If we end this ARGV-element by taking the rest as an arg, + we must advance to the next element now. */ + optind++; + } + else if (optind == argc) + { + if (print_errors) + { + /* 1003.2 specifies the format of this message. */ +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, _("%s: option requires an argument -- %c\n"), + argv[0], c); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, _("%s: option requires an argument -- %c\n"), + argv[0], c); +#endif + } + optopt = c; + if (optstring[0] == ':') + c = ':'; + else + c = '?'; + return c; + } + else + /* We already incremented `optind' once; + increment it again when taking next ARGV-elt as argument. */ + optarg = argv[optind++]; + + /* optarg is now the argument, see if it's in the + table of longopts. */ + + for (nextchar = nameend = optarg; *nameend && *nameend != '='; nameend++) + /* Do nothing. */ ; + + /* Test all long options for either exact match + or abbreviated matches. */ + for (p = longopts, option_index = 0; p->name; p++, option_index++) + if (!strncmp (p->name, nextchar, nameend - nextchar)) + { + if ((unsigned int) (nameend - nextchar) == strlen (p->name)) + { + /* Exact match found. */ + pfound = p; + indfound = option_index; + exact = 1; + break; + } + else if (pfound == NULL) + { + /* First nonexact match found. */ + pfound = p; + indfound = option_index; + } + else + /* Second or later nonexact match found. */ + ambig = 1; + } + if (ambig && !exact) + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, _("%s: option `-W %s' is ambiguous\n"), + argv[0], argv[optind]); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, _("%s: option `-W %s' is ambiguous\n"), + argv[0], argv[optind]); +#endif + } + nextchar += strlen (nextchar); + optind++; + return '?'; + } + if (pfound != NULL) + { + option_index = indfound; + if (*nameend) + { + /* Don't test has_arg with >, because some C compilers don't + allow it to be used on enums. */ + if (pfound->has_arg) + optarg = nameend + 1; + else + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, _("\ +%s: option `-W %s' doesn't allow an argument\n"), + argv[0], pfound->name); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, _("\ +%s: option `-W %s' doesn't allow an argument\n"), + argv[0], pfound->name); +#endif + } + + nextchar += strlen (nextchar); + return '?'; + } + } + else if (pfound->has_arg == 1) + { + if (optind < argc) + optarg = argv[optind++]; + else + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, _("\ +%s: option `%s' requires an argument\n"), + argv[0], argv[optind - 1]); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, + _("%s: option `%s' requires an argument\n"), + argv[0], argv[optind - 1]); +#endif + } + nextchar += strlen (nextchar); + return optstring[0] == ':' ? ':' : '?'; + } + } + nextchar += strlen (nextchar); + if (longind != NULL) + *longind = option_index; + if (pfound->flag) + { + *(pfound->flag) = pfound->val; + return 0; + } + return pfound->val; + } + nextchar = NULL; + return 'W'; /* Let the application handle it. */ + } + if (temp[1] == ':') + { + if (temp[2] == ':') + { + /* This is an option that accepts an argument optionally. */ + if (*nextchar != '\0') + { + optarg = nextchar; + optind++; + } + else + optarg = NULL; + nextchar = NULL; + } + else + { + /* This is an option that requires an argument. */ + if (*nextchar != '\0') + { + optarg = nextchar; + /* If we end this ARGV-element by taking the rest as an arg, + we must advance to the next element now. */ + optind++; + } + else if (optind == argc) + { + if (print_errors) + { + /* 1003.2 specifies the format of this message. */ +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, + _("%s: option requires an argument -- %c\n"), + argv[0], c); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, + _("%s: option requires an argument -- %c\n"), + argv[0], c); +#endif + } + optopt = c; + if (optstring[0] == ':') + c = ':'; + else + c = '?'; + } + else + /* We already incremented `optind' once; + increment it again when taking next ARGV-elt as argument. */ + optarg = argv[optind++]; + nextchar = NULL; + } + } + return c; + } +} + +int +getopt (argc, argv, optstring) + int argc; + char *const *argv; + const char *optstring; +{ + return _getopt_internal (argc, argv, optstring, + (const struct option *) 0, + (int *) 0, + 0); +} + +#endif /* Not ELIDE_CODE. */ + + +/* Compile with -DTEST to make an executable for use in testing + the above definition of `getopt'. */ \ No newline at end of file diff --git a/benchmarks/opencl/kmeans/getopt.h b/benchmarks/opencl/kmeans/getopt.h new file mode 100755 index 00000000..bae04bf7 --- /dev/null +++ b/benchmarks/opencl/kmeans/getopt.h @@ -0,0 +1,191 @@ + + +/* getopt.h */ +/* Declarations for getopt. + Copyright (C) 1989-1994, 1996-1999, 2001 Free Software + Foundation, Inc. This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute + it and/or modify it under the terms of the GNU Lesser + General Public License as published by the Free Software + Foundation; either version 2.1 of the License, or + (at your option) any later version. + + The GNU C Library is distributed in the hope that it will + be useful, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A + PARTICULAR PURPOSE. See the GNU Lesser General Public + License for more details. + + You should have received a copy of the GNU Lesser General + Public License along with the GNU C Library; if not, write + to the Free Software Foundation, Inc., 59 Temple Place, + Suite 330, Boston, MA 02111-1307 USA. */ + + + + + +#ifndef _GETOPT_H + +#ifndef __need_getopt +# define _GETOPT_H 1 +#endif + +/* If __GNU_LIBRARY__ is not already defined, either we are being used + standalone, or this is the first header included in the source file. + If we are being used with glibc, we need to include , but + that does not exist if we are standalone. So: if __GNU_LIBRARY__ is + not defined, include , which will pull in for us + if it's from glibc. (Why ctype.h? It's guaranteed to exist and it + doesn't flood the namespace with stuff the way some other headers do.) */ +#if !defined __GNU_LIBRARY__ +# include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* For communication from `getopt' to the caller. + When `getopt' finds an option that takes an argument, + the argument value is returned here. + Also, when `ordering' is RETURN_IN_ORDER, + each non-option ARGV-element is returned here. */ + +extern char *optarg; + +/* Index in ARGV of the next element to be scanned. + This is used for communication to and from the caller + and for communication between successive calls to `getopt'. + + On entry to `getopt', zero means this is the first call; initialize. + + When `getopt' returns -1, this is the index of the first of the + non-option elements that the caller should itself scan. + + Otherwise, `optind' communicates from one call to the next + how much of ARGV has been scanned so far. */ + +extern int optind; + +/* Callers store zero here to inhibit the error message `getopt' prints + for unrecognized options. */ + +extern int opterr; + +/* Set to an option character which was unrecognized. */ + +extern int optopt; + +#ifndef __need_getopt +/* Describe the long-named options requested by the application. + The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector + of `struct option' terminated by an element containing a name which is + zero. + + The field `has_arg' is: + no_argument (or 0) if the option does not take an argument, + required_argument (or 1) if the option requires an argument, + optional_argument (or 2) if the option takes an optional argument. + + If the field `flag' is not NULL, it points to a variable that is set + to the value given in the field `val' when the option is found, but + left unchanged if the option is not found. + + To have a long-named option do something other than set an `int' to + a compiled-in constant, such as set a value from `optarg', set the + option's `flag' field to zero and its `val' field to a nonzero + value (the equivalent single-letter option character, if there is + one). For long options that have a zero `flag' field, `getopt' + returns the contents of the `val' field. */ + +struct option +{ +# if (defined __STDC__ && __STDC__) || defined __cplusplus + const char *name; +# else + char *name; +# endif + /* has_arg can't be an enum because some compilers complain about + type mismatches in all the code that assumes it is an int. */ + int has_arg; + int *flag; + int val; +}; + +/* Names for the values of the `has_arg' field of `struct option'. */ + +# define no_argument 0 +# define required_argument 1 +# define optional_argument 2 +#endif /* need getopt */ + + +/* Get definitions and prototypes for functions to process the + arguments in ARGV (ARGC of them, minus the program name) for + options given in OPTS. + + Return the option character from OPTS just read. Return -1 when + there are no more options. For unrecognized options, or options + missing arguments, `optopt' is set to the option letter, and '?' is + returned. + + The OPTS string is a list of characters which are recognized option + letters, optionally followed by colons, specifying that that letter + takes an argument, to be placed in `optarg'. + + If a letter in OPTS is followed by two colons, its argument is + optional. This behavior is specific to the GNU `getopt'. + + The argument `--' causes premature termination of argument + scanning, explicitly telling `getopt' that there are no more + options. + + If OPTS begins with `--', then non-option arguments are treated as + arguments to the option '\0'. This behavior is specific to the GNU + `getopt'. */ + +#if (defined __STDC__ && __STDC__) || defined __cplusplus +# ifdef __GNU_LIBRARY__ +/* Many other libraries have conflicting prototypes for getopt, with + differences in the consts, in stdlib.h. To avoid compilation + errors, only prototype getopt for the GNU C library. */ +extern int getopt (int ___argc, char *const *___argv, const char *__shortopts); +# else /* not __GNU_LIBRARY__ */ +extern int getopt (); +# endif /* __GNU_LIBRARY__ */ + +# ifndef __need_getopt +extern int getopt_long (int ___argc, char *const *___argv, + const char *__shortopts, + const struct option *__longopts, int *__longind); +extern int getopt_long_only (int ___argc, char *const *___argv, + const char *__shortopts, + const struct option *__longopts, int *__longind); + +/* Internal only. Users should not call this directly. */ +extern int _getopt_internal (int ___argc, char *const *___argv, + const char *__shortopts, + const struct option *__longopts, int *__longind, + int __long_only); +# endif +#else /* not __STDC__ */ +extern int getopt (); +# ifndef __need_getopt +extern int getopt_long (); +extern int getopt_long_only (); + +extern int _getopt_internal (); +# endif +#endif /* __STDC__ */ + +#ifdef __cplusplus +} +#endif + +/* Make sure we later can get all the definitions and declarations. */ +#undef __need_getopt + +#endif /* getopt.h */ + diff --git a/benchmarks/opencl/kmeans/kernel.cl b/benchmarks/opencl/kmeans/kernel.cl new file mode 100755 index 00000000..11ca065e --- /dev/null +++ b/benchmarks/opencl/kmeans/kernel.cl @@ -0,0 +1,61 @@ +#ifndef FLT_MAX +#define FLT_MAX 3.40282347e+38 +#endif + +__kernel void +kmeans_kernel_c(__global float *feature, + __global float *clusters, + __global int *membership, + int npoints, + int nclusters, + int nfeatures, + int offset, + int size + ) +{ + unsigned int point_id = get_global_id(0); + int index = 0; + //const unsigned int point_id = get_global_id(0); + if (point_id < npoints) + { + float min_dist=FLT_MAX; + for (int i=0; i < nclusters; i++) { + + float dist = 0; + float ans = 0; + for (int l=0; lAYal0cw=BqWf81Ol>?Kmr{M7+Eyn5<=2}Xf~4$;6{)T zwT&n!N z1Q-YY=L!l-Su@L%hG?_Y4L%K}9seiUBw~^+S5(Y$h(s%9X==!l{ROXFqbkd{Cri*Q z6~!$OY)4(+pDsX@%SFX3-=;#iHd85axy7C8vE50d-ASXJSt^G9W@(O(V)%ctL0@dk z1w=TPg_1As!P4xu_zPbJ>-#52hW!(AMa3*PO0;5@X1jCIj(YyPm&;&WX!O^YT~ZDG z&C;BQvDH;8CgzN-uE?mas%=`Ev3AnLjEOm!jdhvh#JDM+ymJ>U5j~l9i~~fSWAUFR zCw<1>SMDA6;QdEWo#{My$Q_zKeC&*wq>aaabKVpOB&v3y4e>enKlxbOkko>AivHu? zAEE*Fdfk8)y%B!|cW-!KAN;$(@9w3CWGL7hes~{pI>6sszfbgmU)6^ky$}2&ec+$z zLw-#k{GAwUZ+bq^2mj_i@PqrnKhp>P?mqgB?n8h2S#NqS??e8gKJW}1y~()}{klQx zc}?y^pS@u0P0lNQ^!v*`)g7|z+y|;FD5o2<;QeHi)sj9l7xS?)kLs<>LxnAGwx>}!i zt*@}QvJQwjRkc-(t5BKU;4Slc4bJnce5)4KH8qrb1C&`!m6hHGfD66V-m*rM6NrM^ z%bL7R-Uay!%W7A8&-OOddaDD?X4lo!l+{+8OAa8>e{pr0ud=S8rZB%zDJ@-DQ&(Hs z=qqdRm6isD_h@DGu+Upp(Nhg$G?xCv=;k!k)hxceo?ROH&iB@wv*HqOxlbvbx4Lwp zcV$(h&)YD&x~#F$+Zbpb(0-BEC+m3)D?v3hIKQFF=QUJ-sT+M2JZZb(IxmmqP}1H~gTctg4nwUS)1k z;jG!E<1)u(PLTDIlG5>+Im$W31q)^um1bvV1z56k0@YxRytr)0Ib_CULl)@(Y{!3= z8ve_F8tM{a4kGKaGKn)0^*-L9N5+Ed=vEw>^43ssJR&OT$Ss`@c=J#E0gc=m5s zLNO@Pr9QTRJ||}`2Ug~~W==`ij4RB!jg*w=Kzt&e`Ivdd3S4>1MU}qe7BzX(E%@OUyvu?gVZp~)@FOjFw*{YM!KYgA$rgN;1)pNU zPqN@gS@5|Qe5wWSvEb7z_+krwv;|*c!H==vms{}Z7W^s;KEr~qx8O4^__Y@NSPOo= z1)pWXUvI%@TkxAL_;D7zr{!zi)8sCCi?LJ4x*&upa_UkW7rmhLmAay0j;_e)l ziyGloBz7L|!j&9On4zol4FTH-+X%M{_{U7Z8sS|6K1P@!s&j{cj}T^9?A$EiPYE+r zb*>lihlJ^7o%I5Ki!eh|=W+qRLYN_`vsl0{5N0Uq%oXr%!VE#3Spt5NFhftLTfh$! zX2|Ju3HV;ZQG^u%-$~d>_~;oRl5Zi*(9(HWz&8+PNa=h-z#9lNlytTW_zJ=dA)UJf z+(4M2qjQIVFCom3(YaZ`D+n`GbgmchGQtcIo%I4 zhJbB^M-Xlo@Q)dQxwJcX3HTUcF6qu40zN{ROSyBifIlV7CEU4Qz#kIk((SAl@LPnr zWILA&_!Yujs-49Get|HTXlJf~cN6B)?93AIlZ4X=y9N9(VJ^i^mw@jjoJm*_@STLm z5}ReQljFIMu( zbPn4nIH}&=<+GM69iKwJNfu%$+G6_*J?wGS=A1xfPZ{Cu;#fAP?mn`zX15=my zuS$ya`;v6L)^I6%=n)+Woa5GZkN-vI#_foEd-lMld-vhiaW}dIsnGwD$N!;o<1SG1 z+iaee7wmbPc6FUOb0+VHH}Z!cxaQ0CxO|`cj~4km_jR~_IIw=bqB!%9dN$3nd7Sxg zZi)OKtKcxwGFAF4|FuUR&Fpt#_h3Pg{PH+q3Da zq^yGM_UuF1Z|wmR6Q7@yi&9jwHX_)0{P{_f;DC8!+(?QsD*KPds2sC^a&F84wrEp+ zQXGYN{0H}3gH|0kae!@CCB=F6eW$gy`=@_D8qBRvV>@W=a^CbfN>6#0XW!qnj^qC% z>te9BP5%yD(8uH7C&YOCj+cPx$TbvS%VuqkCx8R<3n)aVZCM@PN`s=oeZV?57KkBE zznh$!7ALu>VQlsxF)<$hg-LDHJ>PJtfVwOo!0}$1TcMCO0jARi(}WwUpI1{Xu}9^0$mpvz2to2aTp^ zvDj#FA6h^Xa)IfWnY|hvIv;-rLPVdn!Gi791h(Be-LqAA)AR+vKbvY|<=`)or12nmHWejB?xsKQfgGjd97&2ZNV+7! z+mwzRlFIWjE%1~{Twr36n3Xtg1smb9`AJ#6ep&4vhu9kka-bOGi=)tA+w@4t=D4yC z8DlGg4wE=T#T~im!!o#yw?U@=g*_#f&E@oouCA{JBd!5|G9$R@d;z?OkIx{wb-_9% z7hIHgVctbA?up9`a_wd7j|JbJmo2RyB>p9V-z@^yeGE|?8}_psNZe`0^Sx`)u45u` zTN!V6Gm!21qlJGNXq`>aLiUqu>3=3k%euwi-JNXjF_q~ULYA|@ve(Uy{`ft>R3U65 z)Db=!xo52@Q?85ljxWikf{iY`XO~INZinx793t^^Y`t75dpd%|j>k!P#h@VAxM67_ z(j5CuWOxV+dv{QqnFR0TCSHy>kFaIycoirtqmPr_N@pu6Vz#F{Qrt?%QPSF`{~ffB z&t;u_9q-HfA=X9vw&{1F&aAt08phv!fAPPOw-{N+?S=kRMc61>zV>Wv#}*Um$58!6 zNp?@$g-P{AZPiI@MG!MRGD}q8PHv)we|C3Fx8a!c*mc|VMig9xk=cjDniESfmaaO# zEiAYIABBGA;`=kY;P{fn;tMP@bjS2qK`CNu$4O(n;Ct3R%UGgvF@7vbzac3uKoX1b z1xdOUB(78IVti1N)*2+cT&H&Q*>R&JU1X4q)$JhZGD(_ckYZ&kK52BECrP6Wl1q~K z(9toKB)NojlG~;iqUe1I=XP8zxMghm2+hHs0QQcie@H1sZT+4cNhuTlfT9Qi#RZlh zP-j30ggX)I%0wF}>mkU>r*jvyzP0YcqBci0nzX?Svb*5qSciq>vCp*FZ&mZ!BA=OB z^t|E=ZE08isI;C~HxhC7RAF1uJ4*Ij$DJvRvL*gpeXOfXgqW}1>k*mC1C;0YhHpCS4Cs?20$Nj57P9uVQJhmKW8j z+XMx97w|=JeOJE!NWTBCFcEr&h$4SiM>@6>Pumh?w{Q6dd)lV3d%fc}=;CR+TItv% zDnPYuOccwGBY=``Q7Y%m<9^LNDeA#&|pKFA_IIdQ#cyn?pJZ<)5PLs#KG06jlYz)0W zc3d!|15L&(mSRga5M0E{L3@$`!upD`yJUPWg!~0FngPvPPIfsrehqv}$IOR=MDl+CE%1Ek@gMVOE>BCl?(w@klV5K7HsAk(2azq#`S^Qyj5ZR_CFlsb@w#u| zqMI??j^CmN=~;X324rs6pGahScKe>EO?1aAL>47wxP7{~ z>8BTA1bF(GTI4@tcKaHbXxRs77fj_OP7M}>|1f7E-`|Pnwqg&Gw|sFgE{5V1Q_QT+ zBLYc!M;KNr%*nOr2HoCz!Y7vN#(GX*d%pkJo(}@r4uQ5@%Nx5ugnp@e-X9kn;F+(v zz@aeRyg!wKQNmE-VF`hbi>$&kK2oeYXKON_ldjp%wP}q3GB0aImmJz)bSNKBn?`oV zpoOtc@JyJy=Q=QTq_H0++r{umJ}c&8)O+|C*fB#kZhDh?>;v%CFe7dVKH0ugGC5nn zhw-|%7vpJdZ+ep+*3Gkg9BFq?ECY&sy?SCkiI`3}V#|-}x+%g3MJ#MNVl!RP<3H(9 z|M{QzABS=R(ztRt-CxCV{aMBnZwz{I_pywl#u&>x6fglioeS~g--1}DW_Rsrqa{Bv zSdMf?ie;UT7>328c0Pc>E$xm0renL)Tf5es?Sb3IdCFfZ+BEGNGd>xjT3%8+wm=#m zNDmga9JF`LMvn+5Z}~gMKyd_>H?Yh1KmXKJC~(ywKSXvGq0_y$!<{--AAy&(e(t=f z6$32D{wBCV(;tuQdENBK>~9eKJFWvebV;ALX~;J;N$a6?)699sLYTx|ZO@;~HhD-T z5c=beJD1s0d5#8|?MwEKSrC#x8N+M-!(Kml3ps{wI_d6TH(%dv6<{-p1$SP{8PzwT zEj9_y&>I~vCi}r8>w8X`YeI-eM|dPeBGd5~i0k|aFFe}pNeq#1pemG`WGZKA(6Ni$ zkAbCQ;#aT?HRa$-ARHRp0xHISB?<)XSL5BxEp}fDLFR_(0!g&$I1e>Bf7~zg{O>Wp z!A|*op8s1WHIv_Tp7V+aFNa_hq&4Y9<()+c3GM>t&E15T7nq$y~qDPojKs%>IXj$r-kLOd-na+=JCJo zK`y0t42GQSw+dP@D1gqJ`XkQxW04oUfc)>h=6?vc7z$(&;!K=p(=-I;1BLz%@CKyy z8|O_QG6s}~6t;yG`d=t)i+z#lC;S+(&b|SIvtNT)u1dQ|TE4Rv zw(QgWb57;^2Ng^{xYkiH`S4YbLyH6XNn_M?gZSpho!x%Wz5zape7haW9Be)+-bV2y z);rKztY+Fr?>MictIMGMfVXEIPYL6kQVWrOt;5!{IEm&d@^@IxQ=}qX_uOqZ6>y=u zXyu|!Ijsn4O5Tz#A9KMUnG<4vxGE_X<40nMSGqOG_qfWCyS-7eZ2eV9V-#O4w>drg zL_zjPm780?SvO~XThRn1uPyqVP4?^}|I5NYNZW^@;UU-wIl#2>NDzzC0F8d-xbxzD zG@SDwmB3nQw--%rZ+a~cnPA>QOayTU^g_H9ESUV+Rof9UT&gn@8`kQmYmn=Gj~Q{c z&V|{ahHKBGH~?wxhp&Ix)m6we^~=A>cY%LF$pPsdZ?lHv5H4id8}i!*cIG1%(FyLx z%s6klLO@+k|2J5Foz;Oj#d%XY3kV0^g)&aIz6Hm}KtEsP|C943yiLG<^4;f~W?`o~ zjj>O-3W-z_1%K(w}LWTPZaq7w4g2WO=s#K z)+I87blI;%WV_ICv;)VTOMxtTx%mrc=9{ZSoT+c-`#*%S)Pl)xH1G42ztDQ3s4a4z zGxd+_hGw^K{uk{LrF2O8YZ12J)+*Q&nnU#H7coPJk2}xZ@62q!!a-}j=uF+;@hL1Q zfS|I-@38*;U3upf`cLF7DfGXSx439i@{ay)yz<_Zu@mK@qO>ovKl1E5tuYhtxcYNU zL+dxbp=Nw9@*gSU!%J6QSL~lXZ~@QcubPfvrtoT?FLY!YAMEn@i*ygXVFCxLc%-e`~R$6Dd(iO z#y#{3^jBWp#8X_oH4y{us~t`$t?)K9uBz&_H;es$e(vjEjxx|+Z)f6C)27czt_XNB z6+C~go0f9dRk{uL6c(6{DNA?%oG3lnO|n}|Y0v&F=hL8P-`3gm&F^;naA4Pr@Fk69 zE4@?P$&Ky{8rM|$%2#8>dA zm2v5<09MWk7AiS2YTPwtYfEblY@>S$z^a<28h33I=YT^G(74pscq`cMf(mbCSyQ!d z`h<(5oLYBHRc$vp==r}P$LIE~YVbC$s;d_LxTgfc)n(O9UUuHg70@RuGb=0G(5c#8 zTU}RI-)Qnr!Rf$;GGA3)ts5sC-QKeDRlN-}Aj_1q!W3YXQ>tolP)|&PyQKdkWpD7W#XSi$Y>#AyfjRMNZKv=J>t@F9-8|uov zjOp?UpWgFw&qn7BbyXGa*_Q5ma!ohd&+<0-Sx>9S?@P=Yh% zTjhoSLVz$I{4zOnd?oE4jGqhZp!0leJnlu8HI+5MRSV>Z$%;W#P4P{snNmBYddiB1 zIwfmjR+eHg)YiEho6w0^INTVpjVbQ1aK)XDTX^Bvb70?^`f9JwTcLyp^{f+TRvWQI zVWc412($)1xnha}ZO2twI0{Wyluf09b_~xImH#cf+aYU2clh76kGqd{ih)0D*t^{* zclBZaRXyb78gN~7ixE@cY6$u~G1~|~>*{K5RtIZS*DJ_B@jv~euC7;6PWww&*Nw=e zzebskq&xbPuC8ssM;^wZ2b4u9BSF6qDYXyfqbPri@+v%e#Y`OvjVtqbM)L zgF>u^5+4ZoSmIUmqbOfR`45zMPbx0;iiL{0)};=P4AnQQp)TTiIHwkRjFv3o-aEgd zBpLiAA9i)QAXkZU&54Se=ZsjRuUBR!OdUNwX#@qZJs#5(7si!CM1GX}(kNZ2K=nfW zp9H*%pM}hda&5BBii*2Y1IM-Yyr|R`$DFA3s+OrmU9Xl!ZB}PR-LB4!+MzB6*7gXH z#Rf7T$P08U0y@lsUIoyr z7^uHTtt~cC`9PIK)t5xAz1YCa1!g{&8h)-b)uG+4Lch;IzZ4>%_eIdV2-U5?XW=Pe z9ZKqRGwpVhRuJXhWSNUyZ;Yo9<9S?-TAL3mmPf74g8^TJdkZhXGsN@iS@Jt6 zRy@DMVujBbaHK>lZ@6+rt)BsJ#qk$jJiyEA{prg^X`a~^n*cC8(95gCpfg9{B|f4+ z`Td!$I>NWxa#`S;G4gspG*?u(wexz}mdiphpBwmz4=*E3D{&H3cp%w&8En`fmJ75Q zL)Y6S7xyn-+@g35l7&(nnJa20UN>MXjqVhD?keD~ z!zJBl{{MM#(y)KejzaoNJq8+OicwB9$~i`Po>5jB14j9jQ64bLca8E(qdZ}hAu&e( zMww!i6OD3?QJ!a%l}5SRD6cij+l=x7qkPIJ4;baUM){>to-oRu^T9lS?(Er9+^I`e zG}Zc=+&P((GskC4XcAENy6l|HteiB7ReCT^${d%GJ=x$S4-g{ZlTI&P_YEUJcumhW z0>{0ECT1WJNn=m#0H@=>O&OYDF5z6fJm)2nNm1*5v1fRV|?s;f%@+doB!`3C=& zo1w#KXv>ZU^=&4InjZQw0*$8XQxP9ETWCKRSF?wHDv%-51=7J6cs4$JIZ~@A*zUmV z5wvw&hoRawMuE^$4}xu=3)PLRLhxmXz_#P)+tDV7PeYhv6RQqAM1PV@YDhkiwgY0&|jnae(3jnz|(I*kD({nqoPMRQUB>tR`pk*W9TW?UHbV5yP>C9kJINPL+|Vx8Q^S@}j-nFC3L6I%LsbDx3Ofu`s7(O5VNX%4T>zdiJLV|VA%Nnr%V3Sr z5CN2g&18?dIuf|$VQZ*XxS9rFRoE-AerTjhh3mr(lHwA8FYKmB0MY6iP}YV$NofPs zSpe3DeN7Dpsl_NZggr|jP8|z|maq!88KUxcCaw>=g+Q{p0SueNhEcan^WTIMte)cQ2YcYn9Tuw!} zt}pr(CjM4TMTA{H?+TQWThUT4DFit(8J?wINUEZ*Lt}k02d?N&vRrfl@OGsiXZhEt zMr}d`-%h;?0`-f}0VecQRO7;wuTa-t12s&cSVfP)P-We&zl2!~Q#epv=XX0~-K98R z1S<5-&{^m!E_4UyLcfmlau2JDehz2#q;qjiOD#8MT3 zU1DNmfV}{Ki{o%n2AqBao&G8KRQP^kLO+GcToFUJvA0X0WQ2Pw(2BkZ9Y+kmn^i7{ z_dyTQAHu!AX1CvuQq%3HAuIY>G_see9KP*t8t4+5ECyde#(*2c@TeM@yC3|qk!{b2 zVv|JPpW5Xg75ueBX8qJg0Ydmyj~Qy|3(yd z{7Dpd{zDYM8iv`8jodax6x)kMvEwpP+_Om(_wE$MeXoh)!EZ(JFlR3|^3n04_{{=Q z?5YyQ6Mj)V$t4~e`5(`V;;-y?s95_!D2Um;Y zha;kRRmH-Hjr`*iZ`xAF(6V`6A?)VM24v|MH#M^q8x|`P-3z~l(OE)1g=Mui_v748uu0CyrRV20qjOwTq4Z9e!X28^eP(Johvs`oe$8V8aD(Swzzs2 z#vYdrv>w+C9#`C*LY*b-gbjZu8s7Xk8^$8yC~;4q2Q}_`)NOG#F`Ak1ZNdBlr8$51 zE}6fc~j#;#L(veLALlCq54GpPZ$H?s&jx%odB7GKY}o4qRoahc<^$NokMN6 zz{W$!<&3wf1Hm)|{}U6zp$r{Obb?JyGf|0EKr70ywNTC3wHroL-TY}Kr%gQvJgVwq zH^VEu<%-iGQ8xFr>{OSKefTQxTE!V5A>BO$^W$_$$mRCHFwR&Bxrf)4`xNIO3FW%q zL}$*y5?Vi^o}CVrkfM%AfF+zG4A3^>L-M8=pnXIrX4RQyfcl7+II465bd4yX9mX1< zdqfJg9dCfS?lN{XQG&bN-(qym$r743VjEefNyx5_e1WX_ec$%xSU$4Fd6#u7)TNUUg z*B^k@?2cNZetqr4BDh5jUfwC?H(qO@0dQqn~Xf7k2{D z)==^T%yIvvA_?H`AW4)mN=crL8P%^8&A4TBqy2@0J0f)AJ|fjg4Z+OmM+G+dTA=lt z)iiXvlN{VmQwXIHgdw<@CGUrRdX3=X)*}tkgEjdpGF1sBJITZw1bm0WC8T@?Gxh&a zNMV4KQbf(#A;l&Sr>v7gMQ&?WW8R3sqn{BZZgqmBBws|?iK27vd{VX$YnVyHgzBxr zOxzRY92m5EIG#R0qYrSSOkqXZD*1g{zFNqAnIz#OY?eY0!}|>#Wfte)2_c+&YfrA^ z3#r{yA%&Z65ACF=WE(YnUI^iq9MsUDKLDHbuM|SKS4+CkBKau$e^xZ*{%s}kM05XV zgweUDr##BJ6}~AZIBhR_?SF!v&Anb~#_=g>zoS)tU4<&Q|DZZb+I(>9?W8axuu`6& zwA(3-NrF|{SDdy}X=uZ|!O|xAZ&dz#jifA+#3~~snTUg(C!%oi!DqCuMwt}d%Ctj2 z0HM*>Re?d#ze9)7*RyWd&tNG<`&rlZ$1sfO8(5FjAHl>%x3TWh`QAMGM%H8Xy$DLt zn^=$2-^V@rCf0}OKT`h9th;q4Bhj}o1xVGOqs&|BsaZ-g&a5f=FkxXPK|*UKc?Q_~ zhl<$2oG54tvAUGBhu|mr141;(!llG54LvAnacsVqreF@$vvu-XMy6Au&kE8XMVk~+ zV{>!uQeN^*#sEdIG64(rq$D4P$@CZ0Coqm30TOGlH@i-r#I}djhrz&fO|}&b$tT!0 zL5zI^xBoRH zGx?Ncv67@pA9AHF5}h+E6`dnK=hMA6h_%Tq)j*F4ff)U;xI-x>u2NeHRMP)O>xHPI z0GSDGa4^c44}sH9HYs*M`pxJqc0dMu3-!RM)q!Wl^~V-A95Xa<%5u0)=4#CK zz^S6T04rzU%zoILGmFsqz+7U-UIT0u=QVU4EC&h8f)e7$DcN;b{|yC`a*po+b(2%k={<8#jG#ZC&L8?oyYnT{VO=rpo>^PPoIa?Ip|{6 z&(|@r%AhjVOZ0J+Q_lKQo!{3Q2;i5hej zmF82Tu)WBjlyO&*t@$fpLv%h*jz2|)hrtQN-%+_!X*|g6o=n*!hcT1#;p6sGShF%4-o^aNipy@o_pD>wyD*8y=5~i?jr}`k< z#-Yo>;|qbZrNe-O&&pwS6a)-jLs_;un0UzMYOvWVF{mM1-eWURlp(i%%4!xE5=SLr zszdbAxaW)o!ge3LCNXu2z~&PBYanbp;a7=i#R7XCu@3@aI}ekRIJ#0`tBG9#gza5Q z7;}}tZXos@O5hJOB&OdYu(uOC9thh=D3qA7Q(%8X>7 zKXNnibB?`|dB!N^tY2WI*%HJ?t~G%==`HG-AYK|yIt&*zh?7f8Yt8zkk@Td&D*0hh zzO z1M?25;;o!bnf?sJ|4ASw4PcA@VlSMuI#^d`d<1>es8|ezS%>*CK@P>?QnwWjLdL^t zt`B*`8t~Y*EYJJPfL!8~%v2n4P?y-&;J#SR0%pd0m|XQrexI8oat+MI9VNQqrEa_0 ztthiY*z1*K;5GXfVJ28cuojIa^#y_}sqd9+HynhS>?QwB4tt#x;8F?_v3AvFF4*;8 zDfoc$S_txojptO>E9ypC{6;_p!n`8?jKXMg`AdcBO|-DKWoz@?Bkb&EKRVO4>}W2y zf`T_+2fNmSxnK_GU>gMwgaQSd;j-#o1fK*r_aRtOedZ3)c31P<6KJ9CP6Jija(lCf z4z$iN$8sRmYxm&e!$SUe(gJi^cp)nMnrdM&Dv2;qVLmFYsO0^|XtqF@d$E?An>0&3 zSIwTKF0@?@QJyQo`IN!5gRn5gV%KKmI_ z+0|S$5EW%`E^e4Jafh%4bK@Z_1tdELoEL7;?~;=rrtwD>N2d2alN^CUf0gc?%}Wh34IBQ2zne zpd^e*n6ExT>IZidm%29*AJ~iG@8s}X40`to;SYJpO9;8%U`jNQ>YXc0A#YR2uX+{2 zU;VHM*<~=rnL_S;&=kTC_VV`iDkLtmn~;}#3fc0tDTLpD&-TQ{scKn-X_XVqUKj`Qs(>@^Bq*G?L|($k&lybcS8=J2-DV2Op^!Iw719w9B8Ksy!Nl+Ls!zOU3Rq47 z$9fgOA4ZYa{E5cujUSCeCuoY*E2HOPHCvf~u0hQKb@)(04NirKO8oUJam_cw>p9?^ zEO{Aw=YL@=y$ayV1su#V3(Mx-Sy~L1LP&X3u*m(6c6r30-45D2Mss1pNeHxITGp)P zV!X=y0~W?J!G6wFB_n?WMqKm%YGEAirbQO3<2WgZ-wU-ZH^RWDEG+p3%lIJ6^mSFK<%b;hj99bu^q?Xg306<$AqlX+l^n|Hp}>Br>q4uB$uQV0g%cZOS=z z=2!cx!tqRJ2+0QrFLhhwaO4tM2D*PbfN561V+9slfZ_@R7=<>X$c0}< z;r=@7{8c?(_li5a3mt*%>{kdUl7SyK&AtmMmil^hEKVW-6bm@iXvow|ee*HsxQIJ% zxW?Zhue+|rnJOeeEBVT9$=MX42058Xv zXEwb85VKdY2yLrDni0>O?LD_sX0P>zS;P@;kkxI!Nj4MG_n43-VH#xi`aVt6LQv$k z9}xVUph#vvBv=pu@LrnhBZ_%|U_G_}GetQf@wB`g9jhOcYTKeL&P99mFC^Nw=ra$4 zwctNE9e<^?8Q`B&rKq10;G5(*ml(i10G3>80CPls`x*1w;qV-pqkcgLqeIFZwIdB8 zv@JWE=Za^H?J}%%8dS->bHiIcghFa0(%jsZpHzXYl}OXomhYgnFPF&o4P;pbkPk!rij|NPoSyPe@x<$s_)2ahpb&E;RBIumUvMO@7*{|;0Lsq$)0 zdlmu`=2k%mPbyx}i5p+X!87@aB>j&Vi>;r^H$-W`t|1dO79V@an|K4XDrx(Oz;}E5 z_^x=kQuQi4hDSZvAw#C|n($?)WILb8*NJ3BP)6YwhPRKh%C-D0j_%)W566!aZ%+xZ z{)?}nhbzgiV|PlNXrm|QV^}GCTb3ef^8jM(d?OX26P*RM}{6wEn;~lcHj}!&VI>?$r0{D3! zlZo5V4c}w-DTR;7DK}&F;Nv!+60fpxdGh%{3gVJWIl`MzFPVSJXxQ80r5+y>f-{H@ z8W{*&;$0q}3Z%}=-;}h+!$A6`3e^fy#AGM)*j3`kHoi@|DbSxVqWI8V;0&8=1>HsM zll#Gy5`Pq@MX`Bs>gm4EZ&-h6FeLZG(oIa)#M(tT zmLwrXj`wz~h{PX6f6U+oT}i$dR!H1zW5D26C`}d7-R83W!*r7SHO|y^M| zZyb}niOChC7ksyX89Hx5;rb77hUmN}f`h3=dOG0jyN3sVxgl9@x}r`5N| z8mrUt+Z9-gN2uPl!YFA!vz4nQcAk#b#OY{=xhztigWg7|@*K20{A~Z89~Fqh&-N2JpvuF~_MdKJ zRUCe{pJH7eezu=xT^@e6pJ81dezte9E)PH36_tQI{A^bRAP+y=Z32*opY3)5$ivTe zhXCZ^XM2bMfoxJ2*&js@Q%HtMXK_{ay}No+`FK z!0F+s;?cl`=)VRO!TF@xk(*EOwMH49|+XoZupY4-@<)7LU4^j3)@5By|a?vk=mv-XVA%!J7z%U zV~r^v5IjI?$!@gmkD7BHsY}}d;yKPaozSx13P^YECF=zgWZU9$x=6cFLUFDg7-s)d zWGE%75VijKc>YnB6Rakf#|C8tcM}xPlPlQq4+Q6sTFxTNeG>0ey!ri%0MN!Joc0h~;h?!~tjiC7ZkXiz%%79G< z=MHcVe8eJygAv`(ZD{Hcos$!ybNDzF2FxHP?v9=G!9h&i9S^Y@_ZjS_&L?x2wli-z zN~8na6GGn-OMpg6p!+L>`Po*;1b((fskA`?E0!DkQ3el!xkD!HfMCKLZvrb8KGBSQ zCUEMMA>3C&KT~Odgsng*gDWUwTpI3V<9K_@2#8b?`BErkB3}wA{F8ciWjRtqBUV>K z1Y`B3$jHRGYV~boi#S04?|yrwm#?hYT9!hD_{xfHBdf+&R`}(VR)W@`tbHE|okUR9 zPAmjf24(FhIFu2TwNtFipsbx{T?S?C4C^u|YhA3%pseB3J^(T(YpMWbP}Xb$kU?3q z3qS^C%^?68l(i56$e^t0Dvy`Spsa)+ewPAb!= z>z+7hndl+x(R%a{ zqCd!fq`PVlBCiO~dKMU&PDFgqU#%6M8*ze}k@Vb%pYCB*q!SURSeKp~ahi4Mxe;er zm!2EZ#k%y|2t_3zJvTxXfb`r5n*gNeM%V=)JvYK30O`3AAp($|8=|^AaNj;vnjB@MnxnU{F2f`0Q!+v%Yt8oS2Eq}o@9jRe*{mIk!v{b zf)m?H*`X%>0G~M1Q7Wq9fJiCD_lM-)fF=4`2(>4?29!1oE5r33TH6!IqQta;hTtJY zO$RZSL?8s!>B5?FqzNJqmbuP==tv{Y^^B;J)L(JlhPWRxf(M_qUy`pJQbR_N=(-<# zAtSh>xHbzcHy*|LInXxYt=b{97=XVNX~3xBu*zxCSd8m5$E7$eCt<#1AMzxO9H{dq zC=i;(UbHxDXRg~IUc#=6&}(QGe;CrWSzv_%19^;2Eg6Zs841#;7%p|&#}F}yhyBzN zE|mkQ#T|6#lBkRjcWls$C}1h+;TnGyoY&n6@1P%(HBR8bRelfx2OkB=!aC#&z=@x@ za|PG82ogi__A`4ZKa4fy4aElz$JHg^>ooYHpvzFCJYijEHjGbh>MHZe?Wn;ce)j}q zbsJA|4icY_tfD=mq4J=+(5MZj#`RHA(o{IK0=RJ+1) z`QuH2hBtl?Xn6czHB?4Osey)Dqp|tO9%BsJ&wA9LPp(14E+sDY2EKb7!e2K?Twu^< znyTXc#?1!qxMWw3<2}O2C+HWyrU@Q^2~rq7Ouk~&)%$is=BN@>{|x%5`Ka)B84`CG z!j_5-n7-5VX^}9Ezs)YM1V)@k3|4T8OWoNYJ&$SwX(U9AT93+HR+><`jl!QZTC6pM zH&Qr0OIkyNW9bXACR0G{L;MSQLhP%i*rTS{nS&wrkG+fCW{ORrw%(DsLQ86;KKZmM zlE+kpruVZ#O<$qND-G^$n%cKqzXo%k`l-?1ub^V;_f{a=PJUW)bk*X9D)kJ-aLnu2T`d;B{AA)aZq-_ zMNW%`D|T2^bSMPp_7vRyyeas4WQjuX9#$y$M^q9Q{(FKWQ*wo_0gc}nh+r=?{&1)v z*fq=$T!c#EP@~1a(3orBW`ixT2L7~wg1H7hq?o%5x@8@R@c#WMsSr#mF7?^_(as3( zmyVajyJdKHrAn}ScsH34-uXLhK9!8HzCk62pgSz5m?@r(*Mn%5Q>o;hjk;dt*ni3x_$jlELMhVL`UFc%AKCoc7) zxmYEmJ^<-7C;UcZFS{*~nELE*Bh3%oRH+mJuzZD=avC2L)n2_#?Oa1H_Q^*kt`L;e4M#0OQzd+!tfZf!g?p+w3-Vr@_k1KkbhrWxE9Aru@D&UFa+& z?MDNh4)kz|mg^wxEW_VsfKnnT=i0ZdUlYRMd^MW$wck(`FB4C##>>Q^Di2VtQBrR< zno(fdFD<~;QaX3WM02Y!BT*JMbJIS*KtnvC-6QT9xQpjPX07R)0Ci~~GRYpn7aQR_ z`J_35FE+v-64j>us6H&J+%+O6Q&2`bsJN)4s?bNHA_13XNEumqn=GdbVNxar%c{0nn!x!{hJ^6d-0Luj@t*hr zVEJmU=T9r`2a9-bti3%6?fysajqNQ|L%cV(Z)DZ--uM>;t?!K;-w(m`h_`Q!6DvTK zZ{Hk0-OQ?Z`{p>sx_tZQIL*3z`{p>qx_tZQ=we;IeRJTu9RTFpH-{{=Hz>0hf4tRy|E)&d<9p& zH+BqE=Rh6#_RTR!e6lCszB%H=t77@~%`rr+hiLis&5^8Li8k`Ru_IHZ_saLijG9oYhq?~NVfRQig1`{u|Iy2#|{osynS=r!>WAy z=D3%`m2cl153B&1Z{J1&7ou+i6v6qV+L70yLM&%fV6Qc?eEW7IfPP}`<>z|vi1!(p zy8X;glzjcP0&=uh(7-8QKT(iN>CcZ_)S?}5ZGP0!(|^@%#72v65f!mIa3sLX!L81x zzy$=w+l>VTA0v2*)N=?%CIJ+$QHt+IZ*LJ4pShh|15x40c!}pmB^Iwa7Lk7zF!(!^ z5_l{H0N#@b%J5Nm(Y6E zI2~F-u!G>AC~6to4ow5NpWwypXfeRpa(tpl;B5c{AA`hgqjLd*QQX1%F#jCapyxpv zJ{rsD34`vZGR!>VnN9Q0weX5p&voF_wykKAP?IWg_knW-5a5=d#3fv9p%U*&@mNR5qV znlYAl&@2i6g@!wuLqJASgR$UgFJ63X8P5+ZcagG`8N}e-M$7&PmI*IdfI=yT$x_^_ z7Q7J6fzaXUav?hBFGQ#Dhg?I%POh6F%q2n|Vl`3jSY+V1hO#nLdiF5x(fAmWYn%q* z@9*KgBabA7zAdmElE6xmo9CDHvv_`)VyN~|dWaM%B#WDH6F~^nl;Or_kVDxUzLtn) zc5$%S*dMr!iTznR}=e#?SH;MXu>p_MmQo!ycCXA$Zs$us_%>`-9!GKiL1<`-Ab5alLK|{ZinOYQk370Y4&+ z6tmu!4cMrTQ`Et%IzM709Kun!`Xfq<;}=Y79T%S7-18e&XFd zR+YLf77H*|>{>I}-+V;nSfR;G_H!*kv1{cL+)HpCCFYSiXAD5GiOnLl2H=3@kT($8 z;{9J;LbVRtLs7x~y+yyb}BA!}^ccZ7NpvMnD z2tXKR0C$K0K9m96Ap-cYfv}?+|HVA=hA#W7$x$&8 zTGZgEC4>5_E_}dY8>nbeQzE8D6oOStQX(Q|fQ}$*RCUO60t<&^L9WmkRf~#1E zFob08w?YI&iX~9~{cwP4l=^P=@q_$!)+-i)Zf%tO^w! zZj2AWo$F%qgp>(1H3g&=1v?moiqdAVkm~j}E+bWO}_%n0( z7K|7N1~oI@M2`3wCnkDiaQKv#loPN%l}iqGUl=1tltR_;`iGx9re-Rp?j++aMSveu zEBQxbdiCHWjR|tpMwvY60i+{{%)nd-SE3y=4c#GDnr-Y@C`v`Lg6@Oo{V9Pd8yC<` zsDeYn15`>vJmBQu2{F(q7~By#V!kFi0^?VdvjQBjqscMJ;1K>Y836r708=szg~`mW z;Lr#uL5nKNbh6~cOY)3>_y~@=zZxBy8~4juLXVjd>JlucTr?K`SQul2!SnnZR4lq| z$ewk>2K-&QxGl4Qo*h)$hDQ0p9{K!R5Cln`V-X2(u7wIwowto?{(ow_z4breDm8=a|mT3>e=HUtAb$*26lB zgi=spv1Q}{agfn8m!g~(0aHbXQkV0ML1LOXloB}$-85Nh890{3GWNigbb+Vcx@u>L?5DDYIsS&bPkEQG=PAwNmR8&$wu9yIV~fic`$&R zu^a*19t=SL(`ar=LL@iib)*@u`&m`);Q&#VdB=p}wVY{a8pI_RPye9#IqT5?&9wwD zMowb3h_|tp)igl*Bpq^~$%9xQ6ecG(&Wd2K@d16|X41q7GFA=_P{cx)G)vr%W{0>u`H&*PFkX!Wg6fukHS(&|dvTZt79o)SW?#2O~G$Bb#1=y&p5C zM0aQ7%83b}FpDxyBS#hOM3BdC%$QyT{|GQ+V!MTtLBYQ0dvP)r!AXKA@p|29s%P<8ZT`Y1$vy9+C%teza7tPoJ#Ry?p0fc20We1Rf zMK?}H#qpA2jfy!E6%3OT@{*J6n7*!9;V$2M1kaNKmfy zyTqWT=9~|ea~^c-z-+pM%p8^smBRx__(u0Y`TviZ3+N}Nj0WM36W90E(JJ__j%{je7+b}^rCQ+~yLQsV z(up}4)m61kYcp2XHjQ0Tg_jWBSjxwY8H>LdB9YZqE5@!YFaLQvaF{~7HSY%eyjkPr zJ$n0@LPqy}EA}ra72Xw1E4yo8iu11ZDPyZDv$Mutx@JOc+1TQ;@=NhoY#Ybs&Ck!s zD=yAhlz(={>T#KqG8WFxsIFZ*F~=O<*x5z+zJu|Hqw%}hxbO!7s_MpNuA+Ub>Kc6w z-m)5)pLqNQTHx_G@4LL-+bCHCJF0li0ee=}$?BT&Rb>sbI&DQ$Rkbgps&)pYn=LV= z{L_t6M!8^TCm0d_hHhomN)V8}mRHx6msNX7fhwgd${N`MSu0vl5(%QMp|2qlx(I85 z7pZ1F{wk@rvZ~fwQHsAzE8Ew4*OZDsqh02!qn=@kg1^4&ZKx_Q^)-~iZ6r>1k2)lR zvj*ff_>-?dKsl*`BpMxKRJ2E}Ph!ifOY8CHXZbg3fe7b#WQpz#lfq__*O-J=G!oue z*vuc8)B34Jck90|JX<@Wy!?6V7gMKS^2O8{Q&-w;+C8fNGTt71vTm9LDg{s?03Ygy z*KOCHZ{DD{EIXsGd~E5|rYe2uE$OS?drq%dy|h)|GPOyYQmHqd-?C+C;q(?gUt62G z@KSwvVN(ll8#mfgV-BMJ9uT&a7^06CHnrMj#5|`r>38Gb4t>>@F&B(|#WpU+Zo{_{ zRPCYW-MD|K*e0k3=7Hu%Lu@!M4yvsqU}>`Stifk=6Mng>~^Ew-sTbm z_oA}fz&#(}v*M;1e1{ZUk}2`&0OujaXS9C0xgf#TPXO?qRf!G06%m{#`YX%HN6f=yVYF!CH z%M}7VSB3d?MeSon)xz{lZOy#xD18^5rCna4MQ+eX;-^+^c3X&E8Sp-`PwJx54r@ z?Q>3ps%=+42k@yF>2|e5=T`YwF_7)*_OmU(yMm1)*`~7oxoGkXhFLW9!>niS{N!^T5xwOQvns2I`rG z`q-&UubGYvLrmJ;ydS(!7Z9T~-9WuH#5Rg4f)*m?{2f$n#k^^2rimhqZee31o3@%L zRda1ep=tH9= zpqw=5VNM$6ulGq4lVg0yr@es5iBvUT=FGWnEizLpKc>})C8x!3N7S_8+z~acbY4hi zh*oz@iz(4+<~4mb_8KiVbLrH}w4@U4>}lG>Ol{u07Gft>YHR0dk(>4TOIk&`fuy&o zx~_JmQd%k>S&$Bw)|ZwymLd@^M=IW^_^v3eX!J2>R_f}#4a~vaNQvDST!f^$acyNo zjZ(U(lo@}c*L!J2RYNKNn$Fw>OWd=^jFEra(VbD_YpV55U+Jx7)()_&p?uYJJaJ`A z%yDO|bZ4A1&Ye-&=)Mtwt_&s&aOkd>5{*VWXN)mD^V*5qySLMLkAtAbXgjjQS! zeEil}Y2i7gNc}G@^(iasOY15t8}TWxucq|!in7ZE5llj*($&7Q71dso1xoX;HKC|1 ztExs)g#KpX>8GN&p>AaZ9-~0vpCUrhRF7`U@jRoHE}LXLiFm83s)~y%mDQDN8q3RS zD-~ZIo-rEu2|P7g(@^E}if)bd+*n$n)S8PMJf(Q%EiGRw6{uS4t>AELyfvkj)nzL| zhg#f~E33TK73g*XM%7f09?Q`blQe!D#*3+`!Sl1v+qlNt=mVirZ2V=_Rb@g#gSWE0 z4u5!)6ICf@aJ8Xqg{g_My3s(DR~OU@GcU+rSXR5zd$zZs)?2NBvGh_|t*NVpmy}o4 zl;KgSj(_hFT5{s~9VV84OT!NW|_6zW1;l_B!<>0HB-Jae!9PdhR zLt$0Hgwp@h+SNcwR#fMjnH82FahLyJ9hIOc3_Hs%3xuEEU3Ou~0`mt9%fj29o}QVO zndzasXJ&Uv7(AjOiNU|kag(6E{%}Gf3NeTni9t~hCgg~QL-s^TKn?0q6VM17;JEp2 z)&1(dSMNO-6W=*Ab?er>w{G3Kb*o-=ca=0~16qQYKT~du&%rA0^rm&QV`J5Zw4^e# z!{9{|eQtAQX?VWG^Q@C}L;eMjv$2Nt%mj$V5@J30NVRN5w(7H!XsXrGjRUtLxdLz8 z=#!CZqd8HVi6lEq(HS8Kbp-;SWsci`1Ry?8KcR%%eMHdd=RC92p!m_z$FZrY4qAB;xo^~v(& z%tYBV>$8nY)vDK=n1y}kr?F2kSF2Q`Ni0jz^$on^kJw!gJ50xF`0-{PvSwNh8I?0w zUbZkS(b;q5$zpK>d8V}O+Yhz;=MbcDf zg+$!5jEF4S2WYko8>kO|79MbH_wKg7ZimdqMty2Xwuzz$bogH|rma^)nha@#BkK_| zt%AdDUe(Z=Sy&D49X8}mjWppYu$=WT+ynZmetgXKWf zGGj2oe8nZM_3%bF6s-{vvoIgpZX3bOJ3C|d?4*kEdfnCmM=g)`!m>y(@+CRbC97Pr zT6W@8J#n8JRslBLpmTH{SnsCrewo4X0;oDowRE1liVjz;k4dxKMjbi@(Z7PB6Ng_L z0cBE$)hkmolHFOKPOM5sG4aTRy~y5h(~b6KWujr+Y>^6>V{j#y5M2Y0hf;o$9Kaj4 zPDv6o?4S_6>N1aD#ekS$7bh5qJEpOaisoyr35+Cc*BUDn3oAq>+kj}x=w#oV-!#Q= z)WF*`BjUMcjss~7d#thw;Ur?m+0=;jZ;Q1-M`C%aIyEDs&D5ICq6XtYBAJmE!UHXx1Ing6U+Z~`#`rn@S}%Z{iyQ?5XZqG$!$3l{B>E2d?>TDTGdc?C-9lD^8*~+XUf@1;nB2LtyyJ{U9XsE(a&%@- z?-|ljmB};Qk_X<&6|$Rx4JIDc!8t=3KNB>5Fq*86InSD^&xwDcCpP*aE~|5y++=Q( z3Ef6(SwfgeJN+~0nW#RAZGX3dM+cx4(^%BN+1>JBZM>;(=0y)#(2Tp~R2iNRtJ!T# zJ2PYPjwZuwt@@}WGb0Tsfi)E7FpIsxC6icHq}|f`p}a(h51=8KJ#DPAehj=_JxM6j zg_Rv=YOR@+B5d`cIg`P1^A5p6lgzX{-N zPu}@zgDaxJ-U$C472ZG8fgNR35!vHYEwm~bA2CL3)F-v zW~oyTDw?;ixz<5fx|SzS#T7@91-=eizd*MGbkM!7+X!kGi*|i!wUi#Qp$;=TUi1+7 zF{8obWD$Vj2q)L2!Xl?yw{VicW@9|!*tDPGFx_y=OJ1mJwXwchpV&CUxiQg}-ZeC< z8{Am!xQ+lBCbxmh1~-@yM7&5oS8gI=H__eOlQ}F1Witi~a`cBTbm7zqlGX%Xj3y9# z%=Bbk-aIo^LXkBP65-f+TRZft84)6q(cjoI(byxiA*7)z_>qe8aU&ctur(^j*JIX^ z<8dPQrl2qF&P!|zM=PhWBD?UjuRvvN)WEUCi{%A+ zg$?OupT$CQB2Tmem-E<5j!!&=hyVShW}k zLl~B5BrDCP(oa+;XRx`2g&E{FWV>Q)6ivn&MUxv`ZW6tB#cT_sUOufIlYi*%XnJG^ z_A^kn&dzijrL~#vz{Z*^aIkpBTc%Ede23AjEKEmhQ+RuVig>?5T!fRjH=-uitkv1d zB-YcglXI9+d8w4%gzWfDqIqKj?~slb%Ns~oi81YDvBx1F`$)@iCi+?l4yoU*!**?{n7EYFhbzH4=@LskK(?^iic9`pEV%7!}E1>~^BKSt%%07Zr z1yrF1_Xn-)G`*SD2`;Qqa5OQ+3oew;EH(!>cn=i}J&6kC`?Nms1Zh6ZOCcBU?%V3)WFPP4dtZ#AUI)d2AQVIr#*72Z z&*hGY%)?>D3lQO@yeBa;(n&+lf=syN=>M&ag^0q0b7jvuM;0di! z6aOgw2`_$#;Jqg~F03r?SYYN$ij*ZNVT!ke3E3881wZx`^b_0}SFn45ncET-5KdHp z`<#M@75uVFlmw6$}&nq3?mK2-4=#1H*0RSV7Jj zm*SuI;;$h{E|GOjJ5bAzahEF5X;!oeKkXIk3sn%ZP}EIIPWGgzyiU|i|}J!RzJbVy{sFvve-o4JtndUuQ{n?n}KBc9ggmd2he1p(@N%e;G=IC zSoMNw6LXSRB{MRpEvv`aB$%)cZp#av`!xRzO~FTSucFX`>3pstOBUAcn0C~yuBkz`ns1CWo;N`trf*8 zJLoi(xysA#Bgi`7b+^smuN8gCSJ+386+o!}3_TO_aYKASf$WR(5ua~^h`UrAR=j`HsQ>jTJ zZW~G_O=7AENq&+MvnnHoG|wvS67zvQ^KIs+E0*j8sgolts*==jOoC+0Y46L6P?R(s z8jeu^9rJDG4)yVKh#>28grTAoN2(mHKCUwF(eyC09j{@pU)p{0((Y4Mp4`4)@qg>< z?hk9B2-X50xh>;zAyJMhT-v-Tj`ajN#B;d*4<$>Ru_>yHTuPNfK$n~MnvzB@(8+y`Ppg6C@xc2zUC(TC8u=H@{lJdE(_$mR%h;BO^=jWXx9=% z_+3llfEzFwaZ{7U}DLWjlvX070_ciKR>MU{aj+G;b9zJ5YgVP)T}01 z7Rvdw%K3_>pxwK&WT`UafgN(U%15VmToMXIp3S5lkjdK~GBUXq;OB%{W z{iVG6|4#M$p{9_zuc%EwSDK@A6$)D3z>uk*_M}sTcDP6`%~DbT!HwY0)mPyqcP)@E zOmX0aeX!sm_&GnlUq$dsigG&)%&~%;oeow*nH{E|pq$Am5B!uM1&a9oHgmD~$Z=%< zpP(pwdQI?rMTx9|HgmJO;KCj*!Owd=wi2X1#|ZC&{KML3!QkI%I5yzuBibaOipL2J zbas>gVp5XCt++0$zr)Zn*!Uz!F7#dThAD|V%rLXX!x1vpW#Jk3|?8dKq)G$HXI`3tL z4gH0y7(dNl(e|*`h*yP91rN})%RpOdA$O_DBfOy+d38E!T-&CA&=97T;VJ}aV{7nG zKIj(HOjh}D!ia>&ReiYdo`q@6{B9D-N?*y42dm^Yns%5itdwb0i)x;vk05J+4%wY< zDoRg~-8GTP{QFhc4pU)OOotWNswujX;C-HJkRY`H6+t#RXI;nm#Lt?*Meqw=b|#i^ z9J9m06?Py{)Su~>6^OYaf^?Gu%mQ8mPi!+!w^{1&80c&h>mvvAaa#-HIJ-hzmT<~{ zSO;b0`9FP&w3s$|??9Uk4%5|&z_dw_!&D}aoNp^A!_Pe95Wys4@dtUV20@kJ_q?n@ zg3l<*&4|qXS4>DSS!wYnepC_jB*7$O@yC0lMm!*xWE_4~!{&9Aw!*Oe9C*w>>YG_5 zDbuGFphYlc{->g-7f_^TdVvD5U=CnXm@LVFFH;~(GW6;sSB4}+yiW_@2O$Y7+NcF$ zL$bZcvuz>xhl=VlJ3HFUP<)rUDf>-xnymI>FEsDSMcvsG^O%T|)$*q`eMVF271AH5-)&p2=~0@VqG?FKLh(B^y-CwQFQDIk zV#$iwsp+knzDLtLH076{_{AE2jVi4FKCS1UH2s#QPZZS86Z^b8rQ;X)?tK;hBUFK7 zG(AVt8+4Ty>bFV}Yc&0BP1kD57x*E~U#sa2n%=DGzvu?VQr+<23p-!Rx9AJJlKX@o zPdugieP2_4AmRxXxUz^U!(nZh^Fk{kp6Xwzy&Av>CyCV?T>JEtHqgw-KTq-a<-;@4%PH&O`p+} z%Nfo3C5-qV*>r*seoa|z%$S~O98}b^cHO4a@r!cJ;gFjiIfY{Mbo{ogM{*NooH^4o zx?>vhT#6s&*%JI}w~60ur!o=B8r8|NFeo&G16z9RZ!ml0WEs~T0?mLq|HcUgOa3X4WY8r3hO-`0UOSa=6<{ z5p+t6#6OLB%$hd?l5s}C>r0wKCRtoW4 zg+Gaxtbgx;CFG3RtHDQZEl)n7{jNvh{)GNNJ-B4ae~Qy{<(YJe*Prz;mC5Z_3I5?j zN|s|-8inUxm)JQ(Y)AbSZFT|>JmoDtv}B8xYV^9q=Cbx<`)@^glD|*+`;`9{Da0dY z6cpj(3Uwui||E4*(_kpH+Uj085@_f&CSY%OTw(!7Rzo z-#wTkUHJ#8sM3>9v+n3rWT^kH(@M5z*J+M|lYz@QTM?FY5B{Wn+JBGo?@|7f0|B%j zSnP8b{wMjry1HcXU)5Pb`X~8>_PZa2N&ZXDDp~SN&T?Gu4eS@z{}9Ly!yn1^p1%z_ z_`mMUhxQ8j9|f&U#N_{x^8ZNrL;5hk7rdfB`JYw(XBWx;BzUFo$iGkd_bER+hW*R> z*{)}T`uAP94H#qgU1$^U?=BbnKjr)c1xfu)X`3T0xn#E#CLZ!J{cn^d`FF3~W~q0t zbxi8UE`$H%BjQEHvHcGz|J}-e7YQ}(4)Xhvm-<%(c=BHs;DuHmrb_`3b|vor|EzWC zil8vanqza6bP4&09W3Dg{x2QHEp&Wk2>F)-Pv4eA9KSZtvXEc$jD4h$bm@VPa#-zuGH@lq<4+!jWeJaY^=~`M@qdjBmMd$2Mdv16 z=&iFmOPR7RXgrjQ?=r1i{1US}7ylZpGfF0WaKk*-4|(_l4c7-ro-SC0^L`qCkl}ou z#vg1rPp9#R`1v=DKh$tuP2=I*`jVfSM9@K*!h0ee2C^OGChIBebGv;F?ZDrZk1=T9wt)_)!VKCfL5 zTl(Y7CF`~UisR4YfJL9-0RNQY&sIFwThA%J6wu4(!y>0Iz%K({?8Z&BQ@}6J+m(Ks z(j#l1a{=V(e^CLxzW^^EwdTp0EWiVuOD~^s;wnS)*f}Mqp#FcC#pjR5yn5ef>5s{b zyE~;~b9iRF->>*EK77N<$&UAL1K*9=Bpkm_0)IH{&&}_PRR3Q9za0Jb)^*z~hSN(Y zCNaUA0Dmy>BIo_5l`K87_Bmeh4|SJp{#1uHzoqza)AclqSE596VFA7Zyy&wc&}XWE z{x;yd6LJZDxB&mD0y*;i8E&XihNO=b;D1nnf4%@O?{j(b-)8aId1nRi%hA7gt=(qT z;dng1K+Z@3ey#xjp#uD!1^6!%;2$l(|D*sfo5Oi_cuN6(T>*Y;0lu#IZHJev-S}@; z0li%N+)biH`a}W!bHFdhn;+K$$hrp`1u|avQ|GV259GmRXSwYy72pSe@3x%jf0Mv3 z%Ng%G3gp}lyo{rz!8rP`rAK^td&yS7&!#`6^xXmd=M;Zzfd7h>lh#pk|6YLKFLH1T z;=St6-Pe z3)(xU_#Xv&%6o?NJ3mxfrSjiv@oZ(Bww0b2>b5b4mcZd76*RUr)K1Y=Fy%t zE6faGY)U~4muY1Lyg6Q@2VuVBhMz);T`bf-XC zLvI{dd)bC)!{ryj($4G(YxP%b+&H*lC>mP3Zu17AkPn4?`{fwM1Og{IHgLA&Mb})u z_OeaujeN2rr#uQS_atV2MtyMDKaNZuso*}4S=%vSjt_%$;_QjkrP9TlH?3PAt?XIZ zvl>TV^3YjpZXWDjDqYdH;qvvHqu!nsi8Jw2M_9&!&g@x*7LChUu(l$x$)$7AfcoG1 z$#Sy^=aZw}V~VY6|G zt;~KH$Adua5u-ROcd&J~92`7?=rSCh%8)d4*?NglLvrtncT0CoL%@bxIpi)#xqty9=@n1?^;SuqY@ZS* z7d8Y<6uok_LKKY0CNC*TNsEui&N+iNzQ`qYRDO5_a!Q~-N_fB@VxKCHBG^sikOnll zy~dsP8ni2Uw~0OEbuuLI0Ysvc9z&a^aSco!NS`=?-W~>@rhnxVgCl%?t)b4}j}qLU zgTatE?lu;_%AHJ`82v7sCGYk~S<0Tt8}kcwd|v@f=B}wp6WC+K-9i580(!VR+c{NF z_SzwLhHxlAuUybEk`h7bg-+|eB6a^yAR~JQb&4RW+OH72%Bjr>6X^vzIBGj?O7AMU ZXex+P*0(YdP|nb{Ze`EujYUXD{4eMm{~`bY literal 0 HcmV?d00001 diff --git a/benchmarks/opencl/kmeans/kmeans.h b/benchmarks/opencl/kmeans/kmeans.h new file mode 100755 index 00000000..b263d38e --- /dev/null +++ b/benchmarks/opencl/kmeans/kmeans.h @@ -0,0 +1,65 @@ +/*****************************************************************************/ +/*IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. */ +/*By downloading, copying, installing or using the software you agree */ +/*to this license. If you do not agree to this license, do not download, */ +/*install, copy or use the software. */ +/* */ +/* */ +/*Copyright (c) 2005 Northwestern University */ +/*All rights reserved. */ + +/*Redistribution of the software in source and binary forms, */ +/*with or without modification, is permitted provided that the */ +/*following conditions are met: */ +/* */ +/*1 Redistributions of source code must retain the above copyright */ +/* notice, this list of conditions and the following disclaimer. */ +/* */ +/*2 Redistributions in binary form must reproduce the above copyright */ +/* notice, this list of conditions and the following disclaimer in the */ +/* documentation and/or other materials provided with the distribution.*/ +/* */ +/*3 Neither the name of Northwestern University nor the names of its */ +/* contributors may be used to endorse or promote products derived */ +/* from this software without specific prior written permission. */ +/* */ +/*THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS */ +/*IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ +/*TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT AND */ +/*FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL */ +/*NORTHWESTERN UNIVERSITY OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, */ +/*INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/*(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR */ +/*SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */ +/*HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, */ +/*STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN */ +/*ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/*POSSIBILITY OF SUCH DAMAGE. */ +/******************************************************************************/ + +#ifndef _H_FUZZY_KMEANS +#define _H_FUZZY_KMEANS + +#ifndef FLT_MAX +#define FLT_MAX 3.40282347e+38 +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +float euclid_dist_2 (float*, float*, int); +int find_nearest_point (float* , int, float**, int); +float rms_err(float**, int, int, float**, int); +int cluster(int, int, float**, int, int, float, int*, float***, float*, int, int); +int setup(int argc, char** argv); +int allocate(int npoints, int nfeatures, int nclusters, float **feature); +void deallocateMemory(); +int kmeansOCL(float **feature, int nfeatures, int npoints, int nclusters, int *membership, float **clusters, int *new_centers_len, float **new_centers); +float** kmeans_clustering(float **feature, int nfeatures, int npoints, int nclusters, float threshold, int *membership); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/benchmarks/opencl/kmeans/kmeans_clustering.c b/benchmarks/opencl/kmeans/kmeans_clustering.c new file mode 100755 index 00000000..85afd424 --- /dev/null +++ b/benchmarks/opencl/kmeans/kmeans_clustering.c @@ -0,0 +1,176 @@ +/*****************************************************************************/ +/*IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. */ +/*By downloading, copying, installing or using the software you agree */ +/*to this license. If you do not agree to this license, do not download, */ +/*install, copy or use the software. */ +/* */ +/* */ +/*Copyright (c) 2005 Northwestern University */ +/*All rights reserved. */ + +/*Redistribution of the software in source and binary forms, */ +/*with or without modification, is permitted provided that the */ +/*following conditions are met: */ +/* */ +/*1 Redistributions of source code must retain the above copyright */ +/* notice, this list of conditions and the following disclaimer. */ +/* */ +/*2 Redistributions in binary form must reproduce the above copyright */ +/* notice, this list of conditions and the following disclaimer in the */ +/* documentation and/or other materials provided with the distribution.*/ +/* */ +/*3 Neither the name of Northwestern University nor the names of its */ +/* contributors may be used to endorse or promote products derived */ +/* from this software without specific prior written permission. */ +/* */ +/*THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS */ +/*IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ +/*TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT AND */ +/*FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL */ +/*NORTHWESTERN UNIVERSITY OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, */ +/*INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/*(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR */ +/*SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */ +/*HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, */ +/*STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN */ +/*ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/*POSSIBILITY OF SUCH DAMAGE. */ +/******************************************************************************/ + +/*************************************************************************/ +/** File: kmeans_clustering.c **/ +/** Description: Implementation of regular k-means clustering **/ +/** algorithm **/ +/** Author: Wei-keng Liao **/ +/** ECE Department, Northwestern University **/ +/** email: wkliao@ece.northwestern.edu **/ +/** **/ +/** Edited by: Jay Pisharath **/ +/** Northwestern University. **/ +/** **/ +/** ================================================================ **/ +/** **/ +/** Edited by: Shuai Che, David Tarjan, Sang-Ha Lee **/ +/** University of Virginia **/ +/** **/ +/** Description: No longer supports fuzzy c-means clustering; **/ +/** only regular k-means clustering. **/ +/** No longer performs "validity" function to analyze **/ +/** compactness and separation crietria; instead **/ +/** calculate root mean squared error. **/ +/** **/ +/*************************************************************************/ + +#include +#include +#include +#include +#include "kmeans.h" + +#define RANDOM_MAX 2147483647 + +extern double wtime(void); + +/*----< kmeans_clustering() >---------------------------------------------*/ +float** kmeans_clustering(float **feature, /* in: [npoints][nfeatures] */ + int nfeatures, + int npoints, + int nclusters, + float threshold, + int *membership) /* out: [npoints] */ +{ + int i, j, n = 0; /* counters */ + int loop=0, temp; + int *new_centers_len; /* [nclusters]: no. of points in each cluster */ + float delta; /* if the point moved */ + float **clusters; /* out: [nclusters][nfeatures] */ + float **new_centers; /* [nclusters][nfeatures] */ + + int *initial; /* used to hold the index of points not yet selected + prevents the "birthday problem" of dual selection (?) + considered holding initial cluster indices, but changed due to + possible, though unlikely, infinite loops */ + int initial_points; + int c = 0; + + /* nclusters should never be > npoints + that would guarantee a cluster without points */ + if (nclusters > npoints) + nclusters = npoints; + + /* allocate space for and initialize returning variable clusters[] */ + clusters = (float**) malloc(nclusters * sizeof(float*)); + clusters[0] = (float*) malloc(nclusters * nfeatures * sizeof(float)); + for (i=1; i= 0; i++) { + //n = (int)rand() % initial_points; + + for (j=0; j 0) + clusters[i][j] = new_centers[i][j] / new_centers_len[i]; /* take average i.e. sum/n */ + new_centers[i][j] = 0.0; /* set back to 0 */ + } + new_centers_len[i] = 0; /* set back to 0 */ + } + c++; + } while ((delta > threshold) && (loop++ < 500)); /* makes sure loop terminates */ + printf("iterated %d times\n", c); + free(new_centers[0]); + free(new_centers); + free(new_centers_len); + + return clusters; +} + diff --git a/benchmarks/opencl/kmeans/libkmeans.a b/benchmarks/opencl/kmeans/libkmeans.a new file mode 100644 index 0000000000000000000000000000000000000000..746405064699d2d92382157476730dc86dc7f227 GIT binary patch literal 9346 zcmeHMYiu0V6~42xv+GxQWa0>tDmWS2v5jZFFMI7Os$z;89#$GEDj`~FygObyE^DvZ z-IxTS5_jz+gp`1W5L$$43zC!)R8sS(5K?2I5UE0~T2&E0kN_tMI?N1H63aP3jO>5QHGOcousa{EnK}S^- zb`Uxr?PN@bJYO+RdQvyy^Yyj5XQxD-SCQ8mR#*)DQvE*rYvHTDXqi;LU8EWA zYK#P165&vjnFxmC!FW6wibWbDjnS6QmgdGpVTPBtGS%8kY5M9+=~|iVIjX6c$J5wj1d;{TYy({Ibk7h2VPZ- z7Xo1+-vYdv%LxlP*<+O-F9gCuei!g+E+;JHKL%a}o>Cwz4Pxty?&r+}B!dWt|; z$R7t@PR2zbEaViIWO_4JR8lM}m>YpiG8GT>^!4P7cqS9y!Hl~%hr-6D_V3@++HM$| zjZioo-uxaxT^=vL~-opYk-Hdo=C#HQjn=xB(k2rl~tg)W}>s0fX)6b-tbPT>Rp3X*7)OhS7#0NRt9(3k5>X=q~MT zVP0`M*crwU`KxQL3D|~I!kmIE3VhVg;k@|NVjB?^m{PW44mBg=GGLlclval4CwPdS zqVD&AMO}b1_!$UE?$+b9Cbb#8eqeWbAG^{>PqgU`US61l2&6~rhX_;`J=D*u@Q8XR zfJGUAGnjPU@)2a9{!xFaJ)*z21CwqL4~0oQuiMCZiF$(oghlwhI|8AIZUm6qr_&Ja zB;BK^mzEQ%mx9oH3V<(Tww%g)k*CvgMeU>hH-uw+w|72^?fH>#kgF`d=Wi`V1qj3$ z+A;iS9ro}EEpV+*s~_}fb&vV9nzeqdI^@?XTm9O)JN#Nj+^+@hRgkVAUD0Yr6|MT1 zg7ZOc%}0 z#V2WXqs)4`8u`w6)dTC9wrhwDg%i9?fi z{yRI$<=R1sbR)i3nK^LElV-TkGaI-RD4V6M(%+jad{fz8u6`z|{v1d_z)HZ8q1ylE;EyX@o zxaz5YsL&C-oSq)Oh2ygu%gbxFzBw<a$oKSEzThtU2fs@d4^2?qXyvu8YG!zYRR zNN=;#Q8w@iJCFLy_26ThBtSY|KgT&O2~J}UI-It-Ts`Q? zua`U+L)m)xcH{%6b02kIZ}2M9KN0v z9_hJYdBMG(gF|~unOa=NEO1(#;dAjUm)v|-X8Ekze7;Sx=hwc;>B8e>lPc2Za2~uQ zKC43{UpB*M)eAnOU32)1xutr+XZF--yPQk%8S~HQrLw^1L7VrHU9T`Rb$*t&6CZv| zvcmTBQ*-(JA^2>31TM?qvKL(Tg3DfvgBM&@z-0woR={NiT=s#>KG@I)F8jb`|5wRn z>p(TuF$L?}E8yEW@G$r^?io`M=iGI83(mhlT%XI|d_C-Up#BQ@JAyTNG5(HVO}-R= zN4_HdK3Xc@cnSWNKF|64+&un*!~4qqZT_ld;O~kri@#6*PxBWOf95!kkKn9G`!0o}gpo#p z&X6i}D7-@$>nw#z$W)v|DfB}o@;*SA=iNh?=RHao4~4UYdEOunvLf#W!aVOSgn8Z+ zVV?IT!aT40Ho7!tq2r;Oy}4e-{ph+H&c6iq$20L%3LeYE<(-tc(#^fv8Ak86RJJRE zD>JT;4#3sX0jXTJD~yzG`|DV0@%JcHggHoiAI@6uxUyl$>_9_Hm=CdiKEH)7Ej)1Z z+ByPL(%abUUc#61(Qr^2aixZ1SSE2U-ut)=($@2$Zp72qU9<660P#$`ypKc%JX~f<23QW2J1F|R404i@WALUu- zEVO^u^Y&3$6kenXTtP%P0JwdW&!S!&)h_Z;9)zBIpQrL(gdUYcM+0g*t(^_c;$P9F z`6xqCkzw(D{vB>0vCsQZJMPF+@KNFpItU-)WAO2V~JQOWl)_E4>)mfVK;YF7AZ z=@(;r(~`OyJ~Mv_{1Wgvoc`lfz7qb?k%Me(_YwRLx(V`g(}gD#@)u0P|1|~w0E2&k z{C*7n0S5no1pk0UJ{I@~B=p_Wy%XnZ+Xm==J^EjV{@0-Y)zJIxuoUtA+3{uVhcBzT z4UdmmPc*Pd(J9~)DAR}g{{>lHSq(oX?)Zm&Z0u)mviz?VkNnID$-2H4zN%rg0e+kE zIyPgEyLNDUtnnAxcT8EZ5!u6fWDWeDeN)yh5BdswV9acL>}ry%-J1|!0egJ#MPztC zvxe}W?=C<34xdeb+q8Vh3xDci+aF;K-3<9>(}k6O+a7Y=IBXzEa&0SYFbEr5dH+@P zI}aN~_K+LIKhBG4J}M?B)i0z>y;y0;Lf1&89aMtQLy(SJS^X94@q z&4sR$b2sB9TqWmbSn!WX`<#Bc`{n$d7tLH!cobS<@1YDEFn>k}@IWS@z)dk{ADwh9 t(0r+64O};9=fb@Yg)g*U +#include +#include +#include +#include +#include + +#ifdef WIN +#include +#else +#include +#include +double gettime() { + struct timeval t; + gettimeofday(&t, NULL); + return t.tv_sec + t.tv_usec * 1e-6; +} +#endif + +#ifdef NV +#include +#else +#include +#endif + +#ifndef FLT_MAX +#define FLT_MAX 3.40282347e+38 +#endif + +#ifdef RD_WG_SIZE_0_0 +#define BLOCK_SIZE RD_WG_SIZE_0_0 +#elif defined(RD_WG_SIZE_0) +#define BLOCK_SIZE RD_WG_SIZE_0 +#elif defined(RD_WG_SIZE) +#define BLOCK_SIZE RD_WG_SIZE +#else +#define BLOCK_SIZE 256 +#endif + +#ifdef RD_WG_SIZE_1_0 +#define BLOCK_SIZE2 RD_WG_SIZE_1_0 +#elif defined(RD_WG_SIZE_1) +#define BLOCK_SIZE2 RD_WG_SIZE_1 +#elif defined(RD_WG_SIZE) +#define BLOCK_SIZE2 RD_WG_SIZE +#else +#define BLOCK_SIZE2 256 +#endif + +// local variables +static cl_context context; +static cl_command_queue cmd_queue; +static cl_device_type device_type; +static cl_device_id *device_list; +static cl_int num_devices; + +static int initialize(int use_gpu) { + cl_int result; + size_t size; + + /*// create OpenCL context + cl_platform_id platform_id; + if (clGetPlatformIDs(1, &platform_id, NULL) != CL_SUCCESS) { + printf("ERROR: clGetPlatformIDs(1,*,0) failed\n"); + return -1; + } + cl_context_properties ctxprop[] = {CL_CONTEXT_PLATFORM, + (cl_context_properties)platform_id, 0}; + device_type = use_gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU; + context = clCreateContextFromType(ctxprop, device_type, NULL, NULL, NULL); + if (!context) { + printf("ERROR: clCreateContextFromType(%s) failed\n", + use_gpu ? "GPU" : "CPU"); + return -1; + } + + // get the list of GPUs + result = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &size); + num_devices = (int)(size / sizeof(cl_device_id)); + + if (result != CL_SUCCESS || num_devices < 1) { + printf("ERROR: clGetContextInfo() failed\n"); + return -1; + } + device_list = new cl_device_id[num_devices]; + if (!device_list) { + printf("ERROR: new cl_device_id[] failed\n"); + return -1; + } + result = + clGetContextInfo(context, CL_CONTEXT_DEVICES, size, device_list, NULL); + if (result != CL_SUCCESS) { + printf("ERROR: clGetContextInfo() failed\n"); + return -1; + }*/ + + cl_platform_id platform_id; + num_devices = 1; + device_list = new cl_device_id[num_devices]; + + result = clGetPlatformIDs(1, &platform_id, NULL); + result = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, device_list, NULL); + context = clCreateContext(NULL, 1, device_list, NULL, NULL, &result); + + // create command queue for the first device + cmd_queue = clCreateCommandQueue(context, device_list[0], 0, NULL); + if (!cmd_queue) { + printf("ERROR: clCreateCommandQueue() failed\n"); + return -1; + } + + return 0; +} + +static int shutdown() { + // release resources + if (cmd_queue) + clReleaseCommandQueue(cmd_queue); + if (context) + clReleaseContext(context); + if (device_list) + delete device_list; + + // reset all variables + cmd_queue = 0; + context = 0; + device_list = 0; + num_devices = 0; + device_type = 0; + + return 0; +} + +cl_mem d_feature; +cl_mem d_feature_swap; +cl_mem d_cluster; +cl_mem d_membership; + +cl_kernel kernel; +cl_kernel kernel_s; +cl_kernel kernel2; + +int *membership_OCL; +int *membership_d; +float *feature_d; +float *clusters_d; +float *center_d; + +int allocate(int n_points, int n_features, int n_clusters, float **feature) { + /*int sourcesize = 1024 * 1024; + char *source = (char *)calloc(sourcesize, sizeof(char)); + if (!source) { + printf("ERROR: calloc(%d) failed\n", sourcesize); + return -1; + } + + // read the kernel core source + char *tempchar = "./kmeans.cl"; + FILE *fp = fopen(tempchar, "rb"); + if (!fp) { + printf("ERROR: unable to open '%s'\n", tempchar); + return -1; + } + fread(source + strlen(source), sourcesize, 1, fp); + fclose(fp);*/ + + // OpenCL initialization + int use_gpu = 1; + if (initialize(use_gpu)) + return -1; + + // compile kernel + cl_int err = 0; + //const char *slist[2] = {source, 0}; + //cl_program prog = clCreateProgramWithSource(context, 1, slist, NULL, &err); + cl_program prog = clCreateProgramWithBuiltInKernels(context, 1, device_list, "kmeans_kernel_c;kmeans_swap", &err); + if (err != CL_SUCCESS) { + printf("ERROR: clCreateProgramWithSource() => %d\n", err); + return -1; + } + err = clBuildProgram(prog, 0, NULL, NULL, NULL, NULL); + { // show warnings/errors + // static char log[65536]; memset(log, 0, sizeof(log)); + // cl_device_id device_id = 0; + // err = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(device_id), + //&device_id, NULL); + // clGetProgramBuildInfo(prog, device_id, CL_PROGRAM_BUILD_LOG, + // sizeof(log)-1, log, NULL); + // if(err || strstr(log,"warning:") || strstr(log, "error:")) + // printf("<<<<\n%s\n>>>>\n", log); + } + if (err != CL_SUCCESS) { + printf("ERROR: clBuildProgram() => %d\n", err); + return -1; + } + + char *kernel_kmeans_c = "kmeans_kernel_c"; + char *kernel_swap = "kmeans_swap"; + + kernel_s = clCreateKernel(prog, kernel_kmeans_c, &err); + if (err != CL_SUCCESS) { + printf("ERROR: clCreateKernel() 0 => %d\n", err); + return -1; + } + kernel2 = clCreateKernel(prog, kernel_swap, &err); + if (err != CL_SUCCESS) { + printf("ERROR: clCreateKernel() 0 => %d\n", err); + return -1; + } + + clReleaseProgram(prog); + + d_feature = clCreateBuffer(context, CL_MEM_READ_WRITE, + n_points * n_features * sizeof(float), NULL, &err); + if (err != CL_SUCCESS) { + printf("ERROR: clCreateBuffer d_feature (size:%d) => %d\n", + n_points * n_features, err); + return -1; + } + d_feature_swap = + clCreateBuffer(context, CL_MEM_READ_WRITE, + n_points * n_features * sizeof(float), NULL, &err); + if (err != CL_SUCCESS) { + printf("ERROR: clCreateBuffer d_feature_swap (size:%d) => %d\n", + n_points * n_features, err); + return -1; + } + d_cluster = + clCreateBuffer(context, CL_MEM_READ_WRITE, + n_clusters * n_features * sizeof(float), NULL, &err); + if (err != CL_SUCCESS) { + printf("ERROR: clCreateBuffer d_cluster (size:%d) => %d\n", + n_clusters * n_features, err); + return -1; + } + d_membership = clCreateBuffer(context, CL_MEM_READ_WRITE, + n_points * sizeof(int), NULL, &err); + if (err != CL_SUCCESS) { + printf("ERROR: clCreateBuffer d_membership (size:%d) => %d\n", n_points, + err); + return -1; + } + + // write buffers + err = clEnqueueWriteBuffer(cmd_queue, d_feature, 1, 0, + n_points * n_features * sizeof(float), feature[0], + 0, 0, 0); + if (err != CL_SUCCESS) { + printf("ERROR: clEnqueueWriteBuffer d_feature (size:%d) => %d\n", + n_points * n_features, err); + return -1; + } + + clSetKernelArg(kernel2, 0, sizeof(void *), (void *)&d_feature); + clSetKernelArg(kernel2, 1, sizeof(void *), (void *)&d_feature_swap); + clSetKernelArg(kernel2, 2, sizeof(cl_int), (void *)&n_points); + clSetKernelArg(kernel2, 3, sizeof(cl_int), (void *)&n_features); + + size_t global_work[3] = {n_points, 1, 1}; + /// Ke Wang adjustable local group size 2013/08/07 10:37:33 + size_t local_work_size = BLOCK_SIZE; // work group size is defined by + // RD_WG_SIZE_0 or RD_WG_SIZE_0_0 + // 2014/06/10 17:00:51 + if (global_work[0] % local_work_size != 0) + global_work[0] = (global_work[0] / local_work_size + 1) * local_work_size; + + err = clEnqueueNDRangeKernel(cmd_queue, kernel2, 1, NULL, global_work, + &local_work_size, 0, 0, 0); + if (err != CL_SUCCESS) { + printf("ERROR: clEnqueueNDRangeKernel()=>%d failed\n", err); + return -1; + } + + membership_OCL = (int *)malloc(n_points * sizeof(int)); +} + +void deallocateMemory() { + clReleaseMemObject(d_feature); + clReleaseMemObject(d_feature_swap); + clReleaseMemObject(d_cluster); + clReleaseMemObject(d_membership); + free(membership_OCL); +} + +int main(int argc, char **argv) { + printf("WG size of kernel_swap = %d, WG size of kernel_kmeans = %d \n", + BLOCK_SIZE, BLOCK_SIZE2); + setup(argc, argv); + shutdown(); +} + +int kmeansOCL(float **feature, /* in: [npoints][nfeatures] */ + int n_features, int n_points, int n_clusters, int *membership, + float **clusters, int *new_centers_len, float **new_centers) { + + int delta = 0; + int i, j, k; + cl_int err = 0; + + size_t global_work[3] = {n_points, 1, 1}; + + /// Ke Wang adjustable local group size 2013/08/07 10:37:33 + size_t local_work_size = BLOCK_SIZE2; // work group size is defined by + // RD_WG_SIZE_1 or RD_WG_SIZE_1_0 + // 2014/06/10 17:00:41 + if (global_work[0] % local_work_size != 0) + global_work[0] = (global_work[0] / local_work_size + 1) * local_work_size; + + err = clEnqueueWriteBuffer(cmd_queue, d_cluster, 1, 0, + n_clusters * n_features * sizeof(float), + clusters[0], 0, 0, 0); + if (err != CL_SUCCESS) { + printf("ERROR: clEnqueueWriteBuffer d_cluster (size:%d) => %d\n", n_points, + err); + return -1; + } + + int size = 0; + int offset = 0; + + clSetKernelArg(kernel_s, 0, sizeof(void *), (void *)&d_feature_swap); + clSetKernelArg(kernel_s, 1, sizeof(void *), (void *)&d_cluster); + clSetKernelArg(kernel_s, 2, sizeof(void *), (void *)&d_membership); + clSetKernelArg(kernel_s, 3, sizeof(cl_int), (void *)&n_points); + clSetKernelArg(kernel_s, 4, sizeof(cl_int), (void *)&n_clusters); + clSetKernelArg(kernel_s, 5, sizeof(cl_int), (void *)&n_features); + clSetKernelArg(kernel_s, 6, sizeof(cl_int), (void *)&offset); + clSetKernelArg(kernel_s, 7, sizeof(cl_int), (void *)&size); + + err = clEnqueueNDRangeKernel(cmd_queue, kernel_s, 1, NULL, global_work, + &local_work_size, 0, 0, 0); + if (err != CL_SUCCESS) { + printf("ERROR: clEnqueueNDRangeKernel()=>%d failed\n", err); + return -1; + } + clFinish(cmd_queue); + err = clEnqueueReadBuffer(cmd_queue, d_membership, 1, 0, + n_points * sizeof(int), membership_OCL, 0, 0, 0); + if (err != CL_SUCCESS) { + printf("ERROR: Memcopy Out\n"); + return -1; + } + + delta = 0; + for (i = 0; i < n_points; i++) { + int cluster_id = membership_OCL[i]; + new_centers_len[cluster_id]++; + if (membership_OCL[i] != membership[i]) { + delta++; + membership[i] = membership_OCL[i]; + } + for (j = 0; j < n_features; j++) { + new_centers[cluster_id][j] += feature[i][j]; + } + } + + return delta; +} diff --git a/benchmarks/opencl/kmeans/read_input.c b/benchmarks/opencl/kmeans/read_input.c new file mode 100755 index 00000000..6845453e --- /dev/null +++ b/benchmarks/opencl/kmeans/read_input.c @@ -0,0 +1,338 @@ +/*****************************************************************************/ +/*IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. */ +/*By downloading, copying, installing or using the software you agree */ +/*to this license. If you do not agree to this license, do not download, */ +/*install, copy or use the software. */ +/* */ +/* */ +/*Copyright (c) 2005 Northwestern University */ +/*All rights reserved. */ + +/*Redistribution of the software in source and binary forms, */ +/*with or without modification, is permitted provided that the */ +/*following conditions are met: */ +/* */ +/*1 Redistributions of source code must retain the above copyright */ +/* notice, this list of conditions and the following disclaimer. */ +/* */ +/*2 Redistributions in binary form must reproduce the above copyright */ +/* notice, this list of conditions and the following disclaimer in the */ +/* documentation and/or other materials provided with the distribution.*/ +/* */ +/*3 Neither the name of Northwestern University nor the names of its */ +/* contributors may be used to endorse or promote products derived */ +/* from this software without specific prior written permission. */ +/* */ +/*THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS */ +/*IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ +/*TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT AND */ +/*FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL */ +/*NORTHWESTERN UNIVERSITY OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, */ +/*INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/*(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR */ +/*SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */ +/*HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, */ +/*STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN */ +/*ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/*POSSIBILITY OF SUCH DAMAGE. */ +/******************************************************************************/ + +/*************************************************************************/ +/** File: example.c **/ +/** Description: Takes as input a file: **/ +/** ascii file: containing 1 data point per line **/ +/** binary file: first int is the number of objects **/ +/** 2nd int is the no. of features of each **/ +/** object **/ +/** This example performs a fuzzy c-means clustering **/ +/** on the data. Fuzzy clustering is performed using **/ +/** min to max clusters and the clustering that gets **/ +/** the best score according to a compactness and **/ +/** separation criterion are returned. **/ +/** Author: Wei-keng Liao **/ +/** ECE Department Northwestern University **/ +/** email: wkliao@ece.northwestern.edu **/ +/** **/ +/** Edited by: Jay Pisharath **/ +/** Northwestern University. **/ +/** **/ +/** ================================================================ **/ +/** + * **/ +/** Edited by: Shuai Che, David Tarjan, Sang-Ha Lee + * **/ +/** University of Virginia + * **/ +/** + * **/ +/** Description: No longer supports fuzzy c-means clustering; + * **/ +/** only regular k-means clustering. + * **/ +/** No longer performs "validity" function to + * analyze **/ +/** compactness and separation crietria; instead + * **/ +/** calculate root mean squared error. + * **/ +/** **/ +/*************************************************************************/ +#define _CRT_SECURE_NO_DEPRECATE 1 + +#include "kmeans.h" +#include +#include +#include +#include +#include +#include +#include + +extern double wtime(void); + +/*---< usage() >------------------------------------------------------------*/ +void usage(char *argv0) { + char *help = "\nUsage: %s [switches] -i filename\n\n" + " -i filename :file containing data to be clustered\n" + " -m max_nclusters :maximum number of clusters allowed " + "[default=5]\n" + " -n min_nclusters :minimum number of clusters allowed " + "[default=5]\n" + " -t threshold :threshold value " + "[default=0.001]\n" + " -l nloops :iteration for each number of clusters " + "[default=1]\n" + " -b :input file is in binary format\n" + " -r :calculate RMSE " + "[default=off]\n" + " -o :output cluster center coordinates " + "[default=off]\n"; + fprintf(stderr, help, argv0); + exit(-1); +} + +/*---< main() >-------------------------------------------------------------*/ +int setup(int argc, char **argv) { + int opt; + extern char *optarg; + char *filename = 0; + float *buf; + char line[1024]; + int isBinaryFile = 0; + + float threshold = 0.001; /* default value */ + int max_nclusters = 5; /* default value */ + int min_nclusters = 5; /* default value */ + int best_nclusters = 0; + int nfeatures = 0; + int npoints = 0; + float len; + + float **features; + float **cluster_centres = NULL; + int i, j, index; + int nloops = 1; /* default value */ + + int isRMSE = 0; + float rmse; + + int isOutput = 0; + // float cluster_timing, io_timing; + + /* obtain command line arguments and change appropriate options */ + while ((opt = getopt(argc, argv, "i:t:m:n:l:bro")) != EOF) { + switch (opt) { + case 'i': + filename = optarg; + break; + case 'b': + isBinaryFile = 1; + break; + case 't': + threshold = atof(optarg); + break; + case 'm': + max_nclusters = atoi(optarg); + break; + case 'n': + min_nclusters = atoi(optarg); + break; + case 'r': + isRMSE = 1; + break; + case 'o': + isOutput = 1; + break; + case 'l': + nloops = atoi(optarg); + break; + case '?': + usage(argv[0]); + break; + default: + usage(argv[0]); + break; + } + } + + /* ============== I/O begin ==============*/ + /* get nfeatures and npoints */ + // io_timing = omp_get_wtime(); + + /*if (isBinaryFile) { // Binary file input + FILE *infile; + if ((infile = fopen("100", "r")) == NULL) { + fprintf(stderr, "Error: no such file (%s)\n", filename); + exit(1); + } + fread(&npoints, 1, sizeof(int), infile); + fread(&nfeatures, 1, sizeof(int), infile); + + // allocate space for features[][] and read attributes of all objects + buf = (float *)malloc(npoints * nfeatures * sizeof(float)); + features = (float **)malloc(npoints * sizeof(float *)); + features[0] = (float *)malloc(npoints * nfeatures * sizeof(float)); + for (i = 1; i < npoints; i++) { + features[i] = features[i - 1] + nfeatures; + } + fread(buf, 1, npoints * nfeatures * sizeof(float), infile); + fclose(infile); + } else { + FILE *infile; + if ((infile = fopen("100", "r")) == NULL) { + fprintf(stderr, "Error: no such file (%s)\n", filename); + exit(1); + } + while (fgets(line, 1024, infile) != NULL) + if (strtok(line, " \t\n") != 0) { + npoints++; + } + rewind(infile); + while (fgets(line, 1024, infile) != NULL) { + if (strtok(line, " \t\n") != 0) { + // ignore the id (first attribute): nfeatures = 1; + while (strtok(NULL, " ,\t\n") != NULL) + nfeatures++; + break; + } + } + + // allocate space for features[] and read attributes of all objects + buf = (float *)malloc(npoints * nfeatures * sizeof(float)); + features = (float **)malloc(npoints * sizeof(float *)); + features[0] = (float *)malloc(npoints * nfeatures * sizeof(float)); + for (i = 1; i < npoints; i++) + features[i] = features[i - 1] + nfeatures; + rewind(infile); + i = 0; + while (fgets(line, 1024, infile) != NULL) { + if (strtok(line, " \t\n") == NULL) + continue; + for (j = 0; j < nfeatures; j++) { + buf[i] = atof(strtok(NULL, " ,\t\n")); + i++; + } + } + fclose(infile); + }*/ + + npoints = 100; + nfeatures = 100; + buf = (float *)malloc(npoints * nfeatures * sizeof(float)); + features = (float **)malloc(npoints * sizeof(float *)); + features[0] = (float *)malloc(npoints * nfeatures * sizeof(float)); + for (i = 1; i < npoints; i++) { + features[i] = features[i - 1] + nfeatures; + } + for (i = 0; i < npoints * nfeatures; ++i) { + buf[i] = (i % 64); + } + + // io_timing = omp_get_wtime() - io_timing; + + printf("\nI/O completed\n"); + printf("\nNumber of objects: %d\n", npoints); + printf("Number of features: %d\n", nfeatures); + /* ============== I/O end ==============*/ + + // error check for clusters + if (npoints < min_nclusters) { + printf("Error: min_nclusters(%d) > npoints(%d) -- cannot proceed\n", + min_nclusters, npoints); + exit(0); + } + + srand(7); /* seed for future random number generator */ + memcpy( + features[0], buf, + npoints * nfeatures * + sizeof( + float)); /* now features holds 2-dimensional array of features */ + free(buf); + + /* ======================= core of the clustering ===================*/ + + // cluster_timing = omp_get_wtime(); /* Total clustering time */ + cluster_centres = NULL; + index = cluster(npoints, /* number of data points */ + nfeatures, /* number of features for each point */ + features, /* array: [npoints][nfeatures] */ + min_nclusters, /* range of min to max number of clusters */ + max_nclusters, threshold, /* loop termination factor */ + &best_nclusters, /* return: number between min and max */ + &cluster_centres, /* return: [best_nclusters][nfeatures] */ + &rmse, /* Root Mean Squared Error */ + isRMSE, /* calculate RMSE */ + nloops); /* number of iteration for each number of clusters */ + + // cluster_timing = omp_get_wtime() - cluster_timing; + + /* =============== Command Line Output =============== */ + + /* cluster center coordinates + :displayed only for when k=1*/ + if ((min_nclusters == max_nclusters) && (isOutput == 1)) { + printf("\n================= Centroid Coordinates =================\n"); + for (i = 0; i < max_nclusters; i++) { + printf("%d:", i); + for (j = 0; j < nfeatures; j++) { + printf(" %.2f", cluster_centres[i][j]); + } + printf("\n\n"); + } + } + + len = (float)((max_nclusters - min_nclusters + 1) * nloops); + + printf("Number of Iteration: %d\n", nloops); + // printf("Time for I/O: %.5fsec\n", io_timing); + // printf("Time for Entire Clustering: %.5fsec\n", cluster_timing); + + if (min_nclusters != max_nclusters) { + if (nloops != 1) { // range of k, multiple iteration + // printf("Average Clustering Time: %fsec\n", + // cluster_timing / len); + printf("Best number of clusters is %d\n", best_nclusters); + } else { // range of k, single iteration + // printf("Average Clustering Time: %fsec\n", + // cluster_timing / len); + printf("Best number of clusters is %d\n", best_nclusters); + } + } else { + if (nloops != 1) { // single k, multiple iteration + // printf("Average Clustering Time: %.5fsec\n", + // cluster_timing / nloops); + if (isRMSE) // if calculated RMSE + printf("Number of trials to approach the best RMSE of %.3f is %d\n", + rmse, index + 1); + } else { // single k, single iteration + if (isRMSE) // if calculated RMSE + printf("Root Mean Squared Error: %.3f\n", rmse); + } + } + + /* free up memory */ + free(features[0]); + free(features); + return (0); +} diff --git a/benchmarks/opencl/kmeans/rmse.c b/benchmarks/opencl/kmeans/rmse.c new file mode 100755 index 00000000..03d614a6 --- /dev/null +++ b/benchmarks/opencl/kmeans/rmse.c @@ -0,0 +1,94 @@ +/*************************************************************************/ +/** File: rmse.c **/ +/** Description: calculate root mean squared error of particular **/ +/** clustering. **/ +/** Author: Sang-Ha Lee **/ +/** University of Virginia. **/ +/** **/ +/** Note: euclid_dist_2() and find_nearest_point() adopted from **/ +/** Minebench code. **/ +/** **/ +/*************************************************************************/ + +#include +#include +#include +#include + +#include "kmeans.h" + +extern double wtime(void); + +/*----< euclid_dist_2() >----------------------------------------------------*/ +/* multi-dimensional spatial Euclid distance square */ +__inline +float euclid_dist_2(float *pt1, + float *pt2, + int numdims) +{ + int i; + float ans=0.0; + + for (i=0; i-----------------------------------------------*/ +__inline +int find_nearest_point(float *pt, /* [nfeatures] */ + int nfeatures, + float **pts, /* [npts][nfeatures] */ + int npts) +{ + int index, i; + float max_dist=FLT_MAX; + + /* find the cluster center id with min distance to pt */ + for (i=0; i-------------------------------------*/ +float rms_err (float **feature, /* [npoints][nfeatures] */ + int nfeatures, + int npoints, + float **cluster_centres, /* [nclusters][nfeatures] */ + int nclusters) +{ + int i; + int nearest_cluster_index; /* cluster center id with min distance to pt */ + float sum_euclid = 0.0; /* sum of Euclidean distance squares */ + float ret; /* return value */ + + /* calculate and sum the sqaure of euclidean distance*/ + #pragma omp parallel for \ + shared(feature,cluster_centres) \ + firstprivate(npoints,nfeatures,nclusters) \ + private(i, nearest_cluster_index) \ + schedule (static) + for (i=0; i