lbm
This commit is contained in:
18150
benchmarks/opencl/lbm/120_120_150_ldc.of
Normal file
18150
benchmarks/opencl/lbm/120_120_150_ldc.of
Normal file
File diff suppressed because it is too large
Load Diff
68
benchmarks/opencl/lbm/Makefile
Normal file
68
benchmarks/opencl/lbm/Makefile
Normal file
@@ -0,0 +1,68 @@
|
||||
RISCV_TOOL_PATH = $(wildcard ~/dev/riscv-gnu-toolchain/drops)
|
||||
POCL_CC_PATH = $(wildcard ~/dev/pocl/drops_riscv_cc)
|
||||
POCL_INC_PATH = $(wildcard ../include)
|
||||
POCL_LIB_PATH = $(wildcard ../lib)
|
||||
VX_RT_PATH = $(wildcard ../../../runtime)
|
||||
VX_SIMX_PATH = $(wildcard ../../../simX/obj_dir)
|
||||
|
||||
CC = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc
|
||||
CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++
|
||||
DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump
|
||||
HEX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy
|
||||
GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
|
||||
|
||||
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
|
||||
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.s
|
||||
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
|
||||
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
|
||||
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
|
||||
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
|
||||
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c
|
||||
VX_SRCS += $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST)
|
||||
|
||||
VX_CFLAGS = -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld
|
||||
|
||||
CXXFLAGS = -g -O0 -march=rv32im -mabi=ilp32
|
||||
CXXFLAGS += -ffreestanding # program may not begin at main()
|
||||
CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections
|
||||
CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions
|
||||
CXXFLAGS += -I$(POCL_INC_PATH) -I.
|
||||
|
||||
VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
|
||||
QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a
|
||||
|
||||
PROJECT = lbm
|
||||
|
||||
SRCS = main.cc args.c parboil_opencl.c gpu_info.c lbm.c ocl.c
|
||||
|
||||
all: $(PROJECT).dump $(PROJECT).hex
|
||||
|
||||
lib$(PROJECT).a: kernel.cl
|
||||
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl
|
||||
|
||||
$(PROJECT).elf: $(SRCS) lib$(PROJECT).a
|
||||
$(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) $(SRCS) $(VX_LIBS) -o $(PROJECT).elf
|
||||
|
||||
$(PROJECT).qemu: $(SRCS) lib$(PROJECT).a
|
||||
$(CXX) $(CXXFLAGS) $(SRCS) $(QEMU_LIBS) -o $(PROJECT).qemu
|
||||
|
||||
$(PROJECT).hex: $(PROJECT).elf
|
||||
$(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex
|
||||
|
||||
$(PROJECT).dump: $(PROJECT).elf
|
||||
$(DMP) -D $(PROJECT).elf > $(PROJECT).dump
|
||||
|
||||
run: $(PROJECT).hex
|
||||
POCL_DEBUG=all $(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug
|
||||
|
||||
qemu: $(PROJECT).qemu
|
||||
POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -d in_asm -D debug.log $(PROJECT).qemu
|
||||
|
||||
gdb-s: $(PROJECT).qemu
|
||||
POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -g 1234 -d in_asm -D debug.log $(PROJECT).qemu
|
||||
|
||||
gdb-c: $(PROJECT).qemu
|
||||
$(GDB) $(PROJECT).qemu
|
||||
|
||||
clean:
|
||||
rm -rf *.o *.elf *.dump *.hex *.qemu *.log *.debug
|
||||
617
benchmarks/opencl/lbm/args.c
Normal file
617
benchmarks/opencl/lbm/args.c
Normal file
@@ -0,0 +1,617 @@
|
||||
|
||||
#include <parboil.h>
|
||||
#include <errno.h>
|
||||
#include <limits.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
/*****************************************************************************/
|
||||
/* Memory management routines */
|
||||
|
||||
/* Free an array of owned strings. */
|
||||
void
|
||||
pb_FreeStringArray(char **string_array)
|
||||
{
|
||||
char **p;
|
||||
|
||||
if (!string_array) return;
|
||||
for (p = string_array; *p; p++) free(*p);
|
||||
free(string_array);
|
||||
}
|
||||
|
||||
struct pb_PlatformParam *
|
||||
pb_PlatformParam(char *name, char *version)
|
||||
{
|
||||
if (name == NULL) {
|
||||
fprintf(stderr, "pb_PlatformParam: Invalid argument\n");
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
struct pb_PlatformParam *ret =
|
||||
(struct pb_PlatformParam *)malloc(sizeof (struct pb_PlatformParam));
|
||||
|
||||
ret->name = name;
|
||||
ret->version = version;
|
||||
return ret;
|
||||
}
|
||||
|
||||
void
|
||||
pb_FreePlatformParam(struct pb_PlatformParam *p)
|
||||
{
|
||||
if (p == NULL) return;
|
||||
|
||||
free(p->name);
|
||||
free(p->version);
|
||||
free(p);
|
||||
}
|
||||
|
||||
struct pb_DeviceParam *
|
||||
pb_DeviceParam_index(int index)
|
||||
{
|
||||
struct pb_DeviceParam *ret =
|
||||
(struct pb_DeviceParam *)malloc(sizeof (struct pb_DeviceParam));
|
||||
ret->criterion = pb_Device_INDEX;
|
||||
ret->index = index;
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct pb_DeviceParam *
|
||||
pb_DeviceParam_cpu(void)
|
||||
{
|
||||
struct pb_DeviceParam *ret =
|
||||
(struct pb_DeviceParam *)malloc(sizeof (struct pb_DeviceParam));
|
||||
ret->criterion = pb_Device_CPU;
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct pb_DeviceParam *
|
||||
pb_DeviceParam_gpu(void)
|
||||
{
|
||||
struct pb_DeviceParam *ret =
|
||||
(struct pb_DeviceParam *)malloc(sizeof (struct pb_DeviceParam));
|
||||
ret->criterion = pb_Device_GPU;
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct pb_DeviceParam *
|
||||
pb_DeviceParam_accelerator(void)
|
||||
{
|
||||
struct pb_DeviceParam *ret =
|
||||
(struct pb_DeviceParam *)malloc(sizeof (struct pb_DeviceParam));
|
||||
ret->criterion = pb_Device_ACCELERATOR;
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct pb_DeviceParam *
|
||||
pb_DeviceParam_name(char *name)
|
||||
{
|
||||
struct pb_DeviceParam *ret =
|
||||
(struct pb_DeviceParam *)malloc(sizeof (struct pb_DeviceParam));
|
||||
ret->criterion = pb_Device_NAME;
|
||||
ret->name = name;
|
||||
return ret;
|
||||
}
|
||||
|
||||
void
|
||||
pb_FreeDeviceParam(struct pb_DeviceParam *p)
|
||||
{
|
||||
if (p == NULL) return;
|
||||
|
||||
switch(p->criterion) {
|
||||
case pb_Device_NAME:
|
||||
free(p->name);
|
||||
break;
|
||||
case pb_Device_INDEX:
|
||||
case pb_Device_CPU:
|
||||
case pb_Device_ACCELERATOR:
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "pb_FreeDeviceParam: Invalid argument\n");
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
pb_FreeParameters(struct pb_Parameters *p)
|
||||
{
|
||||
free(p->outFile);
|
||||
pb_FreeStringArray(p->inpFiles);
|
||||
pb_FreePlatformParam(p->platform);
|
||||
pb_FreeDeviceParam(p->device);
|
||||
free(p);
|
||||
}
|
||||
|
||||
/*****************************************************************************/
|
||||
|
||||
/* Parse a comma-delimited list of strings into an
|
||||
* array of strings. */
|
||||
static char **
|
||||
read_string_array(char *in)
|
||||
{
|
||||
char **ret;
|
||||
int i;
|
||||
int count; /* Number of items in the input */
|
||||
char *substring; /* Current substring within 'in' */
|
||||
|
||||
/* Count the number of items in the string */
|
||||
count = 1;
|
||||
for (i = 0; in[i]; i++) if (in[i] == ',') count++;
|
||||
|
||||
/* Allocate storage */
|
||||
ret = (char **)malloc((count + 1) * sizeof(char *));
|
||||
|
||||
/* Create copies of the strings from the list */
|
||||
substring = in;
|
||||
for (i = 0; i < count; i++) {
|
||||
char *substring_end;
|
||||
int substring_length;
|
||||
|
||||
/* Find length of substring */
|
||||
for (substring_end = substring;
|
||||
(*substring_end != ',') && (*substring_end != 0);
|
||||
substring_end++);
|
||||
|
||||
substring_length = substring_end - substring;
|
||||
|
||||
/* Allocate memory and copy the substring */
|
||||
ret[i] = (char *)malloc(substring_length + 1);
|
||||
memcpy(ret[i], substring, substring_length);
|
||||
ret[i][substring_length] = 0;
|
||||
|
||||
/* go to next substring */
|
||||
substring = substring_end + 1;
|
||||
}
|
||||
ret[i] = NULL; /* Write the sentinel value */
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void
|
||||
report_parse_error(const char *str)
|
||||
{
|
||||
fputs(str, stderr);
|
||||
}
|
||||
|
||||
/* Interpret a string as a 'pb_DeviceParam' value.
|
||||
* Return a pointer to a new value, or NULL on failure.
|
||||
*/
|
||||
static struct pb_DeviceParam *
|
||||
read_device_param(char *str)
|
||||
{
|
||||
/* Try different ways of interpreting 'device_string' until one works */
|
||||
|
||||
/* If argument is an integer, then interpret it as a device index */
|
||||
errno = 0;
|
||||
char *end;
|
||||
long device_int = strtol(str, &end, 10);
|
||||
if (!errno) {
|
||||
/* Negative numbers are not valid */
|
||||
if (device_int < 0 || device_int > INT_MAX) return NULL;
|
||||
|
||||
return pb_DeviceParam_index(device_int);
|
||||
}
|
||||
|
||||
/* Match against predefined strings */
|
||||
if (strcmp(str, "CPU") == 0)
|
||||
return pb_DeviceParam_cpu();
|
||||
if (strcmp(str, "GPU") == 0)
|
||||
return pb_DeviceParam_gpu();
|
||||
if (strcmp(str, "ACCELERATOR") == 0)
|
||||
return pb_DeviceParam_accelerator();
|
||||
|
||||
/* Assume any other string is a device name */
|
||||
return pb_DeviceParam_name(strdup(str));
|
||||
}
|
||||
|
||||
/* Interpret a string as a 'pb_PlatformParam' value.
|
||||
* Return a pointer to a new value, or NULL on failure.
|
||||
*/
|
||||
static struct pb_PlatformParam *
|
||||
read_platform_param(char *str)
|
||||
{
|
||||
int separator_index; /* Index of the '-' character separating
|
||||
* name and version number. It's -1 if
|
||||
* there's no '-' character. */
|
||||
|
||||
/* Find the last occurrence of '-' in 'str' */
|
||||
{
|
||||
char *cur;
|
||||
separator_index = -1;
|
||||
for (cur = str; *cur; cur++) {
|
||||
if (*cur == '-') separator_index = cur - str;
|
||||
}
|
||||
}
|
||||
|
||||
/* The platform name is either the entire string, or all characters before
|
||||
* the separator */
|
||||
int name_length = separator_index == -1 ? strlen(str) : separator_index;
|
||||
char *name_str = (char *)malloc(name_length + 1);
|
||||
memcpy(name_str, str, name_length);
|
||||
name_str[name_length] = 0;
|
||||
|
||||
/* The version is either NULL, or all characters after the separator */
|
||||
char *version_str;
|
||||
if (separator_index == -1) {
|
||||
version_str = NULL;
|
||||
}
|
||||
else {
|
||||
const char *version_input_str = str + separator_index + 1;
|
||||
int version_length = strlen(version_input_str);
|
||||
|
||||
version_str = (char *)malloc(version_length + 1);
|
||||
memcpy(version_str, version_input_str, version_length);
|
||||
version_str[version_length] = 0;
|
||||
}
|
||||
|
||||
/* Create output structure */
|
||||
return pb_PlatformParam(name_str, version_str);
|
||||
}
|
||||
|
||||
/****************************************************************************/
|
||||
/* Argument parsing state */
|
||||
|
||||
/* Argument parsing state.
|
||||
*
|
||||
* Arguments that are interpreted by the argument parser are removed from
|
||||
* the list. Variables 'argc' and 'argn' do not count arguments that have
|
||||
* been removed.
|
||||
*
|
||||
* During argument parsing, the array of arguments is compacted, overwriting
|
||||
* the erased arguments. Variable 'argv_put' points to the array element
|
||||
* where the next argument will be written. Variable 'argv_get' points to
|
||||
* the array element where the next argument will be read from.
|
||||
*/
|
||||
struct argparse {
|
||||
int argc; /* Number of arguments. Mutable. */
|
||||
int argn; /* Current argument index. */
|
||||
char **argv_get; /* Argument value being read. */
|
||||
char **argv_put; /* Argument value being written.
|
||||
* argv_put <= argv_get. */
|
||||
};
|
||||
|
||||
static void
|
||||
initialize_argparse(struct argparse *ap, int argc, char **argv)
|
||||
{
|
||||
ap->argc = argc;
|
||||
ap->argn = 0;
|
||||
ap->argv_get = ap->argv_put = argv;
|
||||
}
|
||||
|
||||
/* Finish argument parsing, without processing the remaining arguments.
|
||||
* Write new argument count into _argc. */
|
||||
static void
|
||||
finalize_argparse(struct argparse *ap, int *_argc, char **argv)
|
||||
{
|
||||
/* Move the remaining arguments */
|
||||
for(; ap->argn < ap->argc; ap->argn++)
|
||||
*ap->argv_put++ = *ap->argv_get++;
|
||||
|
||||
/* Update the argument count */
|
||||
*_argc = ap->argc;
|
||||
|
||||
/* Insert a terminating NULL */
|
||||
argv[ap->argc] = NULL;
|
||||
}
|
||||
|
||||
/* Delete the current argument. The argument will not be visible
|
||||
* when argument parsing is done. */
|
||||
static void
|
||||
delete_argument(struct argparse *ap)
|
||||
{
|
||||
if (ap->argn >= ap->argc) {
|
||||
fprintf(stderr, "delete_argument\n");
|
||||
}
|
||||
ap->argc--;
|
||||
ap->argv_get++;
|
||||
}
|
||||
|
||||
/* Go to the next argument. Also, move the current argument to its
|
||||
* final location in argv. */
|
||||
static void
|
||||
next_argument(struct argparse *ap)
|
||||
{
|
||||
if (ap->argn >= ap->argc) {
|
||||
fprintf(stderr, "next_argument\n");
|
||||
}
|
||||
/* Move argument to its new location. */
|
||||
*ap->argv_put++ = *ap->argv_get++;
|
||||
ap->argn++;
|
||||
}
|
||||
|
||||
static int
|
||||
is_end_of_arguments(struct argparse *ap)
|
||||
{
|
||||
return ap->argn == ap->argc;
|
||||
}
|
||||
|
||||
/* Get the current argument */
|
||||
static char *
|
||||
get_argument(struct argparse *ap)
|
||||
{
|
||||
return *ap->argv_get;
|
||||
}
|
||||
|
||||
/* Get the current argument, and also delete it */
|
||||
static char *
|
||||
consume_argument(struct argparse *ap)
|
||||
{
|
||||
char *ret = get_argument(ap);
|
||||
delete_argument(ap);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/****************************************************************************/
|
||||
|
||||
/* The result of parsing a command-line argument */
|
||||
typedef enum {
|
||||
ARGPARSE_OK, /* Success */
|
||||
ARGPARSE_ERROR, /* Error */
|
||||
ARGPARSE_DONE /* Success, and do not continue parsing */
|
||||
} result;
|
||||
|
||||
typedef result parse_action(struct argparse *ap, struct pb_Parameters *params);
|
||||
|
||||
|
||||
/* A command-line option */
|
||||
struct option {
|
||||
char short_name; /* If not 0, the one-character
|
||||
* name of this option */
|
||||
const char *long_name; /* If not NULL, the long name of this option */
|
||||
parse_action *action; /* What to do when this option occurs.
|
||||
* Sentinel value is NULL.
|
||||
*/
|
||||
};
|
||||
|
||||
/* Output file
|
||||
*
|
||||
* -o FILE
|
||||
*/
|
||||
static result
|
||||
parse_output_file(struct argparse *ap, struct pb_Parameters *params)
|
||||
{
|
||||
if (is_end_of_arguments(ap))
|
||||
{
|
||||
report_parse_error("Expecting file name after '-o'\n");
|
||||
return ARGPARSE_ERROR;
|
||||
}
|
||||
|
||||
/* Replace the output file name */
|
||||
free(params->outFile);
|
||||
params->outFile = strdup(consume_argument(ap));
|
||||
|
||||
return ARGPARSE_OK;
|
||||
}
|
||||
|
||||
/* Input files
|
||||
*
|
||||
* -i FILE,FILE,...
|
||||
*/
|
||||
static result
|
||||
parse_input_files(struct argparse *ap, struct pb_Parameters *params)
|
||||
{
|
||||
if (is_end_of_arguments(ap))
|
||||
{
|
||||
report_parse_error("Expecting file name after '-i'\n");
|
||||
return ARGPARSE_ERROR;
|
||||
}
|
||||
|
||||
/* Replace the input file list */
|
||||
pb_FreeStringArray(params->inpFiles);
|
||||
params->inpFiles = read_string_array(consume_argument(ap));
|
||||
return ARGPARSE_OK;
|
||||
}
|
||||
|
||||
/* End of options
|
||||
*
|
||||
* --
|
||||
*/
|
||||
|
||||
static result
|
||||
parse_end_options(struct argparse *ap, struct pb_Parameters *params)
|
||||
{
|
||||
return ARGPARSE_DONE;
|
||||
}
|
||||
|
||||
/* OpenCL device
|
||||
*
|
||||
* --device X
|
||||
*/
|
||||
|
||||
static result
|
||||
parse_device(struct argparse *ap, struct pb_Parameters *params)
|
||||
{
|
||||
/* Read the next argument, which specifies a device */
|
||||
|
||||
if (is_end_of_arguments(ap))
|
||||
{
|
||||
report_parse_error("Expecting device specification after '--device'\n");
|
||||
return ARGPARSE_ERROR;
|
||||
}
|
||||
|
||||
char *device_string = consume_argument(ap);
|
||||
struct pb_DeviceParam *device_param = read_device_param(device_string);
|
||||
|
||||
if (!device_param) {
|
||||
report_parse_error("Unrecognized device specification format on command line\n");
|
||||
return ARGPARSE_ERROR;
|
||||
}
|
||||
|
||||
/* Save the result */
|
||||
pb_FreeDeviceParam(params->device);
|
||||
params->device = device_param;
|
||||
|
||||
return ARGPARSE_OK;
|
||||
}
|
||||
|
||||
static result
|
||||
parse_platform(struct argparse *ap, struct pb_Parameters *params)
|
||||
{
|
||||
/* Read the next argument, which specifies a platform */
|
||||
|
||||
if (is_end_of_arguments(ap))
|
||||
{
|
||||
report_parse_error("Expecting device specification after '--platform'\n");
|
||||
return ARGPARSE_ERROR;
|
||||
}
|
||||
|
||||
char *platform_string = consume_argument(ap);
|
||||
struct pb_PlatformParam *platform_param = read_platform_param(platform_string);
|
||||
|
||||
if (!platform_param) {
|
||||
report_parse_error("Unrecognized platform specification format on command line\n");
|
||||
return ARGPARSE_ERROR;
|
||||
}
|
||||
|
||||
/* Save the result */
|
||||
pb_FreePlatformParam(params->platform);
|
||||
params->platform = platform_param;
|
||||
|
||||
return ARGPARSE_OK;
|
||||
}
|
||||
|
||||
|
||||
static struct option options[] = {
|
||||
{ 'o', NULL, &parse_output_file },
|
||||
{ 'i', NULL, &parse_input_files },
|
||||
{ '-', NULL, &parse_end_options },
|
||||
{ 0, "device", &parse_device },
|
||||
{ 0, "platform", &parse_platform },
|
||||
{ 0, NULL, NULL }
|
||||
};
|
||||
|
||||
static int
|
||||
is_last_option(struct option *op)
|
||||
{
|
||||
return op->action == NULL;
|
||||
}
|
||||
|
||||
/****************************************************************************/
|
||||
|
||||
/* Parse command-line parameters.
|
||||
* Return zero on error, nonzero otherwise.
|
||||
* On error, the other outputs may be invalid.
|
||||
*
|
||||
* The information collected from parameters is used to update
|
||||
* 'ret'. 'ret' should be initialized.
|
||||
*
|
||||
* '_argc' and 'argv' are updated to contain only the unprocessed arguments.
|
||||
*/
|
||||
static int
|
||||
pb_ParseParameters (struct pb_Parameters *ret, int *_argc, char **argv)
|
||||
{
|
||||
char *err_message;
|
||||
struct argparse ap;
|
||||
|
||||
/* Each argument */
|
||||
initialize_argparse(&ap, *_argc, argv);
|
||||
while(!is_end_of_arguments(&ap)) {
|
||||
result arg_result; /* Result of parsing this option */
|
||||
char *arg = get_argument(&ap);
|
||||
|
||||
/* Process this argument */
|
||||
if (arg[0] == '-') {
|
||||
/* Single-character flag */
|
||||
if ((arg[1] != 0) && (arg[2] == 0)) {
|
||||
delete_argument(&ap); /* This argument is consumed here */
|
||||
|
||||
/* Find a matching short option */
|
||||
struct option *op;
|
||||
for (op = options; !is_last_option(op); op++) {
|
||||
if (op->short_name == arg[1]) {
|
||||
arg_result = (*op->action)(&ap, ret);
|
||||
goto option_was_processed;
|
||||
}
|
||||
}
|
||||
|
||||
/* No option matches */
|
||||
report_parse_error("Unexpected command-line parameter\n");
|
||||
arg_result = ARGPARSE_ERROR;
|
||||
goto option_was_processed;
|
||||
}
|
||||
|
||||
/* Long flag */
|
||||
if (arg[1] == '-') {
|
||||
delete_argument(&ap); /* This argument is consumed here */
|
||||
|
||||
/* Find a matching long option */
|
||||
struct option *op;
|
||||
for (op = options; !is_last_option(op); op++) {
|
||||
if (op->long_name && strcmp(&arg[2], op->long_name) == 0) {
|
||||
arg_result = (*op->action)(&ap, ret);
|
||||
goto option_was_processed;
|
||||
}
|
||||
}
|
||||
|
||||
/* No option matches */
|
||||
report_parse_error("Unexpected command-line parameter\n");
|
||||
arg_result = ARGPARSE_ERROR;
|
||||
goto option_was_processed;
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* Other arguments are ignored */
|
||||
next_argument(&ap);
|
||||
arg_result = ARGPARSE_OK;
|
||||
goto option_was_processed;
|
||||
}
|
||||
|
||||
option_was_processed:
|
||||
/* Decide what to do next based on 'arg_result' */
|
||||
switch(arg_result) {
|
||||
case ARGPARSE_OK:
|
||||
/* Continue processing */
|
||||
break;
|
||||
|
||||
case ARGPARSE_ERROR:
|
||||
/* Error exit from the function */
|
||||
return 0;
|
||||
|
||||
case ARGPARSE_DONE:
|
||||
/* Normal exit from the argument parsing loop */
|
||||
goto end_of_options;
|
||||
}
|
||||
} /* end for each argument */
|
||||
|
||||
/* If all arguments were processed, then normal exit from the loop */
|
||||
|
||||
end_of_options:
|
||||
finalize_argparse(&ap, _argc, argv);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*****************************************************************************/
|
||||
/* Other exported functions */
|
||||
|
||||
struct pb_Parameters *
|
||||
pb_ReadParameters(int *_argc, char **argv)
|
||||
{
|
||||
struct pb_Parameters *ret =
|
||||
(struct pb_Parameters *)malloc(sizeof(struct pb_Parameters));
|
||||
|
||||
/* Initialize the parameters structure */
|
||||
ret->outFile = NULL;
|
||||
ret->inpFiles = (char **)malloc(sizeof(char *));
|
||||
ret->inpFiles[0] = NULL;
|
||||
ret->platform = NULL;
|
||||
ret->device = NULL;
|
||||
|
||||
/* Read parameters and update _argc, argv */
|
||||
if (!pb_ParseParameters(ret, _argc, argv)) {
|
||||
/* Parse error */
|
||||
pb_FreeParameters(ret);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int
|
||||
pb_Parameters_CountInputs(struct pb_Parameters *p)
|
||||
{
|
||||
int n;
|
||||
|
||||
for (n = 0; p->inpFiles[n]; n++);
|
||||
return n;
|
||||
}
|
||||
|
||||
55
benchmarks/opencl/lbm/gpu_info.c
Normal file
55
benchmarks/opencl/lbm/gpu_info.c
Normal file
@@ -0,0 +1,55 @@
|
||||
/***************************************************************************
|
||||
*cr
|
||||
*cr (C) Copyright 2010 The Board of Trustees of the
|
||||
*cr University of Illinois
|
||||
*cr All Rights Reserved
|
||||
*cr
|
||||
***************************************************************************/
|
||||
//#include <endian.h>
|
||||
#include <stdlib.h>
|
||||
#include <malloc.h>
|
||||
#include <stdio.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
#include "gpu_info.h"
|
||||
|
||||
void compute_active_thread(size_t *thread,
|
||||
size_t *grid,
|
||||
int task,
|
||||
int pad,
|
||||
int major,
|
||||
int minor,
|
||||
int sm)
|
||||
{
|
||||
int max_thread;
|
||||
int max_block=8;
|
||||
if(major==1)
|
||||
{
|
||||
if(minor>=2)
|
||||
max_thread=1024;
|
||||
else
|
||||
max_thread=768;
|
||||
}
|
||||
else if(major==2)
|
||||
max_thread=1536;
|
||||
else
|
||||
//newer GPU //keep using 2.0
|
||||
max_thread=1536;
|
||||
|
||||
int _grid;
|
||||
int _thread;
|
||||
|
||||
if(task*pad>sm*max_thread)
|
||||
{
|
||||
_thread=max_thread/max_block;
|
||||
_grid = ((task*pad+_thread-1)/_thread)*_thread;
|
||||
}
|
||||
else
|
||||
{
|
||||
_thread=pad;
|
||||
_grid=task*pad;
|
||||
}
|
||||
|
||||
thread[0]=_thread;
|
||||
grid[0]=_grid;
|
||||
}
|
||||
20
benchmarks/opencl/lbm/gpu_info.h
Normal file
20
benchmarks/opencl/lbm/gpu_info.h
Normal file
@@ -0,0 +1,20 @@
|
||||
/***************************************************************************
|
||||
*cr
|
||||
*cr (C) Copyright 2010 The Board of Trustees of the
|
||||
*cr University of Illinois
|
||||
*cr All Rights Reserved
|
||||
*cr
|
||||
***************************************************************************/
|
||||
|
||||
#ifndef __GPUINFOH__
|
||||
#define __GPUINFOH__
|
||||
|
||||
void compute_active_thread(size_t *thread,
|
||||
size_t *grid,
|
||||
int task,
|
||||
int pad,
|
||||
int major,
|
||||
int minor,
|
||||
int sm);
|
||||
|
||||
#endif
|
||||
424
benchmarks/opencl/lbm/kernel.cl
Normal file
424
benchmarks/opencl/lbm/kernel.cl
Normal file
@@ -0,0 +1,424 @@
|
||||
/***************************************************************************
|
||||
*cr
|
||||
*cr (C) Copyright 2010 The Board of Trustees of the
|
||||
*cr University of Illinois
|
||||
*cr All Rights Reserved
|
||||
*cr
|
||||
***************************************************************************/
|
||||
|
||||
#ifndef LBM_KERNEL_CL
|
||||
#define LBM_KERNEL_CL
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
*cr
|
||||
*cr (C) Copyright 2010 The Board of Trustees of the
|
||||
*cr University of Illinois
|
||||
*cr All Rights Reserved
|
||||
*cr
|
||||
***************************************************************************/
|
||||
/***************************************************************************
|
||||
*cr
|
||||
*cr (C) Copyright 2010 The Board of Trustees of the
|
||||
*cr University of Illinois
|
||||
*cr All Rights Reserved
|
||||
*cr
|
||||
***************************************************************************/
|
||||
|
||||
/*############################################################################*/
|
||||
|
||||
#ifndef _LAYOUT_CONFIG_H_
|
||||
#define _LAYOUT_CONFIG_H_
|
||||
|
||||
/*############################################################################*/
|
||||
|
||||
//Unchangeable settings: volume simulation size for the given example
|
||||
#define SIZE_X (32)
|
||||
#define SIZE_Y (32)
|
||||
#define SIZE_Z (32)
|
||||
|
||||
//Changeable settings
|
||||
//Padding in each dimension
|
||||
#define PADDING_X (8)
|
||||
#define PADDING_Y (0)
|
||||
#define PADDING_Z (4)
|
||||
|
||||
//Pitch in each dimension
|
||||
#define PADDED_X (SIZE_X+PADDING_X)
|
||||
#define PADDED_Y (SIZE_Y+PADDING_Y)
|
||||
#define PADDED_Z (SIZE_Z+PADDING_Z)
|
||||
|
||||
#define TOTAL_CELLS (SIZE_X*SIZE_Y*SIZE_Z)
|
||||
#define TOTAL_PADDED_CELLS (PADDED_X*PADDED_Y*PADDED_Z)
|
||||
|
||||
//Flattening function
|
||||
// This macro will be used to map a 3-D index and element to a value
|
||||
// The macro below implements the equivalent of a 3-D array of
|
||||
// 20-element structures in C standard layout.
|
||||
#define CALC_INDEX(x,y,z,e) ( e + N_CELL_ENTRIES*\
|
||||
((x)+(y)*PADDED_X+(z)*PADDED_X*PADDED_Y) )
|
||||
|
||||
#define MARGIN (CALC_INDEX(0, 0, 2, 0) - CALC_INDEX(0,0,0,0))
|
||||
|
||||
// Set this value to 1 for GATHER, or 0 for SCATTER
|
||||
#if 1
|
||||
#define GATHER
|
||||
#else
|
||||
#define SCATTER
|
||||
#endif
|
||||
|
||||
//OpenCL block size (not trivially changeable here)
|
||||
#define BLOCK_SIZE SIZE_X
|
||||
|
||||
/*############################################################################*/
|
||||
|
||||
typedef enum {C = 0,
|
||||
N, S, E, W, T, B,
|
||||
NE, NW, SE, SW,
|
||||
NT, NB, ST, SB,
|
||||
ET, EB, WT, WB,
|
||||
FLAGS, N_CELL_ENTRIES} CELL_ENTRIES;
|
||||
|
||||
#define N_DISTR_FUNCS FLAGS
|
||||
|
||||
typedef enum {OBSTACLE = 1 << 0,
|
||||
ACCEL = 1 << 1,
|
||||
IN_OUT_FLOW = 1 << 2} CELL_FLAGS;
|
||||
|
||||
#endif /* _CONFIG_H_ */
|
||||
|
||||
|
||||
#ifndef _LBM_MARCOS_H
|
||||
#define _LBM_MACROS_H_
|
||||
|
||||
#define OMEGA (1.95f)
|
||||
|
||||
#define OUTPUT_PRECISION float
|
||||
|
||||
#define BOOL int
|
||||
#define TRUE (-1)
|
||||
#define FALSE (0)
|
||||
|
||||
#define DFL1 (1.0f/ 3.0f)
|
||||
#define DFL2 (1.0f/18.0f)
|
||||
#define DFL3 (1.0f/36.0f)
|
||||
|
||||
/*############################################################################*/
|
||||
|
||||
typedef float* LBM_Grid;//float LBM_Grid[PADDED_Z*PADDED_Y*PADDED_X*N_CELL_ENTRIES];
|
||||
typedef LBM_Grid* LBM_GridPtr;
|
||||
|
||||
/*############################################################################*/
|
||||
|
||||
|
||||
#define SWEEP_X __temp_x__
|
||||
#define SWEEP_Y __temp_y__
|
||||
#define SWEEP_Z __temp_z__
|
||||
#define SWEEP_VAR int __temp_x__, __temp_y__, __temp_z__;
|
||||
|
||||
#define SWEEP_START(x1,y1,z1,x2,y2,z2) \
|
||||
for( __temp_z__ = z1; \
|
||||
__temp_z__ < z2; \
|
||||
__temp_z__++) { \
|
||||
for( __temp_y__ = 0; \
|
||||
__temp_y__ < SIZE_Y; \
|
||||
__temp_y__++) { \
|
||||
for(__temp_x__ = 0; \
|
||||
__temp_x__ < SIZE_X; \
|
||||
__temp_x__++) { \
|
||||
|
||||
#define SWEEP_END }}}
|
||||
|
||||
|
||||
#define GRID_ENTRY(g,x,y,z,e) ((g)[CALC_INDEX( x, y, z, e)])
|
||||
#define GRID_ENTRY_SWEEP(g,dx,dy,dz,e) ((g)[CALC_INDEX((dx)+SWEEP_X, (dy)+SWEEP_Y, (dz)+SWEEP_Z, e)])
|
||||
|
||||
#define LOCAL(g,e) (GRID_ENTRY_SWEEP( g, 0, 0, 0, e ))
|
||||
#define NEIGHBOR_C(g,e) (GRID_ENTRY_SWEEP( g, 0, 0, 0, e ))
|
||||
#define NEIGHBOR_N(g,e) (GRID_ENTRY_SWEEP( g, 0, +1, 0, e ))
|
||||
#define NEIGHBOR_S(g,e) (GRID_ENTRY_SWEEP( g, 0, -1, 0, e ))
|
||||
#define NEIGHBOR_E(g,e) (GRID_ENTRY_SWEEP( g, +1, 0, 0, e ))
|
||||
#define NEIGHBOR_W(g,e) (GRID_ENTRY_SWEEP( g, -1, 0, 0, e ))
|
||||
#define NEIGHBOR_T(g,e) (GRID_ENTRY_SWEEP( g, 0, 0, +1, e ))
|
||||
#define NEIGHBOR_B(g,e) (GRID_ENTRY_SWEEP( g, 0, 0, -1, e ))
|
||||
#define NEIGHBOR_NE(g,e) (GRID_ENTRY_SWEEP( g, +1, +1, 0, e ))
|
||||
#define NEIGHBOR_NW(g,e) (GRID_ENTRY_SWEEP( g, -1, +1, 0, e ))
|
||||
#define NEIGHBOR_SE(g,e) (GRID_ENTRY_SWEEP( g, +1, -1, 0, e ))
|
||||
#define NEIGHBOR_SW(g,e) (GRID_ENTRY_SWEEP( g, -1, -1, 0, e ))
|
||||
#define NEIGHBOR_NT(g,e) (GRID_ENTRY_SWEEP( g, 0, +1, +1, e ))
|
||||
#define NEIGHBOR_NB(g,e) (GRID_ENTRY_SWEEP( g, 0, +1, -1, e ))
|
||||
#define NEIGHBOR_ST(g,e) (GRID_ENTRY_SWEEP( g, 0, -1, +1, e ))
|
||||
#define NEIGHBOR_SB(g,e) (GRID_ENTRY_SWEEP( g, 0, -1, -1, e ))
|
||||
#define NEIGHBOR_ET(g,e) (GRID_ENTRY_SWEEP( g, +1, 0, +1, e ))
|
||||
#define NEIGHBOR_EB(g,e) (GRID_ENTRY_SWEEP( g, +1, 0, -1, e ))
|
||||
#define NEIGHBOR_WT(g,e) (GRID_ENTRY_SWEEP( g, -1, 0, +1, e ))
|
||||
#define NEIGHBOR_WB(g,e) (GRID_ENTRY_SWEEP( g, -1, 0, -1, e ))
|
||||
|
||||
|
||||
#ifdef SCATTER
|
||||
|
||||
#define SRC_C(g) (LOCAL( g, C ))
|
||||
#define SRC_N(g) (LOCAL( g, N ))
|
||||
#define SRC_S(g) (LOCAL( g, S ))
|
||||
#define SRC_E(g) (LOCAL( g, E ))
|
||||
#define SRC_W(g) (LOCAL( g, W ))
|
||||
#define SRC_T(g) (LOCAL( g, T ))
|
||||
#define SRC_B(g) (LOCAL( g, B ))
|
||||
#define SRC_NE(g) (LOCAL( g, NE ))
|
||||
#define SRC_NW(g) (LOCAL( g, NW ))
|
||||
#define SRC_SE(g) (LOCAL( g, SE ))
|
||||
#define SRC_SW(g) (LOCAL( g, SW ))
|
||||
#define SRC_NT(g) (LOCAL( g, NT ))
|
||||
#define SRC_NB(g) (LOCAL( g, NB ))
|
||||
#define SRC_ST(g) (LOCAL( g, ST ))
|
||||
#define SRC_SB(g) (LOCAL( g, SB ))
|
||||
#define SRC_ET(g) (LOCAL( g, ET ))
|
||||
#define SRC_EB(g) (LOCAL( g, EB ))
|
||||
#define SRC_WT(g) (LOCAL( g, WT ))
|
||||
#define SRC_WB(g) (LOCAL( g, WB ))
|
||||
|
||||
#define DST_C(g) (NEIGHBOR_C ( g, C ))
|
||||
#define DST_N(g) (NEIGHBOR_N ( g, N ))
|
||||
#define DST_S(g) (NEIGHBOR_S ( g, S ))
|
||||
#define DST_E(g) (NEIGHBOR_E ( g, E ))
|
||||
#define DST_W(g) (NEIGHBOR_W ( g, W ))
|
||||
#define DST_T(g) (NEIGHBOR_T ( g, T ))
|
||||
#define DST_B(g) (NEIGHBOR_B ( g, B ))
|
||||
#define DST_NE(g) (NEIGHBOR_NE( g, NE ))
|
||||
#define DST_NW(g) (NEIGHBOR_NW( g, NW ))
|
||||
#define DST_SE(g) (NEIGHBOR_SE( g, SE ))
|
||||
#define DST_SW(g) (NEIGHBOR_SW( g, SW ))
|
||||
#define DST_NT(g) (NEIGHBOR_NT( g, NT ))
|
||||
#define DST_NB(g) (NEIGHBOR_NB( g, NB ))
|
||||
#define DST_ST(g) (NEIGHBOR_ST( g, ST ))
|
||||
#define DST_SB(g) (NEIGHBOR_SB( g, SB ))
|
||||
#define DST_ET(g) (NEIGHBOR_ET( g, ET ))
|
||||
#define DST_EB(g) (NEIGHBOR_EB( g, EB ))
|
||||
#define DST_WT(g) (NEIGHBOR_WT( g, WT ))
|
||||
#define DST_WB(g) (NEIGHBOR_WB( g, WB ))
|
||||
|
||||
#else /* GATHER */
|
||||
|
||||
#define SRC_C(g) (NEIGHBOR_C ( g, C ))
|
||||
#define SRC_N(g) (NEIGHBOR_S ( g, N ))
|
||||
#define SRC_S(g) (NEIGHBOR_N ( g, S ))
|
||||
#define SRC_E(g) (NEIGHBOR_W ( g, E ))
|
||||
#define SRC_W(g) (NEIGHBOR_E ( g, W ))
|
||||
#define SRC_T(g) (NEIGHBOR_B ( g, T ))
|
||||
#define SRC_B(g) (NEIGHBOR_T ( g, B ))
|
||||
#define SRC_NE(g) (NEIGHBOR_SW( g, NE ))
|
||||
#define SRC_NW(g) (NEIGHBOR_SE( g, NW ))
|
||||
#define SRC_SE(g) (NEIGHBOR_NW( g, SE ))
|
||||
#define SRC_SW(g) (NEIGHBOR_NE( g, SW ))
|
||||
#define SRC_NT(g) (NEIGHBOR_SB( g, NT ))
|
||||
#define SRC_NB(g) (NEIGHBOR_ST( g, NB ))
|
||||
#define SRC_ST(g) (NEIGHBOR_NB( g, ST ))
|
||||
#define SRC_SB(g) (NEIGHBOR_NT( g, SB ))
|
||||
#define SRC_ET(g) (NEIGHBOR_WB( g, ET ))
|
||||
#define SRC_EB(g) (NEIGHBOR_WT( g, EB ))
|
||||
#define SRC_WT(g) (NEIGHBOR_EB( g, WT ))
|
||||
#define SRC_WB(g) (NEIGHBOR_ET( g, WB ))
|
||||
|
||||
#define DST_C(g) (LOCAL( g, C ))
|
||||
#define DST_N(g) (LOCAL( g, N ))
|
||||
#define DST_S(g) (LOCAL( g, S ))
|
||||
#define DST_E(g) (LOCAL( g, E ))
|
||||
#define DST_W(g) (LOCAL( g, W ))
|
||||
#define DST_T(g) (LOCAL( g, T ))
|
||||
#define DST_B(g) (LOCAL( g, B ))
|
||||
#define DST_NE(g) (LOCAL( g, NE ))
|
||||
#define DST_NW(g) (LOCAL( g, NW ))
|
||||
#define DST_SE(g) (LOCAL( g, SE ))
|
||||
#define DST_SW(g) (LOCAL( g, SW ))
|
||||
#define DST_NT(g) (LOCAL( g, NT ))
|
||||
#define DST_NB(g) (LOCAL( g, NB ))
|
||||
#define DST_ST(g) (LOCAL( g, ST ))
|
||||
#define DST_SB(g) (LOCAL( g, SB ))
|
||||
#define DST_ET(g) (LOCAL( g, ET ))
|
||||
#define DST_EB(g) (LOCAL( g, EB ))
|
||||
#define DST_WT(g) (LOCAL( g, WT ))
|
||||
#define DST_WB(g) (LOCAL( g, WB ))
|
||||
|
||||
#endif /* GATHER */
|
||||
|
||||
#define MAGIC_CAST(v) ((unsigned int*) ((void*) (&(v))))
|
||||
#define FLAG_VAR(v) unsigned int* _aux_ = MAGIC_CAST(v)
|
||||
|
||||
#define TEST_FLAG_SWEEP(g,f) ((*MAGIC_CAST(LOCAL(g, FLAGS))) & (f))
|
||||
#define SET_FLAG_SWEEP(g,f) {FLAG_VAR(LOCAL(g, FLAGS)); (*_aux_) |= (f);}
|
||||
#define CLEAR_FLAG_SWEEP(g,f) {FLAG_VAR(LOCAL(g, FLAGS)); (*_aux_) &= ~(f);}
|
||||
#define CLEAR_ALL_FLAGS_SWEEP(g) {FLAG_VAR(LOCAL(g, FLAGS)); (*_aux_) = 0;}
|
||||
|
||||
#define TEST_FLAG(g,x,y,z,f) ((*MAGIC_CAST(GRID_ENTRY(g, x, y, z, FLAGS))) & (f))
|
||||
#define SET_FLAG(g,x,y,z,f) {FLAG_VAR(GRID_ENTRY(g, x, y, z, FLAGS)); (*_aux_) |= (f);}
|
||||
#define CLEAR_FLAG(g,x,y,z,f) {FLAG_VAR(GRID_ENTRY(g, x, y, z, FLAGS)); (*_aux_) &= ~(f);}
|
||||
#define CLEAR_ALL_FLAGS(g,x,y,z) {FLAG_VAR(GRID_ENTRY(g, x, y, z, FLAGS)); (*_aux_) = 0;}
|
||||
|
||||
/*############################################################################*/
|
||||
|
||||
#endif /* _CONFIG_H_ */
|
||||
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
__kernel void performStreamCollide_kernel( __global float* srcGrid, __global float* dstGrid )
|
||||
{
|
||||
srcGrid += MARGIN;
|
||||
dstGrid += MARGIN;
|
||||
|
||||
|
||||
//Using some predefined macros here. Consider this the declaration
|
||||
// and initialization of the variables SWEEP_X, SWEEP_Y and SWEEP_Z
|
||||
|
||||
SWEEP_VAR
|
||||
SWEEP_X = get_local_id(0);
|
||||
SWEEP_Y = get_group_id(0);
|
||||
SWEEP_Z = get_group_id(1);
|
||||
|
||||
float temp_swp, tempC, tempN, tempS, tempE, tempW, tempT, tempB;
|
||||
float tempNE, tempNW, tempSE, tempSW, tempNT, tempNB, tempST ;
|
||||
float tempSB, tempET, tempEB, tempWT, tempWB ;
|
||||
|
||||
//Load all of the input fields
|
||||
//This is a gather operation of the SCATTER preprocessor variable
|
||||
// is undefined in layout_config.h, or a "local" read otherwise
|
||||
tempC = SRC_C(srcGrid);
|
||||
|
||||
tempN = SRC_N(srcGrid);
|
||||
tempS = SRC_S(srcGrid);
|
||||
tempE = SRC_E(srcGrid);
|
||||
tempW = SRC_W(srcGrid);
|
||||
tempT = SRC_T(srcGrid);
|
||||
tempB = SRC_B(srcGrid);
|
||||
|
||||
tempNE = SRC_NE(srcGrid);
|
||||
tempNW = SRC_NW(srcGrid);
|
||||
tempSE = SRC_SE(srcGrid);
|
||||
tempSW = SRC_SW(srcGrid);
|
||||
tempNT = SRC_NT(srcGrid);
|
||||
tempNB = SRC_NB(srcGrid);
|
||||
tempST = SRC_ST(srcGrid);
|
||||
tempSB = SRC_SB(srcGrid);
|
||||
tempET = SRC_ET(srcGrid);
|
||||
tempEB = SRC_EB(srcGrid);
|
||||
tempWT = SRC_WT(srcGrid);
|
||||
tempWB = SRC_WB(srcGrid);
|
||||
|
||||
//Test whether the cell is fluid or obstacle
|
||||
if(as_uint(LOCAL(srcGrid,FLAGS)) & (OBSTACLE)) {
|
||||
|
||||
//Swizzle the inputs: reflect any fluid coming into this cell
|
||||
// back to where it came from
|
||||
temp_swp = tempN ; tempN = tempS ; tempS = temp_swp ;
|
||||
temp_swp = tempE ; tempE = tempW ; tempW = temp_swp;
|
||||
temp_swp = tempT ; tempT = tempB ; tempB = temp_swp;
|
||||
temp_swp = tempNE; tempNE = tempSW ; tempSW = temp_swp;
|
||||
temp_swp = tempNW; tempNW = tempSE ; tempSE = temp_swp;
|
||||
temp_swp = tempNT ; tempNT = tempSB ; tempSB = temp_swp;
|
||||
temp_swp = tempNB ; tempNB = tempST ; tempST = temp_swp;
|
||||
temp_swp = tempET ; tempET= tempWB ; tempWB = temp_swp;
|
||||
temp_swp = tempEB ; tempEB = tempWT ; tempWT = temp_swp;
|
||||
}
|
||||
else {
|
||||
|
||||
//The math meat of LBM: ignore for optimization
|
||||
float ux, uy, uz, rho, u2;
|
||||
float temp1, temp2, temp_base;
|
||||
rho = tempC + tempN
|
||||
+ tempS + tempE
|
||||
+ tempW + tempT
|
||||
+ tempB + tempNE
|
||||
+ tempNW + tempSE
|
||||
+ tempSW + tempNT
|
||||
+ tempNB + tempST
|
||||
+ tempSB + tempET
|
||||
+ tempEB + tempWT
|
||||
+ tempWB;
|
||||
|
||||
ux = + tempE - tempW
|
||||
+ tempNE - tempNW
|
||||
+ tempSE - tempSW
|
||||
+ tempET + tempEB
|
||||
- tempWT - tempWB;
|
||||
|
||||
uy = + tempN - tempS
|
||||
+ tempNE + tempNW
|
||||
- tempSE - tempSW
|
||||
+ tempNT + tempNB
|
||||
- tempST - tempSB;
|
||||
|
||||
uz = + tempT - tempB
|
||||
+ tempNT - tempNB
|
||||
+ tempST - tempSB
|
||||
+ tempET - tempEB
|
||||
+ tempWT - tempWB;
|
||||
|
||||
ux /= rho;
|
||||
uy /= rho;
|
||||
uz /= rho;
|
||||
|
||||
if(as_uint(LOCAL(srcGrid,FLAGS)) & (ACCEL)) {
|
||||
|
||||
ux = 0.005f;
|
||||
uy = 0.002f;
|
||||
uz = 0.000f;
|
||||
}
|
||||
|
||||
u2 = 1.5f * (ux*ux + uy*uy + uz*uz) - 1.0f;
|
||||
temp_base = OMEGA*rho;
|
||||
temp1 = DFL1*temp_base;
|
||||
|
||||
//Put the output values for this cell in the shared memory
|
||||
temp_base = OMEGA*rho;
|
||||
temp1 = DFL1*temp_base;
|
||||
temp2 = 1.0f-OMEGA;
|
||||
tempC = temp2*tempC + temp1*( - u2);
|
||||
temp1 = DFL2*temp_base;
|
||||
tempN = temp2*tempN + temp1*( uy*(4.5f*uy + 3.0f) - u2);
|
||||
tempS = temp2*tempS + temp1*( uy*(4.5f*uy - 3.0f) - u2);
|
||||
tempT = temp2*tempT + temp1*( uz*(4.5f*uz + 3.0f) - u2);
|
||||
tempB = temp2*tempB + temp1*( uz*(4.5f*uz - 3.0f) - u2);
|
||||
tempE = temp2*tempE + temp1*( ux*(4.5f*ux + 3.0f) - u2);
|
||||
tempW = temp2*tempW + temp1*( ux*(4.5f*ux - 3.0f) - u2);
|
||||
temp1 = DFL3*temp_base;
|
||||
tempNT= temp2*tempNT + temp1 *( (+uy+uz)*(4.5f*(+uy+uz) + 3.0f) - u2);
|
||||
tempNB= temp2*tempNB + temp1 *( (+uy-uz)*(4.5f*(+uy-uz) + 3.0f) - u2);
|
||||
tempST= temp2*tempST + temp1 *( (-uy+uz)*(4.5f*(-uy+uz) + 3.0f) - u2);
|
||||
tempSB= temp2*tempSB + temp1 *( (-uy-uz)*(4.5f*(-uy-uz) + 3.0f) - u2);
|
||||
tempNE = temp2*tempNE + temp1 *( (+ux+uy)*(4.5f*(+ux+uy) + 3.0f) - u2);
|
||||
tempSE = temp2*tempSE + temp1 *((+ux-uy)*(4.5f*(+ux-uy) + 3.0f) - u2);
|
||||
tempET = temp2*tempET + temp1 *( (+ux+uz)*(4.5f*(+ux+uz) + 3.0f) - u2);
|
||||
tempEB = temp2*tempEB + temp1 *( (+ux-uz)*(4.5f*(+ux-uz) + 3.0f) - u2);
|
||||
tempNW = temp2*tempNW + temp1 *( (-ux+uy)*(4.5f*(-ux+uy) + 3.0f) - u2);
|
||||
tempSW = temp2*tempSW + temp1 *( (-ux-uy)*(4.5f*(-ux-uy) + 3.0f) - u2);
|
||||
tempWT = temp2*tempWT + temp1 *( (-ux+uz)*(4.5f*(-ux+uz) + 3.0f) - u2);
|
||||
tempWB = temp2*tempWB + temp1 *( (-ux-uz)*(4.5f*(-ux-uz) + 3.0f) - u2);
|
||||
}
|
||||
|
||||
//Write the results computed above
|
||||
//This is a scatter operation of the SCATTER preprocessor variable
|
||||
// is defined in layout_config.h, or a "local" write otherwise
|
||||
DST_C ( dstGrid ) = tempC;
|
||||
|
||||
DST_N ( dstGrid ) = tempN;
|
||||
DST_S ( dstGrid ) = tempS;
|
||||
DST_E ( dstGrid ) = tempE;
|
||||
DST_W ( dstGrid ) = tempW;
|
||||
DST_T ( dstGrid ) = tempT;
|
||||
DST_B ( dstGrid ) = tempB;
|
||||
|
||||
DST_NE( dstGrid ) = tempNE;
|
||||
DST_NW( dstGrid ) = tempNW;
|
||||
DST_SE( dstGrid ) = tempSE;
|
||||
DST_SW( dstGrid ) = tempSW;
|
||||
DST_NT( dstGrid ) = tempNT;
|
||||
DST_NB( dstGrid ) = tempNB;
|
||||
DST_ST( dstGrid ) = tempST;
|
||||
DST_SB( dstGrid ) = tempSB;
|
||||
DST_ET( dstGrid ) = tempET;
|
||||
DST_EB( dstGrid ) = tempEB;
|
||||
DST_WT( dstGrid ) = tempWT;
|
||||
DST_WB( dstGrid ) = tempWB;
|
||||
}
|
||||
|
||||
#endif // LBM_KERNEL_CL
|
||||
69
benchmarks/opencl/lbm/layout_config.h
Normal file
69
benchmarks/opencl/lbm/layout_config.h
Normal file
@@ -0,0 +1,69 @@
|
||||
/***************************************************************************
|
||||
*cr
|
||||
*cr (C) Copyright 2010 The Board of Trustees of the
|
||||
*cr University of Illinois
|
||||
*cr All Rights Reserved
|
||||
*cr
|
||||
***************************************************************************/
|
||||
|
||||
/*############################################################################*/
|
||||
|
||||
#ifndef _LAYOUT_CONFIG_H_
|
||||
#define _LAYOUT_CONFIG_H_
|
||||
|
||||
/*############################################################################*/
|
||||
|
||||
//Unchangeable settings: volume simulation size for the given example
|
||||
#define SIZE_X (32)
|
||||
#define SIZE_Y (16)
|
||||
#define SIZE_Z (8)
|
||||
|
||||
//Changeable settings
|
||||
//Padding in each dimension
|
||||
#define PADDING_X (8)
|
||||
#define PADDING_Y (0)
|
||||
#define PADDING_Z (4)
|
||||
|
||||
//Pitch in each dimension
|
||||
#define PADDED_X (SIZE_X+PADDING_X)
|
||||
#define PADDED_Y (SIZE_Y+PADDING_Y)
|
||||
#define PADDED_Z (SIZE_Z+PADDING_Z)
|
||||
|
||||
#define TOTAL_CELLS (SIZE_X*SIZE_Y*SIZE_Z)
|
||||
#define TOTAL_PADDED_CELLS (PADDED_X*PADDED_Y*PADDED_Z)
|
||||
|
||||
//Flattening function
|
||||
// This macro will be used to map a 3-D index and element to a value
|
||||
// The macro below implements the equivalent of a 3-D array of
|
||||
// 20-element structures in C standard layout.
|
||||
#define CALC_INDEX(x,y,z,e) ( e + N_CELL_ENTRIES*\
|
||||
((x)+(y)*PADDED_X+(z)*PADDED_X*PADDED_Y) )
|
||||
|
||||
#define MARGIN (CALC_INDEX(0, 0, 2, 0) - CALC_INDEX(0,0,0,0))
|
||||
|
||||
// Set this value to 1 for GATHER, or 0 for SCATTER
|
||||
#if 1
|
||||
#define GATHER
|
||||
#else
|
||||
#define SCATTER
|
||||
#endif
|
||||
|
||||
//OpenCL block size (not trivially changeable here)
|
||||
#define BLOCK_SIZE SIZE_X
|
||||
|
||||
/*############################################################################*/
|
||||
|
||||
typedef enum {C = 0,
|
||||
N, S, E, W, T, B,
|
||||
NE, NW, SE, SW,
|
||||
NT, NB, ST, SB,
|
||||
ET, EB, WT, WB,
|
||||
FLAGS, N_CELL_ENTRIES} CELL_ENTRIES;
|
||||
|
||||
#define N_DISTR_FUNCS FLAGS
|
||||
|
||||
typedef enum {OBSTACLE = 1 << 0,
|
||||
ACCEL = 1 << 1,
|
||||
IN_OUT_FLOW = 1 << 2} CELL_FLAGS;
|
||||
|
||||
#endif /* _CONFIG_H_ */
|
||||
356
benchmarks/opencl/lbm/lbm.c
Normal file
356
benchmarks/opencl/lbm/lbm.c
Normal file
@@ -0,0 +1,356 @@
|
||||
/***************************************************************************
|
||||
*cr
|
||||
*cr (C) Copyright 2010 The Board of Trustees of the
|
||||
*cr University of Illinois
|
||||
*cr All Rights Reserved
|
||||
*cr
|
||||
***************************************************************************/
|
||||
|
||||
/*############################################################################*/
|
||||
|
||||
// includes, system
|
||||
#include <CL/cl.h>
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <float.h>
|
||||
|
||||
// includes, project
|
||||
#include "layout_config.h"
|
||||
#include "lbm_macros.h"
|
||||
#include "ocl.h"
|
||||
#include "lbm.h"
|
||||
|
||||
#include "parboil.h"
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
void OpenCL_LBM_performStreamCollide( const OpenCL_Param* prm, cl_mem srcGrid, cl_mem dstGrid ) {
|
||||
|
||||
cl_int clStatus;
|
||||
|
||||
clStatus = clSetKernelArg(prm->clKernel,0,sizeof(cl_mem),(void*)&srcGrid);
|
||||
CHECK_ERROR("clSetKernelArg")
|
||||
|
||||
clStatus = clSetKernelArg(prm->clKernel,1,sizeof(cl_mem),(void*)&dstGrid);
|
||||
CHECK_ERROR("clSetKernelArg")
|
||||
|
||||
size_t dimBlock[3] = {SIZE_X,1,1};
|
||||
size_t dimGrid[3] = {SIZE_X*SIZE_Y,SIZE_Z,1};
|
||||
clStatus = clEnqueueNDRangeKernel(prm->clCommandQueue,prm->clKernel,3,NULL,dimGrid,dimBlock,0,NULL,NULL);
|
||||
CHECK_ERROR("clEnqueueNDRangeKernel")
|
||||
|
||||
clStatus = clFinish(prm->clCommandQueue);
|
||||
CHECK_ERROR("clFinish")
|
||||
}
|
||||
/*############################################################################*/
|
||||
|
||||
void LBM_allocateGrid( float** ptr ) {
|
||||
const size_t size = TOTAL_PADDED_CELLS * N_CELL_ENTRIES * sizeof(float);
|
||||
*ptr = (float*)malloc( size );
|
||||
if( !ptr ) {
|
||||
printf( "LBM_allocateGrid: could not allocate %.1f MByte\n",
|
||||
size / (1024.0*1024.0) );
|
||||
exit( 1 );
|
||||
}
|
||||
|
||||
memset( *ptr, 0, size );
|
||||
|
||||
printf( "LBM_allocateGrid: allocated %.1f MByte\n",
|
||||
size / (1024.0*1024.0) );
|
||||
|
||||
*ptr += MARGIN;
|
||||
}
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
void OpenCL_LBM_allocateGrid( const OpenCL_Param* prm, cl_mem* ptr ) {
|
||||
const size_t size = TOTAL_PADDED_CELLS*N_CELL_ENTRIES*sizeof( float );
|
||||
cl_int clStatus;
|
||||
/*size_t max_alloc_size = 0;
|
||||
clGetDeviceInfo(prm->clDevice, CL_DEVICE_MAX_MEM_ALLOC_SIZE,
|
||||
sizeof(max_alloc_size), &max_alloc_size, NULL);
|
||||
if (max_alloc_size < size) {
|
||||
fprintf(stderr, "Can't allocate buffer: max alloc size is %dMB\n",
|
||||
(int) (max_alloc_size >> 20));
|
||||
exit(-1);
|
||||
}*/
|
||||
*ptr = clCreateBuffer(prm->clContext,CL_MEM_READ_WRITE,size,NULL,&clStatus);
|
||||
CHECK_ERROR("clCreateBuffer")
|
||||
}
|
||||
|
||||
/*############################################################################*/
|
||||
|
||||
void LBM_freeGrid( float** ptr ) {
|
||||
free( *ptr-MARGIN );
|
||||
*ptr = NULL;
|
||||
}
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
void OpenCL_LBM_freeGrid(cl_mem ptr) {
|
||||
clReleaseMemObject(ptr);
|
||||
}
|
||||
|
||||
/*############################################################################*/
|
||||
|
||||
void LBM_initializeGrid( LBM_Grid grid ) {
|
||||
SWEEP_VAR
|
||||
|
||||
SWEEP_START( 0, 0, 0, 0, 0, SIZE_Z )
|
||||
SRC_C( grid ) = DFL1;
|
||||
SRC_N( grid ) = DFL2;
|
||||
SRC_S( grid ) = DFL2;
|
||||
SRC_E( grid ) = DFL2;
|
||||
SRC_W( grid ) = DFL2;
|
||||
SRC_T( grid ) = DFL2;
|
||||
SRC_B( grid ) = DFL2;
|
||||
SRC_NE( grid ) = DFL3;
|
||||
SRC_NW( grid ) = DFL3;
|
||||
SRC_SE( grid ) = DFL3;
|
||||
SRC_SW( grid ) = DFL3;
|
||||
SRC_NT( grid ) = DFL3;
|
||||
SRC_NB( grid ) = DFL3;
|
||||
SRC_ST( grid ) = DFL3;
|
||||
SRC_SB( grid ) = DFL3;
|
||||
SRC_ET( grid ) = DFL3;
|
||||
SRC_EB( grid ) = DFL3;
|
||||
SRC_WT( grid ) = DFL3;
|
||||
SRC_WB( grid ) = DFL3;
|
||||
|
||||
CLEAR_ALL_FLAGS_SWEEP( grid );
|
||||
SWEEP_END
|
||||
}
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
void OpenCL_LBM_initializeGrid( const OpenCL_Param* prm, cl_mem d_grid, LBM_Grid h_grid ) {
|
||||
const size_t size = TOTAL_PADDED_CELLS*N_CELL_ENTRIES*sizeof( float );
|
||||
cl_int clStatus;
|
||||
clStatus = clEnqueueWriteBuffer(prm->clCommandQueue,d_grid,CL_TRUE,0,size,h_grid-MARGIN,0,NULL,NULL);
|
||||
CHECK_ERROR("clEnqueueWriteBuffer")
|
||||
}
|
||||
|
||||
void OpenCL_LBM_getDeviceGrid( const OpenCL_Param* prm, cl_mem d_grid, LBM_Grid h_grid ) {
|
||||
const size_t size = TOTAL_PADDED_CELLS*N_CELL_ENTRIES*sizeof( float );
|
||||
cl_int clStatus;
|
||||
clStatus = clEnqueueReadBuffer(prm->clCommandQueue,d_grid,CL_TRUE,0,size,h_grid-MARGIN,0,NULL,NULL);
|
||||
CHECK_ERROR("clEnqueueReadBuffer")
|
||||
}
|
||||
|
||||
/*############################################################################*/
|
||||
|
||||
void LBM_swapGrids( cl_mem* grid1, cl_mem* grid2 ) {
|
||||
cl_mem aux = *grid1;
|
||||
*grid1 = *grid2;
|
||||
*grid2 = aux;
|
||||
}
|
||||
|
||||
/*############################################################################*/
|
||||
|
||||
void LBM_loadObstacleFile( LBM_Grid grid, const char* filename ) {
|
||||
int x, y, z;
|
||||
|
||||
FILE* file = fopen( filename, "rb" );
|
||||
|
||||
for( z = 0; z < SIZE_Z; z++ ) {
|
||||
for( y = 0; y < SIZE_Y; y++ ) {
|
||||
for( x = 0; x < SIZE_X; x++ ) {
|
||||
if( fgetc( file ) != '.' ) SET_FLAG( grid, x, y, z, OBSTACLE );
|
||||
}
|
||||
fgetc( file );
|
||||
}
|
||||
fgetc( file );
|
||||
}
|
||||
|
||||
fclose( file );
|
||||
}
|
||||
|
||||
/*############################################################################*/
|
||||
|
||||
void LBM_initializeSpecialCellsForLDC( LBM_Grid grid ) {
|
||||
int x, y, z;
|
||||
|
||||
for( z = -2; z < SIZE_Z+2; z++ ) {
|
||||
for( y = 0; y < SIZE_Y; y++ ) {
|
||||
for( x = 0; x < SIZE_X; x++ ) {
|
||||
if( x == 0 || x == SIZE_X-1 ||
|
||||
y == 0 || y == SIZE_Y-1 ||
|
||||
z == 0 || z == SIZE_Z-1 ) {
|
||||
SET_FLAG( grid, x, y, z, OBSTACLE );
|
||||
}
|
||||
else {
|
||||
if( (z == 1 || z == SIZE_Z-2) &&
|
||||
x > 1 && x < SIZE_X-2 &&
|
||||
y > 1 && y < SIZE_Y-2 ) {
|
||||
SET_FLAG( grid, x, y, z, ACCEL );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*############################################################################*/
|
||||
|
||||
void LBM_showGridStatistics( LBM_Grid grid ) {
|
||||
int nObstacleCells = 0,
|
||||
nAccelCells = 0,
|
||||
nFluidCells = 0;
|
||||
float ux, uy, uz;
|
||||
float minU2 = 1e+30, maxU2 = -1e+30, u2;
|
||||
float minRho = 1e+30, maxRho = -1e+30, rho;
|
||||
float mass = 0;
|
||||
|
||||
SWEEP_VAR
|
||||
|
||||
SWEEP_START( 0, 0, 0, 0, 0, SIZE_Z )
|
||||
rho = LOCAL( grid, C ) + LOCAL( grid, N )
|
||||
+ LOCAL( grid, S ) + LOCAL( grid, E )
|
||||
+ LOCAL( grid, W ) + LOCAL( grid, T )
|
||||
+ LOCAL( grid, B ) + LOCAL( grid, NE )
|
||||
+ LOCAL( grid, NW ) + LOCAL( grid, SE )
|
||||
+ LOCAL( grid, SW ) + LOCAL( grid, NT )
|
||||
+ LOCAL( grid, NB ) + LOCAL( grid, ST )
|
||||
+ LOCAL( grid, SB ) + LOCAL( grid, ET )
|
||||
+ LOCAL( grid, EB ) + LOCAL( grid, WT )
|
||||
+ LOCAL( grid, WB );
|
||||
|
||||
if( rho < minRho ) minRho = rho;
|
||||
if( rho > maxRho ) maxRho = rho;
|
||||
mass += rho;
|
||||
|
||||
if( TEST_FLAG_SWEEP( grid, OBSTACLE )) {
|
||||
nObstacleCells++;
|
||||
}
|
||||
else {
|
||||
if( TEST_FLAG_SWEEP( grid, ACCEL ))
|
||||
nAccelCells++;
|
||||
else
|
||||
nFluidCells++;
|
||||
|
||||
ux = + LOCAL( grid, E ) - LOCAL( grid, W )
|
||||
+ LOCAL( grid, NE ) - LOCAL( grid, NW )
|
||||
+ LOCAL( grid, SE ) - LOCAL( grid, SW )
|
||||
+ LOCAL( grid, ET ) + LOCAL( grid, EB )
|
||||
- LOCAL( grid, WT ) - LOCAL( grid, WB );
|
||||
uy = + LOCAL( grid, N ) - LOCAL( grid, S )
|
||||
+ LOCAL( grid, NE ) + LOCAL( grid, NW )
|
||||
- LOCAL( grid, SE ) - LOCAL( grid, SW )
|
||||
+ LOCAL( grid, NT ) + LOCAL( grid, NB )
|
||||
- LOCAL( grid, ST ) - LOCAL( grid, SB );
|
||||
uz = + LOCAL( grid, T ) - LOCAL( grid, B )
|
||||
+ LOCAL( grid, NT ) - LOCAL( grid, NB )
|
||||
+ LOCAL( grid, ST ) - LOCAL( grid, SB )
|
||||
+ LOCAL( grid, ET ) - LOCAL( grid, EB )
|
||||
+ LOCAL( grid, WT ) - LOCAL( grid, WB );
|
||||
u2 = (ux*ux + uy*uy + uz*uz) / (rho*rho);
|
||||
if( u2 < minU2 ) minU2 = u2;
|
||||
if( u2 > maxU2 ) maxU2 = u2;
|
||||
}
|
||||
SWEEP_END
|
||||
|
||||
printf( "LBM_showGridStatistics:\n"
|
||||
"\tnObstacleCells: %7i nAccelCells: %7i nFluidCells: %7i\n"
|
||||
"\tminRho: %8.4f maxRho: %8.4f mass: %e\n"
|
||||
"\tminU: %e maxU: %e\n\n",
|
||||
nObstacleCells, nAccelCells, nFluidCells,
|
||||
minRho, maxRho, mass,
|
||||
sqrt( minU2 ), sqrt( maxU2 ) );
|
||||
|
||||
}
|
||||
|
||||
/*############################################################################*/
|
||||
|
||||
static void storeValue( FILE* file, OUTPUT_PRECISION* v ) {
|
||||
const int litteBigEndianTest = 1;
|
||||
if( (*((unsigned char*) &litteBigEndianTest)) == 0 ) { /* big endian */
|
||||
const char* vPtr = (char*) v;
|
||||
char buffer[sizeof( OUTPUT_PRECISION )];
|
||||
int i;
|
||||
|
||||
for (i = 0; i < sizeof( OUTPUT_PRECISION ); i++)
|
||||
buffer[i] = vPtr[sizeof( OUTPUT_PRECISION ) - i - 1];
|
||||
|
||||
fwrite( buffer, sizeof( OUTPUT_PRECISION ), 1, file );
|
||||
}
|
||||
else { /* little endian */
|
||||
fwrite( v, sizeof( OUTPUT_PRECISION ), 1, file );
|
||||
}
|
||||
}
|
||||
|
||||
/*############################################################################*/
|
||||
|
||||
static void loadValue( FILE* file, OUTPUT_PRECISION* v ) {
|
||||
const int litteBigEndianTest = 1;
|
||||
if( (*((unsigned char*) &litteBigEndianTest)) == 0 ) { /* big endian */
|
||||
char* vPtr = (char*) v;
|
||||
char buffer[sizeof( OUTPUT_PRECISION )];
|
||||
int i;
|
||||
|
||||
fread( buffer, sizeof( OUTPUT_PRECISION ), 1, file );
|
||||
|
||||
for (i = 0; i < sizeof( OUTPUT_PRECISION ); i++)
|
||||
vPtr[i] = buffer[sizeof( OUTPUT_PRECISION ) - i - 1];
|
||||
}
|
||||
else { /* little endian */
|
||||
fread( v, sizeof( OUTPUT_PRECISION ), 1, file );
|
||||
}
|
||||
}
|
||||
|
||||
/*############################################################################*/
|
||||
|
||||
void LBM_storeVelocityField( LBM_Grid grid, const char* filename,
|
||||
const int binary ) {
|
||||
OUTPUT_PRECISION rho, ux, uy, uz;
|
||||
|
||||
FILE* file = fopen( filename, (binary ? "wb" : "w") );
|
||||
|
||||
SWEEP_VAR
|
||||
SWEEP_START(0,0,0,SIZE_X,SIZE_Y,SIZE_Z)
|
||||
rho = + SRC_C( grid ) + SRC_N( grid )
|
||||
+ SRC_S( grid ) + SRC_E( grid )
|
||||
+ SRC_W( grid ) + SRC_T( grid )
|
||||
+ SRC_B( grid ) + SRC_NE( grid )
|
||||
+ SRC_NW( grid ) + SRC_SE( grid )
|
||||
+ SRC_SW( grid ) + SRC_NT( grid )
|
||||
+ SRC_NB( grid ) + SRC_ST( grid )
|
||||
+ SRC_SB( grid ) + SRC_ET( grid )
|
||||
+ SRC_EB( grid ) + SRC_WT( grid )
|
||||
+ SRC_WB( grid );
|
||||
ux = + SRC_E( grid ) - SRC_W( grid )
|
||||
+ SRC_NE( grid ) - SRC_NW( grid )
|
||||
+ SRC_SE( grid ) - SRC_SW( grid )
|
||||
+ SRC_ET( grid ) + SRC_EB( grid )
|
||||
- SRC_WT( grid ) - SRC_WB( grid );
|
||||
uy = + SRC_N( grid ) - SRC_S( grid )
|
||||
+ SRC_NE( grid ) + SRC_NW( grid )
|
||||
- SRC_SE( grid ) - SRC_SW( grid )
|
||||
+ SRC_NT( grid ) + SRC_NB( grid )
|
||||
- SRC_ST( grid ) - SRC_SB( grid );
|
||||
uz = + SRC_T( grid ) - SRC_B( grid )
|
||||
+ SRC_NT( grid ) - SRC_NB( grid )
|
||||
+ SRC_ST( grid ) - SRC_SB( grid )
|
||||
+ SRC_ET( grid ) - SRC_EB( grid )
|
||||
+ SRC_WT( grid ) - SRC_WB( grid );
|
||||
ux /= rho;
|
||||
uy /= rho;
|
||||
uz /= rho;
|
||||
|
||||
if( binary ) {
|
||||
/*
|
||||
fwrite( &ux, sizeof( ux ), 1, file );
|
||||
fwrite( &uy, sizeof( uy ), 1, file );
|
||||
fwrite( &uz, sizeof( uz ), 1, file );
|
||||
*/
|
||||
storeValue( file, &ux );
|
||||
storeValue( file, &uy );
|
||||
storeValue( file, &uz );
|
||||
} else
|
||||
fprintf( file, "%e %e %e\n", ux, uy, uz );
|
||||
|
||||
SWEEP_END;
|
||||
|
||||
fclose( file );
|
||||
}
|
||||
39
benchmarks/opencl/lbm/lbm.h
Normal file
39
benchmarks/opencl/lbm/lbm.h
Normal file
@@ -0,0 +1,39 @@
|
||||
/***************************************************************************
|
||||
*cr
|
||||
*cr (C) Copyright 2010 The Board of Trustees of the
|
||||
*cr University of Illinois
|
||||
*cr All Rights Reserved
|
||||
*cr
|
||||
***************************************************************************/
|
||||
|
||||
/*############################################################################*/
|
||||
|
||||
#ifndef _LBM_H_
|
||||
#define _LBM_H_
|
||||
|
||||
/*############################################################################*/
|
||||
|
||||
#include "ocl.h"
|
||||
#include "lbm_macros.h"
|
||||
|
||||
void LBM_allocateGrid( float** ptr );
|
||||
void LBM_freeGrid( float** ptr );
|
||||
void LBM_initializeGrid( LBM_Grid grid );
|
||||
void LBM_initializeSpecialCellsForLDC( LBM_Grid grid );
|
||||
void LBM_loadObstacleFile( LBM_Grid grid, const char* filename );
|
||||
void LBM_swapGrids( cl_mem* grid1, cl_mem* grid2 );
|
||||
void LBM_showGridStatistics( LBM_Grid Grid );
|
||||
void LBM_storeVelocityField( LBM_Grid grid, const char* filename,
|
||||
const BOOL binary );
|
||||
|
||||
/* OpenCL *********************************************************************/
|
||||
|
||||
void OpenCL_LBM_allocateGrid( const OpenCL_Param* prm, cl_mem* ptr );
|
||||
void OpenCL_LBM_freeGrid( cl_mem ptr );
|
||||
void OpenCL_LBM_initializeGrid( const OpenCL_Param* prm, cl_mem d_grid, LBM_Grid h_grid );
|
||||
void OpenCL_LBM_getDeviceGrid( const OpenCL_Param* prm, cl_mem d_grid, LBM_Grid h_grid );
|
||||
void OpenCL_LBM_performStreamCollide( const OpenCL_Param* prm, cl_mem srcGrid, cl_mem dstGrid );
|
||||
|
||||
/*############################################################################*/
|
||||
|
||||
#endif /* _LBM_H_ */
|
||||
177
benchmarks/opencl/lbm/lbm_macros.h
Normal file
177
benchmarks/opencl/lbm/lbm_macros.h
Normal file
@@ -0,0 +1,177 @@
|
||||
/***************************************************************************
|
||||
*cr
|
||||
*cr (C) Copyright 2010 The Board of Trustees of the
|
||||
*cr University of Illinois
|
||||
*cr All Rights Reserved
|
||||
*cr
|
||||
***************************************************************************/
|
||||
|
||||
#ifndef _LBM_MARCOS_H
|
||||
#define _LBM_MACROS_H_
|
||||
|
||||
#define OMEGA (1.95f)
|
||||
|
||||
#define OUTPUT_PRECISION float
|
||||
|
||||
#define BOOL int
|
||||
#define TRUE (-1)
|
||||
#define FALSE (0)
|
||||
|
||||
#define DFL1 (1.0f/ 3.0f)
|
||||
#define DFL2 (1.0f/18.0f)
|
||||
#define DFL3 (1.0f/36.0f)
|
||||
|
||||
/*############################################################################*/
|
||||
|
||||
typedef float* LBM_Grid;//float LBM_Grid[PADDED_Z*PADDED_Y*PADDED_X*N_CELL_ENTRIES];
|
||||
typedef LBM_Grid* LBM_GridPtr;
|
||||
|
||||
/*############################################################################*/
|
||||
|
||||
|
||||
#define SWEEP_X __temp_x__
|
||||
#define SWEEP_Y __temp_y__
|
||||
#define SWEEP_Z __temp_z__
|
||||
#define SWEEP_VAR int __temp_x__, __temp_y__, __temp_z__;
|
||||
|
||||
#define SWEEP_START(x1,y1,z1,x2,y2,z2) \
|
||||
for( __temp_z__ = z1; \
|
||||
__temp_z__ < z2; \
|
||||
__temp_z__++) { \
|
||||
for( __temp_y__ = 0; \
|
||||
__temp_y__ < SIZE_Y; \
|
||||
__temp_y__++) { \
|
||||
for(__temp_x__ = 0; \
|
||||
__temp_x__ < SIZE_X; \
|
||||
__temp_x__++) { \
|
||||
|
||||
#define SWEEP_END }}}
|
||||
|
||||
|
||||
#define GRID_ENTRY(g,x,y,z,e) ((g)[CALC_INDEX( x, y, z, e)])
|
||||
#define GRID_ENTRY_SWEEP(g,dx,dy,dz,e) ((g)[CALC_INDEX((dx)+SWEEP_X, (dy)+SWEEP_Y, (dz)+SWEEP_Z, e)])
|
||||
|
||||
#define LOCAL(g,e) (GRID_ENTRY_SWEEP( g, 0, 0, 0, e ))
|
||||
#define NEIGHBOR_C(g,e) (GRID_ENTRY_SWEEP( g, 0, 0, 0, e ))
|
||||
#define NEIGHBOR_N(g,e) (GRID_ENTRY_SWEEP( g, 0, +1, 0, e ))
|
||||
#define NEIGHBOR_S(g,e) (GRID_ENTRY_SWEEP( g, 0, -1, 0, e ))
|
||||
#define NEIGHBOR_E(g,e) (GRID_ENTRY_SWEEP( g, +1, 0, 0, e ))
|
||||
#define NEIGHBOR_W(g,e) (GRID_ENTRY_SWEEP( g, -1, 0, 0, e ))
|
||||
#define NEIGHBOR_T(g,e) (GRID_ENTRY_SWEEP( g, 0, 0, +1, e ))
|
||||
#define NEIGHBOR_B(g,e) (GRID_ENTRY_SWEEP( g, 0, 0, -1, e ))
|
||||
#define NEIGHBOR_NE(g,e) (GRID_ENTRY_SWEEP( g, +1, +1, 0, e ))
|
||||
#define NEIGHBOR_NW(g,e) (GRID_ENTRY_SWEEP( g, -1, +1, 0, e ))
|
||||
#define NEIGHBOR_SE(g,e) (GRID_ENTRY_SWEEP( g, +1, -1, 0, e ))
|
||||
#define NEIGHBOR_SW(g,e) (GRID_ENTRY_SWEEP( g, -1, -1, 0, e ))
|
||||
#define NEIGHBOR_NT(g,e) (GRID_ENTRY_SWEEP( g, 0, +1, +1, e ))
|
||||
#define NEIGHBOR_NB(g,e) (GRID_ENTRY_SWEEP( g, 0, +1, -1, e ))
|
||||
#define NEIGHBOR_ST(g,e) (GRID_ENTRY_SWEEP( g, 0, -1, +1, e ))
|
||||
#define NEIGHBOR_SB(g,e) (GRID_ENTRY_SWEEP( g, 0, -1, -1, e ))
|
||||
#define NEIGHBOR_ET(g,e) (GRID_ENTRY_SWEEP( g, +1, 0, +1, e ))
|
||||
#define NEIGHBOR_EB(g,e) (GRID_ENTRY_SWEEP( g, +1, 0, -1, e ))
|
||||
#define NEIGHBOR_WT(g,e) (GRID_ENTRY_SWEEP( g, -1, 0, +1, e ))
|
||||
#define NEIGHBOR_WB(g,e) (GRID_ENTRY_SWEEP( g, -1, 0, -1, e ))
|
||||
|
||||
|
||||
#ifdef SCATTER
|
||||
|
||||
#define SRC_C(g) (LOCAL( g, C ))
|
||||
#define SRC_N(g) (LOCAL( g, N ))
|
||||
#define SRC_S(g) (LOCAL( g, S ))
|
||||
#define SRC_E(g) (LOCAL( g, E ))
|
||||
#define SRC_W(g) (LOCAL( g, W ))
|
||||
#define SRC_T(g) (LOCAL( g, T ))
|
||||
#define SRC_B(g) (LOCAL( g, B ))
|
||||
#define SRC_NE(g) (LOCAL( g, NE ))
|
||||
#define SRC_NW(g) (LOCAL( g, NW ))
|
||||
#define SRC_SE(g) (LOCAL( g, SE ))
|
||||
#define SRC_SW(g) (LOCAL( g, SW ))
|
||||
#define SRC_NT(g) (LOCAL( g, NT ))
|
||||
#define SRC_NB(g) (LOCAL( g, NB ))
|
||||
#define SRC_ST(g) (LOCAL( g, ST ))
|
||||
#define SRC_SB(g) (LOCAL( g, SB ))
|
||||
#define SRC_ET(g) (LOCAL( g, ET ))
|
||||
#define SRC_EB(g) (LOCAL( g, EB ))
|
||||
#define SRC_WT(g) (LOCAL( g, WT ))
|
||||
#define SRC_WB(g) (LOCAL( g, WB ))
|
||||
|
||||
#define DST_C(g) (NEIGHBOR_C ( g, C ))
|
||||
#define DST_N(g) (NEIGHBOR_N ( g, N ))
|
||||
#define DST_S(g) (NEIGHBOR_S ( g, S ))
|
||||
#define DST_E(g) (NEIGHBOR_E ( g, E ))
|
||||
#define DST_W(g) (NEIGHBOR_W ( g, W ))
|
||||
#define DST_T(g) (NEIGHBOR_T ( g, T ))
|
||||
#define DST_B(g) (NEIGHBOR_B ( g, B ))
|
||||
#define DST_NE(g) (NEIGHBOR_NE( g, NE ))
|
||||
#define DST_NW(g) (NEIGHBOR_NW( g, NW ))
|
||||
#define DST_SE(g) (NEIGHBOR_SE( g, SE ))
|
||||
#define DST_SW(g) (NEIGHBOR_SW( g, SW ))
|
||||
#define DST_NT(g) (NEIGHBOR_NT( g, NT ))
|
||||
#define DST_NB(g) (NEIGHBOR_NB( g, NB ))
|
||||
#define DST_ST(g) (NEIGHBOR_ST( g, ST ))
|
||||
#define DST_SB(g) (NEIGHBOR_SB( g, SB ))
|
||||
#define DST_ET(g) (NEIGHBOR_ET( g, ET ))
|
||||
#define DST_EB(g) (NEIGHBOR_EB( g, EB ))
|
||||
#define DST_WT(g) (NEIGHBOR_WT( g, WT ))
|
||||
#define DST_WB(g) (NEIGHBOR_WB( g, WB ))
|
||||
|
||||
#else /* GATHER */
|
||||
|
||||
#define SRC_C(g) (NEIGHBOR_C ( g, C ))
|
||||
#define SRC_N(g) (NEIGHBOR_S ( g, N ))
|
||||
#define SRC_S(g) (NEIGHBOR_N ( g, S ))
|
||||
#define SRC_E(g) (NEIGHBOR_W ( g, E ))
|
||||
#define SRC_W(g) (NEIGHBOR_E ( g, W ))
|
||||
#define SRC_T(g) (NEIGHBOR_B ( g, T ))
|
||||
#define SRC_B(g) (NEIGHBOR_T ( g, B ))
|
||||
#define SRC_NE(g) (NEIGHBOR_SW( g, NE ))
|
||||
#define SRC_NW(g) (NEIGHBOR_SE( g, NW ))
|
||||
#define SRC_SE(g) (NEIGHBOR_NW( g, SE ))
|
||||
#define SRC_SW(g) (NEIGHBOR_NE( g, SW ))
|
||||
#define SRC_NT(g) (NEIGHBOR_SB( g, NT ))
|
||||
#define SRC_NB(g) (NEIGHBOR_ST( g, NB ))
|
||||
#define SRC_ST(g) (NEIGHBOR_NB( g, ST ))
|
||||
#define SRC_SB(g) (NEIGHBOR_NT( g, SB ))
|
||||
#define SRC_ET(g) (NEIGHBOR_WB( g, ET ))
|
||||
#define SRC_EB(g) (NEIGHBOR_WT( g, EB ))
|
||||
#define SRC_WT(g) (NEIGHBOR_EB( g, WT ))
|
||||
#define SRC_WB(g) (NEIGHBOR_ET( g, WB ))
|
||||
|
||||
#define DST_C(g) (LOCAL( g, C ))
|
||||
#define DST_N(g) (LOCAL( g, N ))
|
||||
#define DST_S(g) (LOCAL( g, S ))
|
||||
#define DST_E(g) (LOCAL( g, E ))
|
||||
#define DST_W(g) (LOCAL( g, W ))
|
||||
#define DST_T(g) (LOCAL( g, T ))
|
||||
#define DST_B(g) (LOCAL( g, B ))
|
||||
#define DST_NE(g) (LOCAL( g, NE ))
|
||||
#define DST_NW(g) (LOCAL( g, NW ))
|
||||
#define DST_SE(g) (LOCAL( g, SE ))
|
||||
#define DST_SW(g) (LOCAL( g, SW ))
|
||||
#define DST_NT(g) (LOCAL( g, NT ))
|
||||
#define DST_NB(g) (LOCAL( g, NB ))
|
||||
#define DST_ST(g) (LOCAL( g, ST ))
|
||||
#define DST_SB(g) (LOCAL( g, SB ))
|
||||
#define DST_ET(g) (LOCAL( g, ET ))
|
||||
#define DST_EB(g) (LOCAL( g, EB ))
|
||||
#define DST_WT(g) (LOCAL( g, WT ))
|
||||
#define DST_WB(g) (LOCAL( g, WB ))
|
||||
|
||||
#endif /* GATHER */
|
||||
|
||||
#define MAGIC_CAST(v) ((unsigned int*) ((void*) (&(v))))
|
||||
#define FLAG_VAR(v) unsigned int* _aux_ = MAGIC_CAST(v)
|
||||
|
||||
#define TEST_FLAG_SWEEP(g,f) ((*MAGIC_CAST(LOCAL(g, FLAGS))) & (f))
|
||||
#define SET_FLAG_SWEEP(g,f) {FLAG_VAR(LOCAL(g, FLAGS)); (*_aux_) |= (f);}
|
||||
#define CLEAR_FLAG_SWEEP(g,f) {FLAG_VAR(LOCAL(g, FLAGS)); (*_aux_) &= ~(f);}
|
||||
#define CLEAR_ALL_FLAGS_SWEEP(g) {FLAG_VAR(LOCAL(g, FLAGS)); (*_aux_) = 0;}
|
||||
|
||||
#define TEST_FLAG(g,x,y,z,f) ((*MAGIC_CAST(GRID_ENTRY(g, x, y, z, FLAGS))) & (f))
|
||||
#define SET_FLAG(g,x,y,z,f) {FLAG_VAR(GRID_ENTRY(g, x, y, z, FLAGS)); (*_aux_) |= (f);}
|
||||
#define CLEAR_FLAG(g,x,y,z,f) {FLAG_VAR(GRID_ENTRY(g, x, y, z, FLAGS)); (*_aux_) &= ~(f);}
|
||||
#define CLEAR_ALL_FLAGS(g,x,y,z) {FLAG_VAR(GRID_ENTRY(g, x, y, z, FLAGS)); (*_aux_) = 0;}
|
||||
|
||||
/*############################################################################*/
|
||||
|
||||
#endif /* _CONFIG_H_ */
|
||||
BIN
benchmarks/opencl/lbm/liblbm.a
Normal file
BIN
benchmarks/opencl/lbm/liblbm.a
Normal file
Binary file not shown.
238
benchmarks/opencl/lbm/main.cc
Normal file
238
benchmarks/opencl/lbm/main.cc
Normal file
@@ -0,0 +1,238 @@
|
||||
/***************************************************************************
|
||||
*cr
|
||||
*cr (C) Copyright 2010 The Board of Trustees of the
|
||||
*cr University of Illinois
|
||||
*cr All Rights Reserved
|
||||
*cr
|
||||
***************************************************************************/
|
||||
|
||||
/*############################################################################*/
|
||||
|
||||
#include <CL/cl.h>
|
||||
#include <parboil.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
|
||||
#include "layout_config.h"
|
||||
#include "lbm.h"
|
||||
#include "lbm_macros.h"
|
||||
#include "main.h"
|
||||
#include "ocl.h"
|
||||
|
||||
|
||||
/*############################################################################*/
|
||||
|
||||
static cl_mem OpenCL_srcGrid, OpenCL_dstGrid;
|
||||
|
||||
/*############################################################################*/
|
||||
|
||||
struct pb_TimerSet timers;
|
||||
int main(int nArgs, char *arg[]) {
|
||||
MAIN_Param param;
|
||||
int t;
|
||||
|
||||
OpenCL_Param prm;
|
||||
|
||||
pb_InitializeTimerSet(&timers);
|
||||
struct pb_Parameters *params;
|
||||
params = pb_ReadParameters(&nArgs, arg);
|
||||
|
||||
params->inpFiles = (char **)malloc(sizeof(char *) * 2);
|
||||
params->inpFiles[0] = (char *)malloc(100);
|
||||
params->inpFiles[1] = NULL;
|
||||
strncpy(params->inpFiles[0], "120_120_150_ldc.of", 100);
|
||||
|
||||
static LBM_GridPtr TEMP_srcGrid;
|
||||
// Setup TEMP datastructures
|
||||
LBM_allocateGrid((float **)&TEMP_srcGrid);
|
||||
MAIN_parseCommandLine(nArgs, arg, ¶m, params);
|
||||
MAIN_printInfo(¶m);
|
||||
|
||||
OpenCL_initialize(params, &prm);
|
||||
MAIN_initialize(¶m, &prm);
|
||||
|
||||
for (t = 1; t <= param.nTimeSteps; t++) {
|
||||
pb_SwitchToTimer(&timers, pb_TimerID_KERNEL);
|
||||
OpenCL_LBM_performStreamCollide(&prm, OpenCL_srcGrid, OpenCL_dstGrid);
|
||||
pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
|
||||
LBM_swapGrids(&OpenCL_srcGrid, &OpenCL_dstGrid);
|
||||
|
||||
if ((t & 63) == 0) {
|
||||
printf("timestep: %i\n", t);
|
||||
#if 0
|
||||
CUDA_LBM_getDeviceGrid((float**)&CUDA_srcGrid, (float**)&TEMP_srcGrid);
|
||||
LBM_showGridStatistics( *TEMP_srcGrid );
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
MAIN_finalize(¶m, &prm);
|
||||
|
||||
LBM_freeGrid((float **)&TEMP_srcGrid);
|
||||
|
||||
pb_SwitchToTimer(&timers, pb_TimerID_NONE);
|
||||
pb_PrintTimerSet(&timers);
|
||||
pb_FreeParameters(params);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*############################################################################*/
|
||||
|
||||
void MAIN_parseCommandLine(int nArgs, char *arg[], MAIN_Param *param,
|
||||
struct pb_Parameters *params) {
|
||||
struct stat fileStat;
|
||||
|
||||
/*if (nArgs < 2) {
|
||||
printf("syntax: lbm <time steps>\n");
|
||||
exit(1);
|
||||
}*/
|
||||
|
||||
param->nTimeSteps = 4; //atoi(arg[1]);
|
||||
|
||||
if (params->inpFiles[0] != NULL) {
|
||||
param->obstacleFilename = params->inpFiles[0];
|
||||
|
||||
/*if (stat(param->obstacleFilename, &fileStat) != 0) {
|
||||
printf("MAIN_parseCommandLine: cannot stat obstacle file '%s'\n",
|
||||
param->obstacleFilename);
|
||||
exit(1);
|
||||
}
|
||||
if (fileStat.st_size != SIZE_X * SIZE_Y * SIZE_Z + (SIZE_Y + 1) * SIZE_Z) {
|
||||
printf("MAIN_parseCommandLine:\n"
|
||||
"\tsize of file '%s' is %i bytes\n"
|
||||
"\texpected size is %i bytes\n",
|
||||
param->obstacleFilename, (int)fileStat.st_size,
|
||||
SIZE_X * SIZE_Y * SIZE_Z + (SIZE_Y + 1) * SIZE_Z);
|
||||
exit(1);
|
||||
}*/
|
||||
} else
|
||||
param->obstacleFilename = NULL;
|
||||
|
||||
param->resultFilename = params->outFile;
|
||||
}
|
||||
|
||||
/*############################################################################*/
|
||||
|
||||
void MAIN_printInfo(const MAIN_Param *param) {
|
||||
printf("MAIN_printInfo:\n"
|
||||
"\tgrid size : %i x %i x %i = %.2f * 10^6 Cells\n"
|
||||
"\tnTimeSteps : %i\n"
|
||||
"\tresult file : %s\n"
|
||||
"\taction : %s\n"
|
||||
"\tsimulation type: %s\n"
|
||||
"\tobstacle file : %s\n\n",
|
||||
SIZE_X, SIZE_Y, SIZE_Z, 1e-6 * SIZE_X * SIZE_Y * SIZE_Z,
|
||||
param->nTimeSteps, param->resultFilename, "store", "lid-driven cavity",
|
||||
(param->obstacleFilename == NULL) ? "<none>"
|
||||
: param->obstacleFilename);
|
||||
}
|
||||
|
||||
/*############################################################################*/
|
||||
|
||||
void MAIN_initialize(const MAIN_Param *param, const OpenCL_Param *prm) {
|
||||
static LBM_Grid TEMP_srcGrid, TEMP_dstGrid;
|
||||
|
||||
pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
|
||||
// Setup TEMP datastructures
|
||||
LBM_allocateGrid((float **)&TEMP_srcGrid);
|
||||
LBM_allocateGrid((float **)&TEMP_dstGrid);
|
||||
LBM_initializeGrid(TEMP_srcGrid);
|
||||
LBM_initializeGrid(TEMP_dstGrid);
|
||||
|
||||
pb_SwitchToTimer(&timers, pb_TimerID_IO);
|
||||
if (param->obstacleFilename != NULL) {
|
||||
LBM_loadObstacleFile(TEMP_srcGrid, param->obstacleFilename);
|
||||
LBM_loadObstacleFile(TEMP_dstGrid, param->obstacleFilename);
|
||||
}
|
||||
|
||||
pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
|
||||
LBM_initializeSpecialCellsForLDC(TEMP_srcGrid);
|
||||
LBM_initializeSpecialCellsForLDC(TEMP_dstGrid);
|
||||
|
||||
pb_SwitchToTimer(&timers, pb_TimerID_COPY);
|
||||
|
||||
printf("OK+\n");
|
||||
|
||||
// Setup DEVICE datastructures
|
||||
OpenCL_LBM_allocateGrid(prm, &OpenCL_srcGrid);
|
||||
OpenCL_LBM_allocateGrid(prm, &OpenCL_dstGrid);
|
||||
|
||||
printf("OK-\n");
|
||||
|
||||
// Initialize DEVICE datastructures
|
||||
OpenCL_LBM_initializeGrid(prm, OpenCL_srcGrid, TEMP_srcGrid);
|
||||
OpenCL_LBM_initializeGrid(prm, OpenCL_dstGrid, TEMP_dstGrid);
|
||||
|
||||
pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
|
||||
LBM_showGridStatistics(TEMP_srcGrid);
|
||||
|
||||
LBM_freeGrid((float **)&TEMP_srcGrid);
|
||||
LBM_freeGrid((float **)&TEMP_dstGrid);
|
||||
|
||||
printf("OK\n");
|
||||
}
|
||||
|
||||
/*############################################################################*/
|
||||
|
||||
void MAIN_finalize(const MAIN_Param *param, const OpenCL_Param *prm) {
|
||||
LBM_Grid TEMP_srcGrid;
|
||||
|
||||
// Setup TEMP datastructures
|
||||
LBM_allocateGrid((float **)&TEMP_srcGrid);
|
||||
|
||||
pb_SwitchToTimer(&timers, pb_TimerID_COPY);
|
||||
OpenCL_LBM_getDeviceGrid(prm, OpenCL_srcGrid, TEMP_srcGrid);
|
||||
|
||||
pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
|
||||
LBM_showGridStatistics(TEMP_srcGrid);
|
||||
|
||||
LBM_storeVelocityField(TEMP_srcGrid, param->resultFilename, TRUE);
|
||||
|
||||
LBM_freeGrid((float **)&TEMP_srcGrid);
|
||||
OpenCL_LBM_freeGrid(OpenCL_srcGrid);
|
||||
OpenCL_LBM_freeGrid(OpenCL_dstGrid);
|
||||
|
||||
clReleaseProgram(prm->clProgram);
|
||||
clReleaseKernel(prm->clKernel);
|
||||
clReleaseCommandQueue(prm->clCommandQueue);
|
||||
clReleaseContext(prm->clContext);
|
||||
}
|
||||
|
||||
void OpenCL_initialize(struct pb_Parameters *p, OpenCL_Param *prm) {
|
||||
cl_int clStatus;
|
||||
pb_Context *pb_context;
|
||||
pb_context = pb_InitOpenCLContext(p);
|
||||
if (pb_context == NULL) {
|
||||
fprintf(stderr, "Error: No OpenCL platform/device can be found.");
|
||||
return;
|
||||
}
|
||||
prm->clDevice = (cl_device_id)pb_context->clDeviceId;
|
||||
prm->clPlatform = (cl_platform_id)pb_context->clPlatformId;
|
||||
prm->clContext = (cl_context)pb_context->clContext;
|
||||
|
||||
prm->clCommandQueue = clCreateCommandQueue(
|
||||
prm->clContext, prm->clDevice, CL_QUEUE_PROFILING_ENABLE, &clStatus);
|
||||
CHECK_ERROR("clCreateCommandQueue")
|
||||
|
||||
pb_SetOpenCL(&(prm->clContext), &(prm->clCommandQueue));
|
||||
|
||||
//const char *clSource[] = {readFile("src/opencl_base/kernel.cl")};
|
||||
//prm->clProgram = clCreateProgramWithSource(prm->clContext, 1, clSource, NULL, &clStatus);
|
||||
prm->clProgram = clCreateProgramWithBuiltInKernels(
|
||||
prm->clContext, 1, &prm->clDevice, "performStreamCollide_kernel", &clStatus);
|
||||
CHECK_ERROR("clCreateProgramWithSource")
|
||||
|
||||
//char clOptions[100];
|
||||
//sprintf(clOptions, "-I src/opencl_base");
|
||||
//clStatus = clBuildProgram(prm->clProgram, 1, &(prm->clDevice), clOptions, NULL, NULL);
|
||||
clStatus = clBuildProgram(prm->clProgram, 1, &prm->clDevice, NULL, NULL, NULL);
|
||||
CHECK_ERROR("clBuildProgram")
|
||||
|
||||
prm->clKernel =
|
||||
clCreateKernel(prm->clProgram, "performStreamCollide_kernel", &clStatus);
|
||||
CHECK_ERROR("clCreateKernel")
|
||||
|
||||
//free((void *)clSource[0]);
|
||||
}
|
||||
31
benchmarks/opencl/lbm/main.h
Normal file
31
benchmarks/opencl/lbm/main.h
Normal file
@@ -0,0 +1,31 @@
|
||||
/***************************************************************************
|
||||
*cr
|
||||
*cr (C) Copyright 2010 The Board of Trustees of the
|
||||
*cr University of Illinois
|
||||
*cr All Rights Reserved
|
||||
*cr
|
||||
***************************************************************************/
|
||||
|
||||
#ifndef _MAIN_H_
|
||||
#define _MAIN_H_
|
||||
|
||||
/*############################################################################*/
|
||||
|
||||
typedef struct {
|
||||
int nTimeSteps;
|
||||
char* resultFilename;
|
||||
char* obstacleFilename;
|
||||
} MAIN_Param;
|
||||
|
||||
/*############################################################################*/
|
||||
|
||||
void MAIN_parseCommandLine( int nArgs, char* arg[], MAIN_Param* param, struct pb_Parameters* );
|
||||
void MAIN_printInfo( const MAIN_Param* param );
|
||||
void MAIN_initialize( const MAIN_Param* param, const OpenCL_Param* prm );
|
||||
void MAIN_finalize( const MAIN_Param* param, const OpenCL_Param* prm );
|
||||
|
||||
void OpenCL_initialize(struct pb_Parameters*, OpenCL_Param* prm);
|
||||
|
||||
/*############################################################################*/
|
||||
|
||||
#endif /* _MAIN_H_ */
|
||||
40
benchmarks/opencl/lbm/ocl.c
Normal file
40
benchmarks/opencl/lbm/ocl.c
Normal file
@@ -0,0 +1,40 @@
|
||||
#include <CL/cl.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "ocl.h"
|
||||
|
||||
char* readFile(char* fileName)
|
||||
{
|
||||
FILE* fp;
|
||||
fp = fopen(fileName,"r");
|
||||
|
||||
if(fp == NULL)
|
||||
{
|
||||
printf("Error 1!\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
fseek(fp,0,SEEK_END);
|
||||
long size = ftell(fp);
|
||||
rewind(fp);
|
||||
|
||||
char* buffer = (char*)malloc(sizeof(char)*(size+1));
|
||||
if(buffer == NULL)
|
||||
{
|
||||
printf("Error 2!\n");
|
||||
fclose(fp);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
size_t res = fread(buffer,1,size,fp);
|
||||
if(res != size)
|
||||
{
|
||||
printf("Error 3!\n");
|
||||
fclose(fp);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
buffer[size] = 0;
|
||||
fclose(fp);
|
||||
return buffer;
|
||||
}
|
||||
25
benchmarks/opencl/lbm/ocl.h
Normal file
25
benchmarks/opencl/lbm/ocl.h
Normal file
@@ -0,0 +1,25 @@
|
||||
#ifndef __OCLH__
|
||||
#define __OCLH__
|
||||
|
||||
typedef struct {
|
||||
cl_platform_id clPlatform;
|
||||
cl_context_properties clCps[3];
|
||||
cl_device_id clDevice;
|
||||
cl_context clContext;
|
||||
cl_command_queue clCommandQueue;
|
||||
cl_program clProgram;
|
||||
cl_kernel clKernel;
|
||||
} OpenCL_Param;
|
||||
|
||||
|
||||
#define CHECK_ERROR(errorMessage) \
|
||||
if(clStatus != CL_SUCCESS) \
|
||||
{ \
|
||||
printf("Error: %s!\n",errorMessage); \
|
||||
printf("Line: %d\n",__LINE__); \
|
||||
exit(1); \
|
||||
}
|
||||
|
||||
char* readFile(char*);
|
||||
|
||||
#endif
|
||||
348
benchmarks/opencl/lbm/parboil.h
Normal file
348
benchmarks/opencl/lbm/parboil.h
Normal file
@@ -0,0 +1,348 @@
|
||||
/*
|
||||
* (c) 2010 The Board of Trustees of the University of Illinois.
|
||||
*/
|
||||
#ifndef PARBOIL_HEADER
|
||||
#define PARBOIL_HEADER
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <unistd.h>
|
||||
|
||||
/* A platform as specified by the user on the command line */
|
||||
struct pb_PlatformParam {
|
||||
char *name; /* The platform name. This string is owned. */
|
||||
char *version; /* The platform version; may be NULL.
|
||||
* This string is owned. */
|
||||
};
|
||||
|
||||
/* Create a PlatformParam from the given strings.
|
||||
* 'name' must not be NULL. 'version' may be NULL.
|
||||
* If not NULL, the strings should have been allocated by malloc(),
|
||||
* and they will be owned by the returned object.
|
||||
*/
|
||||
struct pb_PlatformParam *
|
||||
pb_PlatformParam(char *name, char *version);
|
||||
|
||||
void
|
||||
pb_FreePlatformParam(struct pb_PlatformParam *);
|
||||
|
||||
/* A criterion for how to select a device */
|
||||
enum pb_DeviceSelectionCriterion {
|
||||
pb_Device_INDEX, /* Enumerate the devices and select one
|
||||
* by its number */
|
||||
pb_Device_CPU, /* Select a CPU device */
|
||||
pb_Device_GPU, /* Select a GPU device */
|
||||
pb_Device_ACCELERATOR, /* Select an accelerator device */
|
||||
pb_Device_NAME /* Select a device by name */
|
||||
};
|
||||
|
||||
/* A device as specified by the user on the command line */
|
||||
struct pb_DeviceParam {
|
||||
enum pb_DeviceSelectionCriterion criterion;
|
||||
union {
|
||||
int index; /* If criterion == pb_Device_INDEX,
|
||||
* the index of the device */
|
||||
char *name; /* If criterion == pb_Device_NAME,
|
||||
* the name of the device.
|
||||
* This string is owned. */
|
||||
};
|
||||
};
|
||||
|
||||
struct pb_DeviceParam *
|
||||
pb_DeviceParam_index(int index);
|
||||
|
||||
struct pb_DeviceParam *
|
||||
pb_DeviceParam_cpu(void);
|
||||
|
||||
struct pb_DeviceParam *
|
||||
pb_DeviceParam_gpu(void);
|
||||
|
||||
struct pb_DeviceParam *
|
||||
pb_DeviceParam_accelerator(void);
|
||||
|
||||
/* Create a by-name device selection criterion.
|
||||
* The string should have been allocated by malloc(), and it will will be
|
||||
* owned by the returned object.
|
||||
*/
|
||||
struct pb_DeviceParam *
|
||||
pb_DeviceParam_name(char *name);
|
||||
|
||||
void
|
||||
pb_FreeDeviceParam(struct pb_DeviceParam *);
|
||||
|
||||
/* Command line parameters for benchmarks */
|
||||
struct pb_Parameters {
|
||||
char *outFile; /* If not NULL, the raw output of the
|
||||
* computation should be saved to this
|
||||
* file. The string is owned. */
|
||||
char **inpFiles; /* A NULL-terminated array of strings
|
||||
* holding the input file(s) for the
|
||||
* computation. The array and strings
|
||||
* are owned. */
|
||||
struct pb_PlatformParam *platform; /* If not NULL, the platform
|
||||
* specified on the command line. */
|
||||
struct pb_DeviceParam *device; /* If not NULL, the device
|
||||
* specified on the command line. */
|
||||
};
|
||||
|
||||
/* Read command-line parameters.
|
||||
*
|
||||
* The argc and argv parameters to main are read, and any parameters
|
||||
* interpreted by this function are removed from the argument list.
|
||||
*
|
||||
* A new instance of struct pb_Parameters is returned.
|
||||
* If there is an error, then an error message is printed on stderr
|
||||
* and NULL is returned.
|
||||
*/
|
||||
struct pb_Parameters *
|
||||
pb_ReadParameters(int *_argc, char **argv);
|
||||
|
||||
/* Free an instance of struct pb_Parameters.
|
||||
*/
|
||||
void
|
||||
pb_FreeParameters(struct pb_Parameters *p);
|
||||
|
||||
void
|
||||
pb_FreeStringArray(char **);
|
||||
|
||||
/* Count the number of input files in a pb_Parameters instance.
|
||||
*/
|
||||
int
|
||||
pb_Parameters_CountInputs(struct pb_Parameters *p);
|
||||
|
||||
/* A time or duration. */
|
||||
//#if _POSIX_VERSION >= 200112L
|
||||
typedef unsigned long long pb_Timestamp; /* time in microseconds */
|
||||
//#else
|
||||
//# error "Timestamps not implemented"
|
||||
//#endif
|
||||
|
||||
enum pb_TimerState {
|
||||
pb_Timer_STOPPED,
|
||||
pb_Timer_RUNNING,
|
||||
};
|
||||
|
||||
struct pb_Timer {
|
||||
enum pb_TimerState state;
|
||||
pb_Timestamp elapsed; /* Amount of time elapsed so far */
|
||||
pb_Timestamp init; /* Beginning of the current time interval,
|
||||
* if state is RUNNING. End of the last
|
||||
* recorded time interfal otherwise. */
|
||||
};
|
||||
|
||||
/* Reset a timer.
|
||||
* Use this to initialize a timer or to clear
|
||||
* its elapsed time. The reset timer is stopped.
|
||||
*/
|
||||
void
|
||||
pb_ResetTimer(struct pb_Timer *timer);
|
||||
|
||||
/* Start a timer. The timer is set to RUNNING mode and
|
||||
* time elapsed while the timer is running is added to
|
||||
* the timer.
|
||||
* The timer should not already be running.
|
||||
*/
|
||||
void
|
||||
pb_StartTimer(struct pb_Timer *timer);
|
||||
|
||||
/* Stop a timer.
|
||||
* This stops adding elapsed time to the timer.
|
||||
* The timer should not already be stopped.
|
||||
*/
|
||||
void
|
||||
pb_StopTimer(struct pb_Timer *timer);
|
||||
|
||||
/* Get the elapsed time in seconds. */
|
||||
double
|
||||
pb_GetElapsedTime(struct pb_Timer *timer);
|
||||
|
||||
/* Execution time is assigned to one of these categories. */
|
||||
enum pb_TimerID {
|
||||
pb_TimerID_NONE = 0,
|
||||
pb_TimerID_IO, /* Time spent in input/output */
|
||||
pb_TimerID_KERNEL, /* Time spent computing on the device,
|
||||
* recorded asynchronously */
|
||||
pb_TimerID_COPY, /* Time spent synchronously moving data
|
||||
* to/from device and allocating/freeing
|
||||
* memory on the device */
|
||||
pb_TimerID_DRIVER, /* Time spent in the host interacting with the
|
||||
* driver, primarily for recording the time
|
||||
* spent queueing asynchronous operations */
|
||||
pb_TimerID_COPY_ASYNC, /* Time spent in asynchronous transfers */
|
||||
pb_TimerID_COMPUTE, /* Time for all program execution other
|
||||
* than parsing command line arguments,
|
||||
* I/O, kernel, and copy */
|
||||
pb_TimerID_OVERLAP, /* Time double-counted in asynchronous and
|
||||
* host activity: automatically filled in,
|
||||
* not intended for direct usage */
|
||||
pb_TimerID_LAST /* Number of timer IDs */
|
||||
};
|
||||
|
||||
/* Dynamic list of asynchronously tracked times between events */
|
||||
struct pb_async_time_marker_list {
|
||||
char *label; // actually just a pointer to a string
|
||||
enum pb_TimerID timerID; /* The ID to which the interval beginning
|
||||
* with this marker should be attributed */
|
||||
void * marker;
|
||||
//cudaEvent_t marker; /* The driver event for this marker */
|
||||
struct pb_async_time_marker_list *next;
|
||||
};
|
||||
|
||||
struct pb_SubTimer {
|
||||
char *label;
|
||||
struct pb_Timer timer;
|
||||
struct pb_SubTimer *next;
|
||||
};
|
||||
|
||||
struct pb_SubTimerList {
|
||||
struct pb_SubTimer *current;
|
||||
struct pb_SubTimer *subtimer_list;
|
||||
};
|
||||
|
||||
/* A set of timers for recording execution times. */
|
||||
struct pb_TimerSet {
|
||||
enum pb_TimerID current;
|
||||
struct pb_async_time_marker_list* async_markers;
|
||||
pb_Timestamp async_begin;
|
||||
pb_Timestamp wall_begin;
|
||||
struct pb_Timer timers[pb_TimerID_LAST];
|
||||
struct pb_SubTimerList *sub_timer_list[pb_TimerID_LAST];
|
||||
};
|
||||
|
||||
/* Reset all timers in the set. */
|
||||
void
|
||||
pb_InitializeTimerSet(struct pb_TimerSet *timers);
|
||||
|
||||
void
|
||||
pb_AddSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID pb_Category);
|
||||
|
||||
/* Select which timer the next interval of time should be accounted
|
||||
* to. The selected timer is started and other timers are stopped.
|
||||
* Using pb_TimerID_NONE stops all timers. */
|
||||
void
|
||||
pb_SwitchToTimer(struct pb_TimerSet *timers, enum pb_TimerID timer);
|
||||
|
||||
void
|
||||
pb_SwitchToSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID category);
|
||||
|
||||
/* Print timer values to standard output. */
|
||||
void
|
||||
pb_PrintTimerSet(struct pb_TimerSet *timers);
|
||||
|
||||
/* Release timer resources */
|
||||
void
|
||||
pb_DestroyTimerSet(struct pb_TimerSet * timers);
|
||||
|
||||
void
|
||||
pb_SetOpenCL(void *clContextPtr, void *clCommandQueuePtr);
|
||||
|
||||
|
||||
typedef struct pb_Device_tag {
|
||||
char* name;
|
||||
void* clDevice;
|
||||
int id;
|
||||
unsigned int in_use;
|
||||
unsigned int available;
|
||||
} pb_Device;
|
||||
|
||||
struct pb_Context_tag;
|
||||
typedef struct pb_Context_tag pb_Context;
|
||||
|
||||
typedef struct pb_Platform_tag {
|
||||
char* name;
|
||||
char* version;
|
||||
void* clPlatform;
|
||||
unsigned int in_use;
|
||||
pb_Context** contexts;
|
||||
pb_Device** devices;
|
||||
} pb_Platform;
|
||||
|
||||
struct pb_Context_tag {
|
||||
void* clPlatformId;
|
||||
void* clContext;
|
||||
void* clDeviceId;
|
||||
pb_Platform* pb_platform;
|
||||
pb_Device* pb_device;
|
||||
};
|
||||
|
||||
// verbosely print out list of platforms and their devices to the console.
|
||||
pb_Platform**
|
||||
pb_GetPlatforms();
|
||||
|
||||
// Choose a platform according to the given platform specification
|
||||
pb_Platform*
|
||||
pb_GetPlatform(struct pb_PlatformParam *platform);
|
||||
|
||||
// choose a platform: by name, name & version
|
||||
pb_Platform*
|
||||
pb_GetPlatformByName(const char* name);
|
||||
|
||||
pb_Platform*
|
||||
pb_GetPlatformByNameAndVersion(const char* name, const char* version);
|
||||
|
||||
// Choose a device according to the given device specification
|
||||
pb_Device*
|
||||
pb_GetDevice(pb_Platform* pb_platform, struct pb_DeviceParam *device);
|
||||
|
||||
pb_Device**
|
||||
pb_GetDevices(pb_Platform* pb_platform);
|
||||
|
||||
// choose a device by name.
|
||||
pb_Device*
|
||||
pb_GetDeviceByName(pb_Platform* pb_platform, const char* name);
|
||||
|
||||
pb_Platform*
|
||||
pb_GetPlatformByEnvVars();
|
||||
|
||||
pb_Context*
|
||||
pb_InitOpenCLContext(struct pb_Parameters* parameters);
|
||||
|
||||
void
|
||||
pb_ReleasePlatforms();
|
||||
|
||||
void
|
||||
pb_ReleaseContext(pb_Context* c);
|
||||
|
||||
void
|
||||
pb_PrintPlatformInfo(pb_Context* c);
|
||||
|
||||
void
|
||||
perf_init();
|
||||
|
||||
//#define MEASURE_KERNEL_TIME
|
||||
|
||||
#include <CL/cl.h>
|
||||
|
||||
#ifdef MEASURE_KERNEL_TIME
|
||||
#define clEnqueueNDRangeKernel(q,k,d,o,dg,db,a,b,c) pb_clEnqueueNDRangeKernel((q), (k), (d), (o), (dg), (db), (a), (b), (c))
|
||||
cl_int
|
||||
pb_clEnqueueNDRangeKernel(cl_command_queue /* command_queue */,
|
||||
cl_kernel /* kernel */,
|
||||
cl_uint /* work_dim */,
|
||||
const size_t * /* global_work_offset */,
|
||||
const size_t * /* global_work_size */,
|
||||
const size_t * /* local_work_size */,
|
||||
cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event * /* event_wait_list */,
|
||||
cl_event * /* event */);
|
||||
#endif
|
||||
|
||||
enum { T_FLOAT, T_DOUBLE, T_SHORT, T_INT, T_UCHAR };
|
||||
void pb_sig_float(char*, float*, int);
|
||||
void pb_sig_double(char*, double*, int);
|
||||
void pb_sig_short(char*, short*, int);
|
||||
void pb_sig_int(char*, int*, int);
|
||||
void pb_sig_uchar(char*, unsigned char*, unsigned int);
|
||||
void pb_sig_clmem(char*, cl_command_queue, cl_mem, int);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif //PARBOIL_HEADER
|
||||
|
||||
1394
benchmarks/opencl/lbm/parboil_opencl.c
Normal file
1394
benchmarks/opencl/lbm/parboil_opencl.c
Normal file
File diff suppressed because it is too large
Load Diff
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -93,8 +93,8 @@ int main(int argc, char** argv) {
|
||||
|
||||
nx = 64;
|
||||
ny = 64;
|
||||
nz = 4;
|
||||
iteration = 2;
|
||||
nz = 8;
|
||||
iteration = 1;
|
||||
|
||||
cl_int clStatus;
|
||||
cl_context clContext;
|
||||
@@ -182,7 +182,7 @@ int main(int argc, char** argv) {
|
||||
printf("OK+\n");
|
||||
|
||||
//only use 1D thread block
|
||||
int tx = 256;
|
||||
int tx = 128;
|
||||
size_t block[3] = {tx,1,1};
|
||||
size_t grid[3] = {(nx-2+tx-1)/tx*tx,ny-2,nz-2};
|
||||
//size_t grid[3] = {nx-2,ny-2,nz-2};
|
||||
@@ -211,7 +211,7 @@ int main(int argc, char** argv) {
|
||||
{
|
||||
clStatus = clEnqueueNDRangeKernel(clCommandQueue,clKernel,3,NULL,grid,block,0,NULL,NULL);
|
||||
printf("OK+0\n");
|
||||
|
||||
|
||||
//printf("iteration %d\n",t)
|
||||
CHECK_ERROR("clEnqueueNDRangeKernel")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user