This commit is contained in:
Blaise Tine
2019-11-25 14:07:51 -05:00
parent 8d26e402ca
commit e1197575e6
23 changed files with 10089 additions and 8 deletions

View File

@@ -0,0 +1,68 @@
RISCV_TOOL_PATH = $(wildcard ~/dev/riscv-gnu-toolchain/drops)
POCL_CC_PATH = $(wildcard ~/dev/pocl/drops_riscv_cc)
POCL_INC_PATH = $(wildcard ../include)
POCL_LIB_PATH = $(wildcard ../lib)
VX_RT_PATH = $(wildcard ../../../runtime)
VX_SIMX_PATH = $(wildcard ../../../simX/obj_dir)
CC = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc
CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++
DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump
HEX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy
GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.s
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c
VX_SRCS += $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST)
VX_CFLAGS = -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld
CXXFLAGS = -g -O0 -march=rv32im -mabi=ilp32
CXXFLAGS += -ffreestanding # program may not begin at main()
CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections
CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions
CXXFLAGS += -I$(POCL_INC_PATH) -I.
VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a
PROJECT = cutcp
SRCS = main.cc args.c parboil_opencl.c ocl.c gpu_info.c cutoff.c cutcpu.c output.c readatom.c excl.c
all: $(PROJECT).dump $(PROJECT).hex
lib$(PROJECT).a: kernel.cl
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl
$(PROJECT).elf: $(SRCS) lib$(PROJECT).a
$(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) $(SRCS) $(VX_LIBS) -o $(PROJECT).elf
$(PROJECT).qemu: $(SRCS) lib$(PROJECT).a
$(CXX) $(CXXFLAGS) $(SRCS) $(QEMU_LIBS) -o $(PROJECT).qemu
$(PROJECT).hex: $(PROJECT).elf
$(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex
$(PROJECT).dump: $(PROJECT).elf
$(DMP) -D $(PROJECT).elf > $(PROJECT).dump
run: $(PROJECT).hex
POCL_DEBUG=all $(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug
qemu: $(PROJECT).qemu
POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -d in_asm -D debug.log $(PROJECT).qemu
gdb-s: $(PROJECT).qemu
POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -g 1234 -d in_asm -D debug.log $(PROJECT).qemu
gdb-c: $(PROJECT).qemu
$(GDB) $(PROJECT).qemu
clean:
rm -rf *.o *.elf *.dump *.hex *.qemu *.log *.debug

View File

@@ -0,0 +1,617 @@
#include <parboil.h>
#include <errno.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
/*****************************************************************************/
/* Memory management routines */
/* Free an array of owned strings. */
void
pb_FreeStringArray(char **string_array)
{
char **p;
if (!string_array) return;
for (p = string_array; *p; p++) free(*p);
free(string_array);
}
struct pb_PlatformParam *
pb_PlatformParam(char *name, char *version)
{
if (name == NULL) {
fprintf(stderr, "pb_PlatformParam: Invalid argument\n");
exit(-1);
}
struct pb_PlatformParam *ret =
(struct pb_PlatformParam *)malloc(sizeof (struct pb_PlatformParam));
ret->name = name;
ret->version = version;
return ret;
}
void
pb_FreePlatformParam(struct pb_PlatformParam *p)
{
if (p == NULL) return;
free(p->name);
free(p->version);
free(p);
}
struct pb_DeviceParam *
pb_DeviceParam_index(int index)
{
struct pb_DeviceParam *ret =
(struct pb_DeviceParam *)malloc(sizeof (struct pb_DeviceParam));
ret->criterion = pb_Device_INDEX;
ret->index = index;
return ret;
}
struct pb_DeviceParam *
pb_DeviceParam_cpu(void)
{
struct pb_DeviceParam *ret =
(struct pb_DeviceParam *)malloc(sizeof (struct pb_DeviceParam));
ret->criterion = pb_Device_CPU;
return ret;
}
struct pb_DeviceParam *
pb_DeviceParam_gpu(void)
{
struct pb_DeviceParam *ret =
(struct pb_DeviceParam *)malloc(sizeof (struct pb_DeviceParam));
ret->criterion = pb_Device_GPU;
return ret;
}
struct pb_DeviceParam *
pb_DeviceParam_accelerator(void)
{
struct pb_DeviceParam *ret =
(struct pb_DeviceParam *)malloc(sizeof (struct pb_DeviceParam));
ret->criterion = pb_Device_ACCELERATOR;
return ret;
}
struct pb_DeviceParam *
pb_DeviceParam_name(char *name)
{
struct pb_DeviceParam *ret =
(struct pb_DeviceParam *)malloc(sizeof (struct pb_DeviceParam));
ret->criterion = pb_Device_NAME;
ret->name = name;
return ret;
}
void
pb_FreeDeviceParam(struct pb_DeviceParam *p)
{
if (p == NULL) return;
switch(p->criterion) {
case pb_Device_NAME:
free(p->name);
break;
case pb_Device_INDEX:
case pb_Device_CPU:
case pb_Device_ACCELERATOR:
break;
default:
fprintf(stderr, "pb_FreeDeviceParam: Invalid argument\n");
exit(-1);
}
}
void
pb_FreeParameters(struct pb_Parameters *p)
{
free(p->outFile);
pb_FreeStringArray(p->inpFiles);
pb_FreePlatformParam(p->platform);
pb_FreeDeviceParam(p->device);
free(p);
}
/*****************************************************************************/
/* Parse a comma-delimited list of strings into an
* array of strings. */
static char **
read_string_array(char *in)
{
char **ret;
int i;
int count; /* Number of items in the input */
char *substring; /* Current substring within 'in' */
/* Count the number of items in the string */
count = 1;
for (i = 0; in[i]; i++) if (in[i] == ',') count++;
/* Allocate storage */
ret = (char **)malloc((count + 1) * sizeof(char *));
/* Create copies of the strings from the list */
substring = in;
for (i = 0; i < count; i++) {
char *substring_end;
int substring_length;
/* Find length of substring */
for (substring_end = substring;
(*substring_end != ',') && (*substring_end != 0);
substring_end++);
substring_length = substring_end - substring;
/* Allocate memory and copy the substring */
ret[i] = (char *)malloc(substring_length + 1);
memcpy(ret[i], substring, substring_length);
ret[i][substring_length] = 0;
/* go to next substring */
substring = substring_end + 1;
}
ret[i] = NULL; /* Write the sentinel value */
return ret;
}
static void
report_parse_error(const char *str)
{
fputs(str, stderr);
}
/* Interpret a string as a 'pb_DeviceParam' value.
* Return a pointer to a new value, or NULL on failure.
*/
static struct pb_DeviceParam *
read_device_param(char *str)
{
/* Try different ways of interpreting 'device_string' until one works */
/* If argument is an integer, then interpret it as a device index */
errno = 0;
char *end;
long device_int = strtol(str, &end, 10);
if (!errno) {
/* Negative numbers are not valid */
if (device_int < 0 || device_int > INT_MAX) return NULL;
return pb_DeviceParam_index(device_int);
}
/* Match against predefined strings */
if (strcmp(str, "CPU") == 0)
return pb_DeviceParam_cpu();
if (strcmp(str, "GPU") == 0)
return pb_DeviceParam_gpu();
if (strcmp(str, "ACCELERATOR") == 0)
return pb_DeviceParam_accelerator();
/* Assume any other string is a device name */
return pb_DeviceParam_name(strdup(str));
}
/* Interpret a string as a 'pb_PlatformParam' value.
* Return a pointer to a new value, or NULL on failure.
*/
static struct pb_PlatformParam *
read_platform_param(char *str)
{
int separator_index; /* Index of the '-' character separating
* name and version number. It's -1 if
* there's no '-' character. */
/* Find the last occurrence of '-' in 'str' */
{
char *cur;
separator_index = -1;
for (cur = str; *cur; cur++) {
if (*cur == '-') separator_index = cur - str;
}
}
/* The platform name is either the entire string, or all characters before
* the separator */
int name_length = separator_index == -1 ? strlen(str) : separator_index;
char *name_str = (char *)malloc(name_length + 1);
memcpy(name_str, str, name_length);
name_str[name_length] = 0;
/* The version is either NULL, or all characters after the separator */
char *version_str;
if (separator_index == -1) {
version_str = NULL;
}
else {
const char *version_input_str = str + separator_index + 1;
int version_length = strlen(version_input_str);
version_str = (char *)malloc(version_length + 1);
memcpy(version_str, version_input_str, version_length);
version_str[version_length] = 0;
}
/* Create output structure */
return pb_PlatformParam(name_str, version_str);
}
/****************************************************************************/
/* Argument parsing state */
/* Argument parsing state.
*
* Arguments that are interpreted by the argument parser are removed from
* the list. Variables 'argc' and 'argn' do not count arguments that have
* been removed.
*
* During argument parsing, the array of arguments is compacted, overwriting
* the erased arguments. Variable 'argv_put' points to the array element
* where the next argument will be written. Variable 'argv_get' points to
* the array element where the next argument will be read from.
*/
struct argparse {
int argc; /* Number of arguments. Mutable. */
int argn; /* Current argument index. */
char **argv_get; /* Argument value being read. */
char **argv_put; /* Argument value being written.
* argv_put <= argv_get. */
};
static void
initialize_argparse(struct argparse *ap, int argc, char **argv)
{
ap->argc = argc;
ap->argn = 0;
ap->argv_get = ap->argv_put = argv;
}
/* Finish argument parsing, without processing the remaining arguments.
* Write new argument count into _argc. */
static void
finalize_argparse(struct argparse *ap, int *_argc, char **argv)
{
/* Move the remaining arguments */
for(; ap->argn < ap->argc; ap->argn++)
*ap->argv_put++ = *ap->argv_get++;
/* Update the argument count */
*_argc = ap->argc;
/* Insert a terminating NULL */
argv[ap->argc] = NULL;
}
/* Delete the current argument. The argument will not be visible
* when argument parsing is done. */
static void
delete_argument(struct argparse *ap)
{
if (ap->argn >= ap->argc) {
fprintf(stderr, "delete_argument\n");
}
ap->argc--;
ap->argv_get++;
}
/* Go to the next argument. Also, move the current argument to its
* final location in argv. */
static void
next_argument(struct argparse *ap)
{
if (ap->argn >= ap->argc) {
fprintf(stderr, "next_argument\n");
}
/* Move argument to its new location. */
*ap->argv_put++ = *ap->argv_get++;
ap->argn++;
}
static int
is_end_of_arguments(struct argparse *ap)
{
return ap->argn == ap->argc;
}
/* Get the current argument */
static char *
get_argument(struct argparse *ap)
{
return *ap->argv_get;
}
/* Get the current argument, and also delete it */
static char *
consume_argument(struct argparse *ap)
{
char *ret = get_argument(ap);
delete_argument(ap);
return ret;
}
/****************************************************************************/
/* The result of parsing a command-line argument */
typedef enum {
ARGPARSE_OK, /* Success */
ARGPARSE_ERROR, /* Error */
ARGPARSE_DONE /* Success, and do not continue parsing */
} result;
typedef result parse_action(struct argparse *ap, struct pb_Parameters *params);
/* A command-line option */
struct option {
char short_name; /* If not 0, the one-character
* name of this option */
const char *long_name; /* If not NULL, the long name of this option */
parse_action *action; /* What to do when this option occurs.
* Sentinel value is NULL.
*/
};
/* Output file
*
* -o FILE
*/
static result
parse_output_file(struct argparse *ap, struct pb_Parameters *params)
{
if (is_end_of_arguments(ap))
{
report_parse_error("Expecting file name after '-o'\n");
return ARGPARSE_ERROR;
}
/* Replace the output file name */
free(params->outFile);
params->outFile = strdup(consume_argument(ap));
return ARGPARSE_OK;
}
/* Input files
*
* -i FILE,FILE,...
*/
static result
parse_input_files(struct argparse *ap, struct pb_Parameters *params)
{
if (is_end_of_arguments(ap))
{
report_parse_error("Expecting file name after '-i'\n");
return ARGPARSE_ERROR;
}
/* Replace the input file list */
pb_FreeStringArray(params->inpFiles);
params->inpFiles = read_string_array(consume_argument(ap));
return ARGPARSE_OK;
}
/* End of options
*
* --
*/
static result
parse_end_options(struct argparse *ap, struct pb_Parameters *params)
{
return ARGPARSE_DONE;
}
/* OpenCL device
*
* --device X
*/
static result
parse_device(struct argparse *ap, struct pb_Parameters *params)
{
/* Read the next argument, which specifies a device */
if (is_end_of_arguments(ap))
{
report_parse_error("Expecting device specification after '--device'\n");
return ARGPARSE_ERROR;
}
char *device_string = consume_argument(ap);
struct pb_DeviceParam *device_param = read_device_param(device_string);
if (!device_param) {
report_parse_error("Unrecognized device specification format on command line\n");
return ARGPARSE_ERROR;
}
/* Save the result */
pb_FreeDeviceParam(params->device);
params->device = device_param;
return ARGPARSE_OK;
}
static result
parse_platform(struct argparse *ap, struct pb_Parameters *params)
{
/* Read the next argument, which specifies a platform */
if (is_end_of_arguments(ap))
{
report_parse_error("Expecting device specification after '--platform'\n");
return ARGPARSE_ERROR;
}
char *platform_string = consume_argument(ap);
struct pb_PlatformParam *platform_param = read_platform_param(platform_string);
if (!platform_param) {
report_parse_error("Unrecognized platform specification format on command line\n");
return ARGPARSE_ERROR;
}
/* Save the result */
pb_FreePlatformParam(params->platform);
params->platform = platform_param;
return ARGPARSE_OK;
}
static struct option options[] = {
{ 'o', NULL, &parse_output_file },
{ 'i', NULL, &parse_input_files },
{ '-', NULL, &parse_end_options },
{ 0, "device", &parse_device },
{ 0, "platform", &parse_platform },
{ 0, NULL, NULL }
};
static int
is_last_option(struct option *op)
{
return op->action == NULL;
}
/****************************************************************************/
/* Parse command-line parameters.
* Return zero on error, nonzero otherwise.
* On error, the other outputs may be invalid.
*
* The information collected from parameters is used to update
* 'ret'. 'ret' should be initialized.
*
* '_argc' and 'argv' are updated to contain only the unprocessed arguments.
*/
static int
pb_ParseParameters (struct pb_Parameters *ret, int *_argc, char **argv)
{
char *err_message;
struct argparse ap;
/* Each argument */
initialize_argparse(&ap, *_argc, argv);
while(!is_end_of_arguments(&ap)) {
result arg_result; /* Result of parsing this option */
char *arg = get_argument(&ap);
/* Process this argument */
if (arg[0] == '-') {
/* Single-character flag */
if ((arg[1] != 0) && (arg[2] == 0)) {
delete_argument(&ap); /* This argument is consumed here */
/* Find a matching short option */
struct option *op;
for (op = options; !is_last_option(op); op++) {
if (op->short_name == arg[1]) {
arg_result = (*op->action)(&ap, ret);
goto option_was_processed;
}
}
/* No option matches */
report_parse_error("Unexpected command-line parameter\n");
arg_result = ARGPARSE_ERROR;
goto option_was_processed;
}
/* Long flag */
if (arg[1] == '-') {
delete_argument(&ap); /* This argument is consumed here */
/* Find a matching long option */
struct option *op;
for (op = options; !is_last_option(op); op++) {
if (op->long_name && strcmp(&arg[2], op->long_name) == 0) {
arg_result = (*op->action)(&ap, ret);
goto option_was_processed;
}
}
/* No option matches */
report_parse_error("Unexpected command-line parameter\n");
arg_result = ARGPARSE_ERROR;
goto option_was_processed;
}
}
else {
/* Other arguments are ignored */
next_argument(&ap);
arg_result = ARGPARSE_OK;
goto option_was_processed;
}
option_was_processed:
/* Decide what to do next based on 'arg_result' */
switch(arg_result) {
case ARGPARSE_OK:
/* Continue processing */
break;
case ARGPARSE_ERROR:
/* Error exit from the function */
return 0;
case ARGPARSE_DONE:
/* Normal exit from the argument parsing loop */
goto end_of_options;
}
} /* end for each argument */
/* If all arguments were processed, then normal exit from the loop */
end_of_options:
finalize_argparse(&ap, _argc, argv);
return 1;
}
/*****************************************************************************/
/* Other exported functions */
struct pb_Parameters *
pb_ReadParameters(int *_argc, char **argv)
{
struct pb_Parameters *ret =
(struct pb_Parameters *)malloc(sizeof(struct pb_Parameters));
/* Initialize the parameters structure */
ret->outFile = NULL;
ret->inpFiles = (char **)malloc(sizeof(char *));
ret->inpFiles[0] = NULL;
ret->platform = NULL;
ret->device = NULL;
/* Read parameters and update _argc, argv */
if (!pb_ParseParameters(ret, _argc, argv)) {
/* Parse error */
pb_FreeParameters(ret);
return NULL;
}
return ret;
}
int
pb_Parameters_CountInputs(struct pb_Parameters *p)
{
int n;
for (n = 0; p->inpFiles[n]; n++);
return n;
}

View File

@@ -0,0 +1,37 @@
/***************************************************************************
*cr
*cr (C) Copyright 2008-2010 The Board of Trustees of the
*cr University of Illinois
*cr All Rights Reserved
*cr
***************************************************************************/
#ifndef ATOM_H
#define ATOM_H
#ifdef __cplusplus
extern "C" {
#endif
typedef struct Atom_t {
float x, y, z, q;
} Atom;
typedef struct Atoms_t {
Atom *atoms;
int size;
} Atoms;
typedef struct Vec3_t {
float x, y, z;
} Vec3;
Atoms *read_atom_file(const char *fname);
void free_atom(Atoms *atom);
void get_atom_extent(Vec3 *lo, Vec3 *hi, Atoms *atom);
#ifdef __cplusplus
}
#endif
#endif /* ATOM_H */

View File

@@ -0,0 +1,195 @@
/***************************************************************************
*cr
*cr (C) Copyright 2008-2010 The Board of Trustees of the
*cr University of Illinois
*cr All Rights Reserved
*cr
***************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <parboil.h>
#include "atom.h"
#include "cutoff.h"
#undef DEBUG_PASS_RATE
#define CHECK_CYLINDER_CPU
#define CELLEN 4.f
#define INV_CELLEN (1.f/CELLEN)
extern int cpu_compute_cutoff_potential_lattice(
Lattice *lattice, /* the lattice */
float cutoff, /* cutoff distance */
Atoms *atoms /* array of atoms */
)
{
int nx = lattice->dim.nx;
int ny = lattice->dim.ny;
int nz = lattice->dim.nz;
float xlo = lattice->dim.lo.x;
float ylo = lattice->dim.lo.y;
float zlo = lattice->dim.lo.z;
float gridspacing = lattice->dim.h;
int natoms = atoms->size;
Atom *atom = atoms->atoms;
const float a2 = cutoff * cutoff;
const float inv_a2 = 1.f / a2;
float s;
const float inv_gridspacing = 1.f / gridspacing;
const int radius = (int) ceilf(cutoff * inv_gridspacing) - 1;
/* lattice point radius about each atom */
int n;
int i, j, k;
int ia, ib, ic;
int ja, jb, jc;
int ka, kb, kc;
int index;
int koff, jkoff;
float x, y, z, q;
float dx, dy, dz;
float dz2, dydz2, r2;
float e;
float xstart, ystart;
float *pg;
int gindex;
int ncell, nxcell, nycell, nzcell;
int *first, *next;
float inv_cellen = INV_CELLEN;
Vec3 minext, maxext; /* Extent of atom bounding box */
float xmin, ymin, zmin;
float xmax, ymax, zmax;
#if DEBUG_PASS_RATE
unsigned long long pass_count = 0;
unsigned long long fail_count = 0;
#endif
/* find min and max extent */
get_atom_extent(&minext, &maxext, atoms);
/* number of cells in each dimension */
nxcell = (int) floorf((maxext.x-minext.x) * inv_cellen) + 1;
nycell = (int) floorf((maxext.y-minext.y) * inv_cellen) + 1;
nzcell = (int) floorf((maxext.z-minext.z) * inv_cellen) + 1;
ncell = nxcell * nycell * nzcell;
/* allocate for cursor link list implementation */
first = (int *) malloc(ncell * sizeof(int));
for (gindex = 0; gindex < ncell; gindex++) {
first[gindex] = -1;
}
next = (int *) malloc(natoms * sizeof(int));
for (n = 0; n < natoms; n++) {
next[n] = -1;
}
/* geometric hashing */
for (n = 0; n < natoms; n++) {
if (0==atom[n].q) continue; /* skip any non-contributing atoms */
i = (int) floorf((atom[n].x - minext.x) * inv_cellen);
j = (int) floorf((atom[n].y - minext.y) * inv_cellen);
k = (int) floorf((atom[n].z - minext.z) * inv_cellen);
gindex = (k*nycell + j)*nxcell + i;
next[n] = first[gindex];
first[gindex] = n;
}
/* traverse the grid cells */
for (gindex = 0; gindex < ncell; gindex++) {
for (n = first[gindex]; n != -1; n = next[n]) {
x = atom[n].x - xlo;
y = atom[n].y - ylo;
z = atom[n].z - zlo;
q = atom[n].q;
/* find closest grid point with position less than or equal to atom */
ic = (int) (x * inv_gridspacing);
jc = (int) (y * inv_gridspacing);
kc = (int) (z * inv_gridspacing);
/* find extent of surrounding box of grid points */
ia = ic - radius;
ib = ic + radius + 1;
ja = jc - radius;
jb = jc + radius + 1;
ka = kc - radius;
kb = kc + radius + 1;
/* trim box edges so that they are within grid point lattice */
if (ia < 0) ia = 0;
if (ib >= nx) ib = nx-1;
if (ja < 0) ja = 0;
if (jb >= ny) jb = ny-1;
if (ka < 0) ka = 0;
if (kb >= nz) kb = nz-1;
/* loop over surrounding grid points */
xstart = ia*gridspacing - x;
ystart = ja*gridspacing - y;
dz = ka*gridspacing - z;
for (k = ka; k <= kb; k++, dz += gridspacing) {
koff = k*ny;
dz2 = dz*dz;
dy = ystart;
for (j = ja; j <= jb; j++, dy += gridspacing) {
jkoff = (koff + j)*nx;
dydz2 = dy*dy + dz2;
#ifdef CHECK_CYLINDER_CPU
if (dydz2 >= a2) continue;
#endif
dx = xstart;
index = jkoff + ia;
pg = lattice->lattice + index;
#if defined(__INTEL_COMPILER)
for (i = ia; i <= ib; i++, pg++, dx += gridspacing) {
r2 = dx*dx + dydz2;
s = (1.f - r2 * inv_a2) * (1.f - r2 * inv_a2);
e = q * (1/sqrtf(r2)) * s;
*pg += (r2 < a2 ? e : 0); /* LOOP VECTORIZED!! */
}
#else
for (i = ia; i <= ib; i++, pg++, dx += gridspacing) {
r2 = dx*dx + dydz2;
if (r2 >= a2)
{
#ifdef DEBUG_PASS_RATE
fail_count++;
#endif
continue;
}
#ifdef DEBUG_PASS_RATE
pass_count++;
#endif
s = (1.f - r2 * inv_a2);
e = q * (1/sqrtf(r2)) * s * s;
*pg += e;
}
#endif
}
} /* end loop over surrounding grid points */
} /* end loop over atoms in a gridcell */
} /* end loop over gridcells */
/* free memory */
free(next);
free(first);
/* For debugging: print the number of times that the test passed/failed */
#ifdef DEBUG_PASS_RATE
printf ("Pass :%lld\n", pass_count);
printf ("Fail :%lld\n", fail_count);
#endif
return 0;
}

View File

@@ -0,0 +1,499 @@
/***************************************************************************
*cr
*cr (C) Copyright 2008-2010 The Board of Trustees of the
*cr University of Illinois
*cr All Rights Reserved
*cr
***************************************************************************/
#include <CL/cl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <parboil.h>
#include "atom.h"
#include "cutoff.h"
#include "macros.h"
#include "ocl.h"
// OpenCL 1.1 support for int3 is not uniform on all implementations, so
// we use int4 instead. Only the 'x', 'y', and 'z' fields of xyz are used.
typedef cl_int4 xyz;
//extern "C" int gpu_compute_cutoff_potential_lattice(
int gpu_compute_cutoff_potential_lattice(
struct pb_TimerSet *timers,
Lattice *lattice, /* the lattice */
float cutoff, /* cutoff distance */
Atoms *atoms, /* array of atoms */
int verbose, /* print info/debug messages */
struct pb_Parameters *parameters
)
{
int nx = lattice->dim.nx;
int ny = lattice->dim.ny;
int nz = lattice->dim.nz;
float xlo = lattice->dim.lo.x;
float ylo = lattice->dim.lo.y;
float zlo = lattice->dim.lo.z;
float h = lattice->dim.h;
int natoms = atoms->size;
Atom *atom = atoms->atoms;
xyz nbrlist[NBRLIST_MAXLEN];
int nbrlistlen = 0;
int binHistoFull[BIN_DEPTH+1] = { 0 }; /* clear every array element */
int binHistoCover[BIN_DEPTH+1] = { 0 }; /* clear every array element */
int num_excluded = 0;
int xRegionDim, yRegionDim, zRegionDim;
int xRegionIndex, yRegionIndex, zRegionIndex;
int xOffset, yOffset, zOffset;
int lnx, lny, lnz, lnall;
float *regionZeroAddr, *thisRegion;
cl_mem regionZeroCl;
int index, indexRegion;
int c;
xyz binDim;
int nbins;
cl_float4 *binBaseAddr, *binZeroAddr;
cl_mem binBaseCl, binZeroCl;
int *bincntBaseAddr, *bincntZeroAddr;
Atoms *extra = NULL;
cl_mem NbrListLen;
cl_mem NbrList;
int i, j, k, n;
int sum, total;
float avgFillFull, avgFillCover;
const float cutoff2 = cutoff * cutoff;
const float inv_cutoff2 = 1.f / cutoff2;
size_t gridDim[3], blockDim[3];
// The "compute" timer should be active upon entry to this function
/* pad lattice to be factor of 8 in each dimension */
xRegionDim = (int) ceilf(nx/8.f);
yRegionDim = (int) ceilf(ny/8.f);
zRegionDim = (int) ceilf(nz/8.f);
lnx = 8 * xRegionDim;
lny = 8 * yRegionDim;
lnz = 8 * zRegionDim;
lnall = lnx * lny * lnz;
/* will receive energies from OpenCL */
regionZeroAddr = (float *) malloc(lnall * sizeof(float));
/* create bins */
c = (int) ceil(cutoff * BIN_INVLEN); /* count extra bins around lattice */
binDim.x = (int) ceil(lnx * h * BIN_INVLEN) + 2*c;
binDim.y = (int) ceil(lny * h * BIN_INVLEN) + 2*c;
binDim.z = (int) ceil(lnz * h * BIN_INVLEN) + 2*c;
nbins = binDim.x * binDim.y * binDim.z;
binBaseAddr = (cl_float4 *) calloc(nbins * BIN_DEPTH, sizeof(cl_float4));
binZeroAddr = binBaseAddr + ((c * binDim.y + c) * binDim.x + c) * BIN_DEPTH;
bincntBaseAddr = (int *) calloc(nbins, sizeof(int));
bincntZeroAddr = bincntBaseAddr + (c * binDim.y + c) * binDim.x + c;
/* create neighbor list */
if (ceilf(BIN_LENGTH / (8*h)) == floorf(BIN_LENGTH / (8*h))) {
float s = sqrtf(3);
float r2 = (cutoff + s*BIN_LENGTH) * (cutoff + s*BIN_LENGTH);
int cnt = 0;
/* develop neighbor list around 1 cell */
if (2*c + 1 > NBRLIST_DIM) {
fprintf(stderr, "must have cutoff <= %f\n",
(NBRLIST_DIM-1)/2 * BIN_LENGTH);
return -1;
}
for (k = -c; k <= c; k++) {
for (j = -c; j <= c; j++) {
for (i = -c; i <= c; i++) {
if ((i*i + j*j + k*k)*BIN_LENGTH*BIN_LENGTH >= r2) continue;
nbrlist[cnt].x = i;
nbrlist[cnt].y = j;
nbrlist[cnt].z = k;
cnt++;
}
}
}
nbrlistlen = cnt;
}
else if (8*h <= 2*BIN_LENGTH) {
float s = 2.f*sqrtf(3);
float r2 = (cutoff + s*BIN_LENGTH) * (cutoff + s*BIN_LENGTH);
int cnt = 0;
/* develop neighbor list around 3-cube of cells */
if (2*c + 3 > NBRLIST_DIM) {
fprintf(stderr, "must have cutoff <= %f\n",
(NBRLIST_DIM-3)/2 * BIN_LENGTH);
return -1;
}
for (k = -c; k <= c; k++) {
for (j = -c; j <= c; j++) {
for (i = -c; i <= c; i++) {
if ((i*i + j*j + k*k)*BIN_LENGTH*BIN_LENGTH >= r2) continue;
nbrlist[cnt].x = i;
nbrlist[cnt].y = j;
nbrlist[cnt].z = k;
cnt++;
}
}
}
nbrlistlen = cnt;
}
else {
fprintf(stderr, "must have h <= %f\n", 0.25 * BIN_LENGTH);
return -1;
}
/* perform geometric hashing of atoms into bins */
{
/* array of extra atoms, permit average of one extra per bin */
Atom *extra_atoms = (Atom *) calloc(nbins, sizeof(Atom));
int extra_len = 0;
for (n = 0; n < natoms; n++) {
cl_float4 p;
p.x = atom[n].x - xlo;
p.y = atom[n].y - ylo;
p.z = atom[n].z - zlo;
p.w = atom[n].q;
i = (int) floorf(p.x * BIN_INVLEN);
j = (int) floorf(p.y * BIN_INVLEN);
k = (int) floorf(p.z * BIN_INVLEN);
if (i >= -c && i < binDim.x - c &&
j >= -c && j < binDim.y - c &&
k >= -c && k < binDim.z - c &&
atom[n].q != 0) {
int index = (k * binDim.y + j) * binDim.x + i;
cl_float4 *bin = binZeroAddr + index * BIN_DEPTH;
int bindex = bincntZeroAddr[index];
if (bindex < BIN_DEPTH) {
/* copy atom into bin and increase counter for this bin */
bin[bindex] = p;
bincntZeroAddr[index]++;
}
else {
/* add index to array of extra atoms to be computed with CPU */
if (extra_len >= nbins) {
fprintf(stderr, "exceeded space for storing extra atoms\n");
return -1;
}
extra_atoms[extra_len] = atom[n];
extra_len++;
}
}
else {
/* excluded atoms are either outside bins or neutrally charged */
num_excluded++;
}
}
/* Save result */
extra = (Atoms *)malloc(sizeof(Atoms));
extra->atoms = extra_atoms;
extra->size = extra_len;
}
/* bin stats */
sum = total = 0;
for (n = 0; n < nbins; n++) {
binHistoFull[ bincntBaseAddr[n] ]++;
sum += bincntBaseAddr[n];
total += BIN_DEPTH;
}
avgFillFull = sum / (float) total;
sum = total = 0;
for (k = 0; k < binDim.z - 2*c; k++) {
for (j = 0; j < binDim.y - 2*c; j++) {
for (i = 0; i < binDim.x - 2*c; i++) {
int index = (k * binDim.y + j) * binDim.x + i;
binHistoCover[ bincntZeroAddr[index] ]++;
sum += bincntZeroAddr[index];
total += BIN_DEPTH;
}
}
}
avgFillCover = sum / (float) total;
if (verbose) {
/* report */
printf("number of atoms = %d\n", natoms);
printf("lattice spacing = %g\n", h);
printf("cutoff distance = %g\n", cutoff);
printf("\n");
printf("requested lattice dimensions = %d %d %d\n", nx, ny, nz);
printf("requested space dimensions = %g %g %g\n", nx*h, ny*h, nz*h);
printf("expanded lattice dimensions = %d %d %d\n", lnx, lny, lnz);
printf("expanded space dimensions = %g %g %g\n", lnx*h, lny*h, lnz*h);
printf("number of bytes for lattice data = %u\n", (unsigned int) (lnall*sizeof(float)));
printf("\n");
printf("bin padding thickness = %d\n", c);
printf("bin cover dimensions = %d %d %d\n",
binDim.x - 2*c, binDim.y - 2*c, binDim.z - 2*c);
printf("bin full dimensions = %d %d %d\n", binDim.x, binDim.y, binDim.z);
printf("number of bins = %d\n", nbins);
printf("total number of atom slots = %d\n", nbins * BIN_DEPTH);
printf("%% overhead space = %g\n",
(natoms / (double) (nbins * BIN_DEPTH)) * 100);
printf("number of bytes for bin data = %u\n",
(unsigned int)(nbins * BIN_DEPTH * sizeof(cl_float4)));
printf("\n");
printf("bin histogram with padding:\n");
sum = 0;
for (n = 0; n <= BIN_DEPTH; n++) {
printf(" number of bins with %d atoms: %d\n", n, binHistoFull[n]);
sum += binHistoFull[n];
}
printf(" total number of bins: %d\n", sum);
printf(" %% average fill: %g\n", avgFillFull * 100);
printf("\n");
printf("bin histogram excluding padding:\n");
sum = 0;
for (n = 0; n <= BIN_DEPTH; n++) {
printf(" number of bins with %d atoms: %d\n", n, binHistoCover[n]);
sum += binHistoCover[n];
}
printf(" total number of bins: %d\n", sum);
printf(" %% average fill: %g\n", avgFillCover * 100);
printf("\n");
printf("number of extra atoms = %d\n", extra->size);
printf("%% atoms that are extra = %g\n", (extra->size / (double) natoms) * 100);
printf("\n");
/* sanity check on bins */
sum = 0;
for (n = 0; n <= BIN_DEPTH; n++) {
sum += n * binHistoFull[n];
}
sum += extra->size + num_excluded;
printf("sanity check on bin histogram with edges: "
"sum + others = %d\n", sum);
sum = 0;
for (n = 0; n <= BIN_DEPTH; n++) {
sum += n * binHistoCover[n];
}
sum += extra->size + num_excluded;
printf("sanity check on bin histogram excluding edges: "
"sum + others = %d\n", sum);
printf("\n");
/* neighbor list */
printf("neighbor list length = %d\n", nbrlistlen);
printf("\n");
}
printf("Ok!\n");
pb_Context* pb_context;
pb_context = pb_InitOpenCLContext(parameters);
if (pb_context == NULL) {
fprintf (stderr, "Error: No OpenCL platform/device can be found.");
return -1;
}
printf("Ok!\n");
cl_int clStatus;
cl_device_id clDevice = (cl_device_id) pb_context->clDeviceId;
cl_platform_id clPlatform = (cl_platform_id) pb_context->clPlatformId;
cl_context clContext = (cl_context) pb_context->clContext;
cl_command_queue clCommandQueue = clCreateCommandQueue(clContext,clDevice,CL_QUEUE_PROFILING_ENABLE,&clStatus);
CHECK_ERROR("clCreateCommandQueue")
pb_SetOpenCL(&clContext, &clCommandQueue);
//const char* clSource[] = {readFile("src/opencl_base/kernel.cl")};
//cl_program clProgram = clCreateProgramWithSource(clContext,1,clSource,NULL,&clStatus);
cl_program clProgram = clCreateProgramWithBuiltInKernels(
clContext, 1, &clDevice, "opencl_cutoff_potential_lattice", &clStatus);
CHECK_ERROR("clCreateProgramWithSource")
char clOptions[50];
sprintf(clOptions,"-I src/opencl_base"); //-cl-nv-verbose
clStatus = clBuildProgram(clProgram,1,&clDevice,clOptions,NULL,NULL);
if (clStatus != CL_SUCCESS) {
size_t string_size = 0;
clGetProgramBuildInfo(clProgram, clDevice, CL_PROGRAM_BUILD_LOG,
0, NULL, &string_size);
char* string = (char*)malloc(string_size*sizeof(char));
clGetProgramBuildInfo(clProgram, clDevice, CL_PROGRAM_BUILD_LOG,
string_size, string, NULL);
puts(string);
}
CHECK_ERROR("clBuildProgram")
cl_kernel clKernel = clCreateKernel(clProgram,"opencl_cutoff_potential_lattice",&clStatus);
CHECK_ERROR("clCreateKernel")
/* setup OpenCL kernel parameters */
blockDim[0] = 8;
blockDim[1] = 8;
blockDim[2] = 2;
gridDim[0] = 4 * xRegionDim * blockDim[0];
gridDim[1] = yRegionDim * blockDim[1];
gridDim[2] = 1 * blockDim[2];
/* allocate and initialize memory on OpenCL device */
pb_SwitchToTimer(timers, pb_TimerID_COPY);
if (verbose) {
printf("Allocating %.2fMB on OpenCL device for potentials\n",
lnall * sizeof(float) / (double) (1024*1024));
}
regionZeroCl = clCreateBuffer(clContext,CL_MEM_WRITE_ONLY,lnall*sizeof(float),NULL,&clStatus);
CHECK_ERROR("clCreateBuffer")
// clMemSet(clCommandQueue,regionZeroCl,0,lnall*sizeof(float));
if (verbose) {
printf("Allocating %.2fMB on OpenCL device for atom bins\n",
nbins * BIN_DEPTH * sizeof(cl_float4) / (double) (1024*1024));
}
binBaseCl = clCreateBuffer(clContext,CL_MEM_READ_ONLY,nbins*BIN_DEPTH*sizeof(cl_float4),NULL,&clStatus);
CHECK_ERROR("clCreateBuffer")
clStatus = clEnqueueWriteBuffer(clCommandQueue,binBaseCl,CL_TRUE,0,nbins*BIN_DEPTH*sizeof(cl_float4),binBaseAddr,0,NULL,NULL);
CHECK_ERROR("clEnqueueWriteBuffer")
//Sub buffers are not supported in OpenCL v1.0
int offset = ((c * binDim.y + c) * binDim.x + c) * BIN_DEPTH;
NbrListLen = clCreateBuffer(clContext,CL_MEM_READ_ONLY,sizeof(int),NULL,&clStatus);
CHECK_ERROR("clCreateBuffer")
clStatus = clEnqueueWriteBuffer(clCommandQueue,NbrListLen,CL_TRUE,0,sizeof(int),&nbrlistlen,0,NULL,NULL);
CHECK_ERROR("clEnqueueWriteBuffer")
NbrList = clCreateBuffer(clContext,CL_MEM_READ_ONLY,NBRLIST_MAXLEN*sizeof(xyz),NULL,&clStatus);
CHECK_ERROR("clCreateBuffer")
clStatus = clEnqueueWriteBuffer(clCommandQueue,NbrList,CL_TRUE,0,nbrlistlen*sizeof(xyz),nbrlist,0,NULL,NULL);
CHECK_ERROR("clEnqueueWriteBuffer")
if (verbose)
printf("\n");
clStatus = clSetKernelArg(clKernel,0,sizeof(int),&(binDim.x));
clStatus = clSetKernelArg(clKernel,1,sizeof(int),&(binDim.y));
clStatus = clSetKernelArg(clKernel,2,sizeof(cl_mem),&binBaseCl);
clStatus = clSetKernelArg(clKernel,3,sizeof(int),&offset);
clStatus = clSetKernelArg(clKernel,4,sizeof(float),&h);
clStatus = clSetKernelArg(clKernel,5,sizeof(float),&cutoff2);
clStatus = clSetKernelArg(clKernel,6,sizeof(float),&inv_cutoff2);
clStatus = clSetKernelArg(clKernel,7,sizeof(cl_mem),&regionZeroCl);
clStatus = clSetKernelArg(clKernel,9,sizeof(cl_mem),&NbrListLen);
clStatus = clSetKernelArg(clKernel,10,sizeof(cl_mem),&NbrList);
CHECK_ERROR("clSetKernelArg")
printf("Ok!!\n");
/* loop over z-dimension, invoke OpenCL kernel for each x-y plane */
pb_SwitchToTimer(timers, pb_TimerID_KERNEL);
printf("Invoking OpenCL kernel on %d region planes...\n", zRegionDim);
for (zRegionIndex = 0; zRegionIndex < zRegionDim; zRegionIndex++) {
printf(" computing plane %d\r", zRegionIndex);
fflush(stdout);
clStatus = clSetKernelArg(clKernel,8,sizeof(int),&zRegionIndex);
CHECK_ERROR("clSetKernelArg")
printf("Ok**!2\n");
clStatus = clEnqueueNDRangeKernel(clCommandQueue,clKernel,3,NULL,gridDim,blockDim,0,NULL,NULL);
printf("Ok**!2\n");
CHECK_ERROR("clEnqueueNDRangeKernel")
printf("Ok**!2\n");
clStatus = clFinish(clCommandQueue);
printf("Ok**!2\n");
CHECK_ERROR("clFinish")
}
printf("Ok++!\n");
printf("Finished OpenCL kernel calls \n");
/* copy result regions from OpenCL device */
pb_SwitchToTimer(timers, pb_TimerID_COPY);
clStatus = clEnqueueReadBuffer(clCommandQueue,regionZeroCl,CL_TRUE,0,lnall*sizeof(float),regionZeroAddr,0,NULL,NULL);
CHECK_ERROR("clEnqueueReadBuffer")
/* free OpenCL memory allocations */
clStatus = clReleaseMemObject(regionZeroCl);
clStatus = clReleaseMemObject(binBaseCl);
clStatus = clReleaseMemObject(NbrListLen);
clStatus = clReleaseMemObject(NbrList);
CHECK_ERROR("clReleaseMemObject")
clStatus = clReleaseKernel(clKernel);
clStatus = clReleaseProgram(clProgram);
clStatus = clReleaseCommandQueue(clCommandQueue);
clStatus = clReleaseContext(clContext);
//free((void*)clSource[0]);
/* transpose regions back into lattice */
pb_SwitchToTimer(timers, pb_TimerID_COMPUTE);
for (k = 0; k < nz; k++) {
zRegionIndex = (k >> 3);
zOffset = (k & 7);
for (j = 0; j < ny; j++) {
yRegionIndex = (j >> 3);
yOffset = (j & 7);
for (i = 0; i < nx; i++) {
xRegionIndex = (i >> 3);
xOffset = (i & 7);
thisRegion = regionZeroAddr
+ ((zRegionIndex * yRegionDim + yRegionIndex) * xRegionDim
+ xRegionIndex) * REGION_SIZE;
indexRegion = (zOffset * 8 + yOffset) * 8 + xOffset;
index = (k * ny + j) * nx + i;
lattice->lattice[index] = thisRegion[indexRegion];
}
}
}
/* handle extra atoms */
if (extra->size > 0) {
printf("computing extra atoms on CPU\n");
if (cpu_compute_cutoff_potential_lattice(lattice, cutoff, extra)) {
fprintf(stderr, "cpu_compute_cutoff_potential_lattice() failed "
"for extra atoms\n");
return -1;
}
printf("\n");
}
/* cleanup memory allocations */
free(regionZeroAddr);
free(binBaseAddr);
free(bincntBaseAddr);
free_atom(extra);
return 0;
}

View File

@@ -0,0 +1,72 @@
/***************************************************************************
*cr
*cr (C) Copyright 2008-2010 The Board of Trustees of the
*cr University of Illinois
*cr All Rights Reserved
*cr
***************************************************************************/
#ifndef CUTOFF_H
#define CUTOFF_H
#ifdef __cplusplus
extern "C" {
#endif
#define SHIFTED
/* A structure to record how points in 3D space map to array
elements. Array element (z, y, x)
where 0 <= x < nx, 0 <= y < ny, 0 <= z < nz
maps to coordinate (xlo, ylo, zlo) + h * (x, y, z).
*/
typedef struct LatticeDim_t {
/* Number of lattice points in x, y, z dimensions */
int nx, ny, nz;
/* Lowest corner of lattice */
Vec3 lo;
/* Lattice spacing */
float h;
} LatticeDim;
/* An electric potential field sampled on a regular grid. The
lattice size and grid point positions are specified by 'dim'.
*/
typedef struct Lattice_t {
LatticeDim dim;
float *lattice;
} Lattice;
LatticeDim lattice_from_bounding_box(Vec3 lo, Vec3 hi, float h);
Lattice *create_lattice(LatticeDim dim);
void destroy_lattice(Lattice *);
int gpu_compute_cutoff_potential_lattice(
struct pb_TimerSet *timers,
Lattice *lattice,
float cutoff, /* cutoff distance */
Atoms *atom, /* array of atoms */
int verbose, /* print info/debug messages */
struct pb_Parameters *parameters
);
int cpu_compute_cutoff_potential_lattice(
Lattice *lattice, /* the lattice */
float cutoff, /* cutoff distance */
Atoms *atoms /* array of atoms */
);
int remove_exclusions(
Lattice *lattice, /* the lattice */
float exclcutoff, /* exclusion cutoff distance */
Atoms *atom /* array of atoms */
);
#ifdef __cplusplus
}
#endif
#endif /* CUTOFF_H */

View File

@@ -0,0 +1,157 @@
/***************************************************************************
*cr
*cr (C) Copyright 2008-2010 The Board of Trustees of the
*cr University of Illinois
*cr All Rights Reserved
*cr
***************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <parboil.h>
#include "atom.h"
#include "cutoff.h"
#define CELLEN 4.f
#define INV_CELLEN (1.f/CELLEN)
extern int remove_exclusions(
Lattice *lattice, /* the lattice */
float cutoff, /* exclusion cutoff distance */
Atoms *atoms /* array of atoms */
)
{
int nx = lattice->dim.nx;
int ny = lattice->dim.ny;
int nz = lattice->dim.nz;
float xlo = lattice->dim.lo.x;
float ylo = lattice->dim.lo.y;
float zlo = lattice->dim.lo.z;
float gridspacing = lattice->dim.h;
Atom *atom = atoms->atoms;
const float a2 = cutoff * cutoff;
const float inv_gridspacing = 1.f / gridspacing;
const int radius = (int) ceilf(cutoff * inv_gridspacing) - 1;
/* lattice point radius about each atom */
int n;
int i, j, k;
int ia, ib, ic;
int ja, jb, jc;
int ka, kb, kc;
int index;
int koff, jkoff;
float x, y, z, q;
float dx, dy, dz;
float dz2, dydz2, r2;
float e;
float xstart, ystart;
float *pg;
int gindex;
int ncell, nxcell, nycell, nzcell;
int *first, *next;
float inv_cellen = INV_CELLEN;
Vec3 minext, maxext;
/* find min and max extent */
get_atom_extent(&minext, &maxext, atoms);
/* number of cells in each dimension */
nxcell = (int) floorf((maxext.x-minext.x) * inv_cellen) + 1;
nycell = (int) floorf((maxext.y-minext.y) * inv_cellen) + 1;
nzcell = (int) floorf((maxext.z-minext.z) * inv_cellen) + 1;
ncell = nxcell * nycell * nzcell;
/* allocate for cursor link list implementation */
first = (int *) malloc(ncell * sizeof(int));
for (gindex = 0; gindex < ncell; gindex++) {
first[gindex] = -1;
}
next = (int *) malloc(atoms->size * sizeof(int));
for (n = 0; n < atoms->size; n++) {
next[n] = -1;
}
/* geometric hashing */
for (n = 0; n < atoms->size; n++) {
if (0==atom[n].q) continue; /* skip any non-contributing atoms */
i = (int) floorf((atom[n].x - minext.x) * inv_cellen);
j = (int) floorf((atom[n].y - minext.y) * inv_cellen);
k = (int) floorf((atom[n].z - minext.z) * inv_cellen);
gindex = (k*nycell + j)*nxcell + i;
next[n] = first[gindex];
first[gindex] = n;
}
/* traverse the grid cells */
for (gindex = 0; gindex < ncell; gindex++) {
for (n = first[gindex]; n != -1; n = next[n]) {
x = atom[n].x - xlo;
y = atom[n].y - ylo;
z = atom[n].z - zlo;
q = atom[n].q;
/* find closest grid point with position less than or equal to atom */
ic = (int) (x * inv_gridspacing);
jc = (int) (y * inv_gridspacing);
kc = (int) (z * inv_gridspacing);
/* find extent of surrounding box of grid points */
ia = ic - radius;
ib = ic + radius + 1;
ja = jc - radius;
jb = jc + radius + 1;
ka = kc - radius;
kb = kc + radius + 1;
/* trim box edges so that they are within grid point lattice */
if (ia < 0) ia = 0;
if (ib >= nx) ib = nx-1;
if (ja < 0) ja = 0;
if (jb >= ny) jb = ny-1;
if (ka < 0) ka = 0;
if (kb >= nz) kb = nz-1;
/* loop over surrounding grid points */
xstart = ia*gridspacing - x;
ystart = ja*gridspacing - y;
dz = ka*gridspacing - z;
for (k = ka; k <= kb; k++, dz += gridspacing) {
koff = k*ny;
dz2 = dz*dz;
dy = ystart;
for (j = ja; j <= jb; j++, dy += gridspacing) {
jkoff = (koff + j)*nx;
dydz2 = dy*dy + dz2;
dx = xstart;
index = jkoff + ia;
pg = lattice->lattice + index;
for (i = ia; i <= ib; i++, pg++, dx += gridspacing) {
r2 = dx*dx + dydz2;
/* If atom and lattice point are too close, set the lattice value
* to zero */
if (r2 < a2) *pg = 0;
}
}
} /* end loop over surrounding grid points */
} /* end loop over atoms in a gridcell */
} /* end loop over gridcells */
/* free memory */
free(next);
free(first);
return 0;
}

View File

@@ -0,0 +1,55 @@
/***************************************************************************
*cr
*cr (C) Copyright 2010 The Board of Trustees of the
*cr University of Illinois
*cr All Rights Reserved
*cr
***************************************************************************/
//#include <endian.h>
#include <stdlib.h>
#include <malloc.h>
#include <stdio.h>
#include <inttypes.h>
#include "gpu_info.h"
void compute_active_thread(size_t *thread,
size_t *grid,
int task,
int pad,
int major,
int minor,
int sm)
{
int max_thread;
int max_block=8;
if(major==1)
{
if(minor>=2)
max_thread=1024;
else
max_thread=768;
}
else if(major==2)
max_thread=1536;
else
//newer GPU //keep using 2.0
max_thread=1536;
int _grid;
int _thread;
if(task*pad>sm*max_thread)
{
_thread=max_thread/max_block;
_grid = ((task*pad+_thread-1)/_thread)*_thread;
}
else
{
_thread=pad;
_grid=task*pad;
}
thread[0]=_thread;
grid[0]=_grid;
}

View File

@@ -0,0 +1,20 @@
/***************************************************************************
*cr
*cr (C) Copyright 2010 The Board of Trustees of the
*cr University of Illinois
*cr All Rights Reserved
*cr
***************************************************************************/
#ifndef __GPUINFOH__
#define __GPUINFOH__
void compute_active_thread(size_t *thread,
size_t *grid,
int task,
int pad,
int major,
int minor,
int sm);
#endif

View File

@@ -0,0 +1,104 @@
/*
* potential lattice is decomposed into size 8^3 lattice point "regions"
*
* THIS IMPLEMENTATION: one thread per lattice point
* thread block size 128 gives 4 thread blocks per region
* kernel is invoked for each x-y plane of regions,
* where gridDim.x is 4*(x region dimension) so that blockIdx.x
* can absorb the z sub-region index in its 2 lowest order bits
*
* Regions are stored contiguously in memory in row-major order
*
* The bins have to not only cover the region, but they need to surround
* the outer edges so that region sides and corners can still use
* neighbor list stencil. The binZeroAddr is actually a shifted pointer into
* the bin array (binZeroAddr = binBaseAddr + (c*binDim_y + c)*binDim_x + c)
* where c = ceil(cutoff / binsize). This allows for negative offsets to
* be added to myBinIndex.
*
* The (0,0,0) spatial origin corresponds to lower left corner of both
* regionZeroAddr and binZeroAddr. The atom coordinates are translated
* during binning to enforce this assumption.
*/
#include "macros.h"
// OpenCL 1.1 support for int3 is not uniform on all implementations, so
// we use int4 instead. Only the 'x', 'y', and 'z' fields of xyz are used.
typedef int4 xyz;
__kernel void opencl_cutoff_potential_lattice(
int binDim_x,
int binDim_y,
__global float4 *binBaseAddr,
int offset,
float h, /* lattice spacing */
float cutoff2, /* square of cutoff distance */
float inv_cutoff2,
__global float *regionZeroAddr, /* address of lattice regions starting at origin */
int zRegionIndex,
__constant int *NbrListLen,
__constant xyz *NbrList
)
{
__global float4* binZeroAddr = binBaseAddr + offset;
__global float *myRegionAddr;
int Bx, By, Bz;
/* thread id */
const int tid = (get_local_id(2)*get_local_size(1) +
get_local_id(1))*get_local_size(0) + get_local_id(0);
/* this is the start of the sub-region indexed by tid */
myRegionAddr = regionZeroAddr + ((zRegionIndex*get_num_groups(1)
+ get_group_id(1))*(get_num_groups(0)>>2) + (get_group_id(0)>>2))*REGION_SIZE
+ (get_group_id(0)&3)*SUB_REGION_SIZE;
/* spatial coordinate of this lattice point */
float x = (8 * (get_group_id(0) >> 2) + get_local_id(0)) * h;
float y = (8 * get_group_id(1) + get_local_id(1)) * h;
float z = (8 * zRegionIndex + 2*(get_group_id(0)&3) + get_local_id(2)) * h;
float dx;
float dy;
float dz;
float r2;
float s;
int totalbins = 0;
/* bin number determined by center of region */
Bx = (int) floor((8 * (get_group_id(0) >> 2) + 4) * h * BIN_INVLEN);
By = (int) floor((8 * get_group_id(1) + 4) * h * BIN_INVLEN);
Bz = (int) floor((8 * zRegionIndex + 4) * h * BIN_INVLEN);
float energy = 0.f;
int bincnt;
for (bincnt = 0; bincnt < *NbrListLen; bincnt++) {
int i = Bx + NbrList[bincnt].x;
int j = By + NbrList[bincnt].y;
int k = Bz + NbrList[bincnt].z;
__global float4* p_global = binZeroAddr +
(((k*binDim_y + j)*binDim_x + i) * BIN_DEPTH);
int m;
for (m = 0; m < BIN_DEPTH; m++) {
float aq = p_global[m].w;
if (0.f != aq) {
dx = p_global[m].x - x;
dy = p_global[m].y - y;
dz = p_global[m].z - z;
r2 = dx*dx + dy*dy + dz*dz;
if (r2 < cutoff2) {
s = (1.f - r2 * inv_cutoff2);
energy += aq * rsqrt(r2) * s * s;
}
}
} /* end loop over atoms in bin */
} /* end loop over neighbor list */
/* store into global memory */
myRegionAddr[tid+0] = energy;
}

Binary file not shown.

View File

@@ -0,0 +1,69 @@
#ifndef __MACROSH__
#define __MACROSH__
#ifdef __DEVICE_EMULATION__
#define DEBUG
/* define which grid block and which thread to examine */
#define BX 0
#define BY 0
#define TX 0
#define TY 0
#define TZ 0
#define EMU(code) do { \
if (blockIdx.x==BX && blockIdx.y==BY && \
threadIdx.x==TX && threadIdx.y==TY && threadIdx.z==TZ) { \
code; \
} \
} while (0)
#define INT(n) printf("%s = %d\n", #n, n)
#define FLOAT(f) printf("%s = %g\n", #f, (double)(f))
#define INT3(n) printf("%s = %d %d %d\n", #n, (n).x, (n).y, (n).z)
#define FLOAT4(f) printf("%s = %g %g %g %g\n", #f, (double)(f).x, \
(double)(f).y, (double)(f).z, (double)(f).w)
#else
#define EMU(code)
#define INT(n)
#define FLOAT(f)
#define INT3(n)
#define FLOAT4(f)
#endif
/* report error from OpenCL */
#define CHECK_ERROR(errorMessage) \
if(clStatus != CL_SUCCESS) \
{ \
printf("Error: %s!\n",errorMessage); \
printf("Line: %d\n",__LINE__); \
exit(1); \
}
/*
* neighbor list:
* stored in constant memory as table of offsets
* flat index addressing is computed by kernel
*
* reserve enough memory for 11^3 stencil of grid cells
* this fits within 16K of memory
*/
#define NBRLIST_DIM 11
#define NBRLIST_MAXLEN (NBRLIST_DIM * NBRLIST_DIM * NBRLIST_DIM)
/*
* atom bins cached into shared memory for processing
*
* this reserves 4K of shared memory for 32 atom bins each containing 8 atoms,
* should permit scheduling of up to 3 thread blocks per SM
*/
#define BIN_DEPTH 8 /* max number of atoms per bin */
#define BIN_SIZE 32 /* size of bin in floats */
#define BIN_CACHE_MAXLEN 32 /* max number of atom bins to cache */
#define BIN_LENGTH 4.f /* spatial length in Angstroms */
#define BIN_INVLEN (1.f / BIN_LENGTH)
/* assuming density of 1 atom / 10 A^3, expectation is 6.4 atoms per bin
* so that bin fill should be 80% (for non-empty regions of space) */
#define REGION_SIZE 512 /* number of floats in lattice region */
#define SUB_REGION_SIZE 128 /* number of floats in lattice sub-region */
#endif

View File

@@ -0,0 +1,194 @@
/***************************************************************************
*cr
*cr (C) Copyright 2008-2010 The Board of Trustees of the
*cr University of Illinois
*cr All Rights Reserved
*cr
***************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <parboil.h>
#include "atom.h"
#include "cutoff.h"
#include "output.h"
#define ERRTOL 1e-4f
#define NOKERNELS 0
#define CUTOFF1 1
#define CUTOFF6 32
#define CUTOFF6OVERLAP 64
#define CUTOFFCPU 16384
int appenddata(const char *filename, int size, double time) {
FILE *fp;
fp=fopen(filename, "a");
if (fp == NULL) {
printf("error appending to file %s..\n", filename);
return -1;
}
fprintf(fp, "%d %.3f\n", size, time);
fclose(fp);
return 0;
}
LatticeDim
lattice_from_bounding_box(Vec3 lo, Vec3 hi, float h)
{
LatticeDim ret;
ret.nx = (int) floorf((hi.x-lo.x)/h) + 1;
ret.ny = (int) floorf((hi.y-lo.y)/h) + 1;
ret.nz = (int) floorf((hi.z-lo.z)/h) + 1;
ret.lo = lo;
ret.h = h;
return ret;
}
Lattice *
create_lattice(LatticeDim dim)
{
int size;
Lattice *lat = (Lattice *)malloc(sizeof(Lattice));
if (lat == NULL) {
fprintf(stderr, "Out of memory\n");
exit(1);
}
lat->dim = dim;
/* Round up the allocated size to a multiple of 8 */
size = ((dim.nx * dim.ny * dim.nz) + 7) & ~7;
lat->lattice = (float *)calloc(size, sizeof(float));
if (lat->lattice == NULL) {
fprintf(stderr, "Out of memory\n");
exit(1);
}
return lat;
}
void
destroy_lattice(Lattice *lat)
{
if (lat) {
free(lat->lattice);
free(lat);
}
}
int main(int argc, char *argv[]) {
Atoms *atom;
LatticeDim lattice_dim;
Lattice *gpu_lattice;
Vec3 min_ext, max_ext; /* Bounding box of atoms */
Vec3 lo, hi; /* Bounding box with padding */
float h = 0.5f; /* Lattice spacing */
float cutoff = 12.f; /* Cutoff radius */
float exclcutoff = 1.f; /* Radius for exclusion */
float padding = 0.5f; /* Bounding box padding distance */
int n;
struct pb_Parameters *parameters;
struct pb_TimerSet timers;
/* Read input parameters */
parameters = pb_ReadParameters(&argc, argv);
if (parameters == NULL) {
exit(1);
}
parameters->inpFiles = (char **)malloc(sizeof(char *) * 2);
parameters->inpFiles[0] = (char *)malloc(100);
parameters->inpFiles[1] = NULL;
strncpy(parameters->inpFiles[0], "watbox.sl40.pqr", 100);
/* Expect one input file */
if (pb_Parameters_CountInputs(parameters) != 1) {
fprintf(stderr, "Expecting one input file\n");
exit(1);
}
pb_InitializeTimerSet(&timers);
pb_SwitchToTimer(&timers, pb_TimerID_IO);
printf("OK\n");
{
const char *pqrfilename = parameters->inpFiles[0];
if (!(atom = read_atom_file(pqrfilename))) {
fprintf(stderr, "read_atom_file() failed\n");
exit(1);
}
printf("read %d atoms from file '%s'\n", atom->size, pqrfilename);
}
printf("OK\n");
/* find extent of domain */
pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
get_atom_extent(&min_ext, &max_ext, atom);
printf("extent of domain is:\n");
printf(" minimum %g %g %g\n", min_ext.x, min_ext.y, min_ext.z);
printf(" maximum %g %g %g\n", max_ext.x, max_ext.y, max_ext.z);
printf("padding domain by %g Angstroms\n", padding);
lo = (Vec3) {min_ext.x - padding, min_ext.y - padding, min_ext.z - padding};
hi = (Vec3) {max_ext.x + padding, max_ext.y + padding, max_ext.z + padding};
printf("domain lengths are %g by %g by %g\n", hi.x-lo.x, hi.y-lo.y, hi.z-lo.z);
lattice_dim = lattice_from_bounding_box(lo, hi, h);
gpu_lattice = create_lattice(lattice_dim);
printf("\n");
/*
* Run OpenCL kernel
* (Begin and end with COMPUTE timer active)
*/
if (gpu_compute_cutoff_potential_lattice(&timers, gpu_lattice, cutoff, atom, 0, parameters)) {
fprintf(stderr, "Computation failed\n");
exit(1);
}
/*
* Zero the lattice points that are too close to an atom. This is
* necessary for numerical stability.
*/
if (remove_exclusions(gpu_lattice, exclcutoff, atom)) {
fprintf(stderr, "remove_exclusions() failed for gpu lattice\n");
exit(1);
}
printf("\n");
pb_SwitchToTimer(&timers, pb_TimerID_IO);
/* Print output */
if (parameters->outFile) {
//write_lattice_summary(parameters->outFile, gpu_lattice);
}
pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
/* Cleanup */
destroy_lattice(gpu_lattice);
free_atom(atom);
pb_SwitchToTimer(&timers, pb_TimerID_NONE);
pb_PrintTimerSet(&timers);
pb_FreeParameters(parameters);
return 0;
}

View File

@@ -0,0 +1,49 @@
#include <CL/cl.h>
#include <stdio.h>
#include <string.h>
#include "ocl.h"
char* readFile(const char* fileName)
{
FILE* fp;
fp = fopen(fileName,"r");
if(fp == NULL)
{
printf("Error 1!\n");
exit(1);
}
fseek(fp,0,SEEK_END);
long size = ftell(fp);
rewind(fp);
char* buffer = (char*)malloc(sizeof(char)*(size+1));
if(buffer == NULL)
{
printf("Error 2!\n");
fclose(fp);
exit(1);
}
size_t res = fread(buffer,1,size,fp);
if(res != size)
{
printf("Error 3!\n");
fclose(fp);
exit(1);
}
buffer[size] = 0;
fclose(fp);
return buffer;
}
void clMemSet(cl_command_queue clCommandQueue, cl_mem buf, int val, size_t size)
{
cl_int clStatus;
char* temp = (char*)malloc(size);
memset(temp,val,size);
clStatus = clEnqueueWriteBuffer(clCommandQueue,buf,CL_TRUE,0,size,temp,0,NULL,NULL);
CHECK_ERROR("clEnqueueWriteBuffer")
free(temp);
}

View File

@@ -0,0 +1,17 @@
#ifndef __OCLH__
#define __OCLH__
#include <stdlib.h>
void clMemSet(cl_command_queue, cl_mem, int, size_t);
char* readFile(const char*);
#define CHECK_ERROR(errorMessage) \
if(clStatus != CL_SUCCESS) \
{ \
printf("Error: %s!\n",errorMessage); \
printf("Line: %d\n",__LINE__); \
exit(1); \
}
#endif

View File

@@ -0,0 +1,67 @@
/***************************************************************************
*cr
*cr (C) Copyright 2008-2010 The Board of Trustees of the
*cr University of Illinois
*cr All Rights Reserved
*cr
***************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <inttypes.h>
#include <math.h>
#include <parboil.h>
#include "atom.h"
#include "cutoff.h"
void
write_lattice_summary(const char *filename, Lattice *lattice)
{
float *lattice_data = lattice->lattice;
int nx = lattice->dim.nx;
int ny = lattice->dim.ny;
int nz = lattice->dim.nz;
/* Open output file */
FILE *outfile = fopen(filename, "w");
if (outfile == NULL) {
fprintf(stderr, "Cannot open output file\n");
exit(1);
}
/* Write the sum of the the absolute values of all lattice potentials */
{
double abspotential = 0.0;
float tmp;
int i;
for (i = 0; i < nx * ny * nz; i++)
abspotential += fabs((double) lattice_data[i]);
tmp = (float) abspotential;
fwrite(&tmp, 1, sizeof(float), outfile);
}
/* Write the size of a lattice plane */
{
uint32_t tmp;
tmp = (uint32_t) (lattice->dim.nx * lattice->dim.ny);
fwrite(&tmp, 1, sizeof(uint32_t), outfile);
}
/* Write the plane of lattice data at z=0 and z = nz-1 */
{
int plane_size = nx * ny;
fwrite(lattice_data, plane_size, sizeof(float), outfile);
fwrite(lattice_data + (nz-1) * plane_size, plane_size, sizeof(float),
outfile);
}
/* Cleanup */
fclose(outfile);
}

View File

@@ -0,0 +1,25 @@
/***************************************************************************
*cr
*cr (C) Copyright 2008-2010 The Board of Trustees of the
*cr University of Illinois
*cr All Rights Reserved
*cr
***************************************************************************/
#ifndef OUTPUT_H
#define OUTPUT_H
#include "cutoff.h"
#ifdef __cplusplus
extern "C" {
#endif
void
write_lattice_summary(const char *filename, Lattice *lattice);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,348 @@
/*
* (c) 2010 The Board of Trustees of the University of Illinois.
*/
#ifndef PARBOIL_HEADER
#define PARBOIL_HEADER
#include <stdio.h>
#include <string.h>
#ifdef __cplusplus
extern "C" {
#endif
#include <unistd.h>
/* A platform as specified by the user on the command line */
struct pb_PlatformParam {
char *name; /* The platform name. This string is owned. */
char *version; /* The platform version; may be NULL.
* This string is owned. */
};
/* Create a PlatformParam from the given strings.
* 'name' must not be NULL. 'version' may be NULL.
* If not NULL, the strings should have been allocated by malloc(),
* and they will be owned by the returned object.
*/
struct pb_PlatformParam *
pb_PlatformParam(char *name, char *version);
void
pb_FreePlatformParam(struct pb_PlatformParam *);
/* A criterion for how to select a device */
enum pb_DeviceSelectionCriterion {
pb_Device_INDEX, /* Enumerate the devices and select one
* by its number */
pb_Device_CPU, /* Select a CPU device */
pb_Device_GPU, /* Select a GPU device */
pb_Device_ACCELERATOR, /* Select an accelerator device */
pb_Device_NAME /* Select a device by name */
};
/* A device as specified by the user on the command line */
struct pb_DeviceParam {
enum pb_DeviceSelectionCriterion criterion;
union {
int index; /* If criterion == pb_Device_INDEX,
* the index of the device */
char *name; /* If criterion == pb_Device_NAME,
* the name of the device.
* This string is owned. */
};
};
struct pb_DeviceParam *
pb_DeviceParam_index(int index);
struct pb_DeviceParam *
pb_DeviceParam_cpu(void);
struct pb_DeviceParam *
pb_DeviceParam_gpu(void);
struct pb_DeviceParam *
pb_DeviceParam_accelerator(void);
/* Create a by-name device selection criterion.
* The string should have been allocated by malloc(), and it will will be
* owned by the returned object.
*/
struct pb_DeviceParam *
pb_DeviceParam_name(char *name);
void
pb_FreeDeviceParam(struct pb_DeviceParam *);
/* Command line parameters for benchmarks */
struct pb_Parameters {
char *outFile; /* If not NULL, the raw output of the
* computation should be saved to this
* file. The string is owned. */
char **inpFiles; /* A NULL-terminated array of strings
* holding the input file(s) for the
* computation. The array and strings
* are owned. */
struct pb_PlatformParam *platform; /* If not NULL, the platform
* specified on the command line. */
struct pb_DeviceParam *device; /* If not NULL, the device
* specified on the command line. */
};
/* Read command-line parameters.
*
* The argc and argv parameters to main are read, and any parameters
* interpreted by this function are removed from the argument list.
*
* A new instance of struct pb_Parameters is returned.
* If there is an error, then an error message is printed on stderr
* and NULL is returned.
*/
struct pb_Parameters *
pb_ReadParameters(int *_argc, char **argv);
/* Free an instance of struct pb_Parameters.
*/
void
pb_FreeParameters(struct pb_Parameters *p);
void
pb_FreeStringArray(char **);
/* Count the number of input files in a pb_Parameters instance.
*/
int
pb_Parameters_CountInputs(struct pb_Parameters *p);
/* A time or duration. */
//#if _POSIX_VERSION >= 200112L
typedef unsigned long long pb_Timestamp; /* time in microseconds */
//#else
//# error "Timestamps not implemented"
//#endif
enum pb_TimerState {
pb_Timer_STOPPED,
pb_Timer_RUNNING,
};
struct pb_Timer {
enum pb_TimerState state;
pb_Timestamp elapsed; /* Amount of time elapsed so far */
pb_Timestamp init; /* Beginning of the current time interval,
* if state is RUNNING. End of the last
* recorded time interfal otherwise. */
};
/* Reset a timer.
* Use this to initialize a timer or to clear
* its elapsed time. The reset timer is stopped.
*/
void
pb_ResetTimer(struct pb_Timer *timer);
/* Start a timer. The timer is set to RUNNING mode and
* time elapsed while the timer is running is added to
* the timer.
* The timer should not already be running.
*/
void
pb_StartTimer(struct pb_Timer *timer);
/* Stop a timer.
* This stops adding elapsed time to the timer.
* The timer should not already be stopped.
*/
void
pb_StopTimer(struct pb_Timer *timer);
/* Get the elapsed time in seconds. */
double
pb_GetElapsedTime(struct pb_Timer *timer);
/* Execution time is assigned to one of these categories. */
enum pb_TimerID {
pb_TimerID_NONE = 0,
pb_TimerID_IO, /* Time spent in input/output */
pb_TimerID_KERNEL, /* Time spent computing on the device,
* recorded asynchronously */
pb_TimerID_COPY, /* Time spent synchronously moving data
* to/from device and allocating/freeing
* memory on the device */
pb_TimerID_DRIVER, /* Time spent in the host interacting with the
* driver, primarily for recording the time
* spent queueing asynchronous operations */
pb_TimerID_COPY_ASYNC, /* Time spent in asynchronous transfers */
pb_TimerID_COMPUTE, /* Time for all program execution other
* than parsing command line arguments,
* I/O, kernel, and copy */
pb_TimerID_OVERLAP, /* Time double-counted in asynchronous and
* host activity: automatically filled in,
* not intended for direct usage */
pb_TimerID_LAST /* Number of timer IDs */
};
/* Dynamic list of asynchronously tracked times between events */
struct pb_async_time_marker_list {
char *label; // actually just a pointer to a string
enum pb_TimerID timerID; /* The ID to which the interval beginning
* with this marker should be attributed */
void * marker;
//cudaEvent_t marker; /* The driver event for this marker */
struct pb_async_time_marker_list *next;
};
struct pb_SubTimer {
char *label;
struct pb_Timer timer;
struct pb_SubTimer *next;
};
struct pb_SubTimerList {
struct pb_SubTimer *current;
struct pb_SubTimer *subtimer_list;
};
/* A set of timers for recording execution times. */
struct pb_TimerSet {
enum pb_TimerID current;
struct pb_async_time_marker_list* async_markers;
pb_Timestamp async_begin;
pb_Timestamp wall_begin;
struct pb_Timer timers[pb_TimerID_LAST];
struct pb_SubTimerList *sub_timer_list[pb_TimerID_LAST];
};
/* Reset all timers in the set. */
void
pb_InitializeTimerSet(struct pb_TimerSet *timers);
void
pb_AddSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID pb_Category);
/* Select which timer the next interval of time should be accounted
* to. The selected timer is started and other timers are stopped.
* Using pb_TimerID_NONE stops all timers. */
void
pb_SwitchToTimer(struct pb_TimerSet *timers, enum pb_TimerID timer);
void
pb_SwitchToSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID category);
/* Print timer values to standard output. */
void
pb_PrintTimerSet(struct pb_TimerSet *timers);
/* Release timer resources */
void
pb_DestroyTimerSet(struct pb_TimerSet * timers);
void
pb_SetOpenCL(void *clContextPtr, void *clCommandQueuePtr);
typedef struct pb_Device_tag {
char* name;
void* clDevice;
int id;
unsigned int in_use;
unsigned int available;
} pb_Device;
struct pb_Context_tag;
typedef struct pb_Context_tag pb_Context;
typedef struct pb_Platform_tag {
char* name;
char* version;
void* clPlatform;
unsigned int in_use;
pb_Context** contexts;
pb_Device** devices;
} pb_Platform;
struct pb_Context_tag {
void* clPlatformId;
void* clContext;
void* clDeviceId;
pb_Platform* pb_platform;
pb_Device* pb_device;
};
// verbosely print out list of platforms and their devices to the console.
pb_Platform**
pb_GetPlatforms();
// Choose a platform according to the given platform specification
pb_Platform*
pb_GetPlatform(struct pb_PlatformParam *platform);
// choose a platform: by name, name & version
pb_Platform*
pb_GetPlatformByName(const char* name);
pb_Platform*
pb_GetPlatformByNameAndVersion(const char* name, const char* version);
// Choose a device according to the given device specification
pb_Device*
pb_GetDevice(pb_Platform* pb_platform, struct pb_DeviceParam *device);
pb_Device**
pb_GetDevices(pb_Platform* pb_platform);
// choose a device by name.
pb_Device*
pb_GetDeviceByName(pb_Platform* pb_platform, const char* name);
pb_Platform*
pb_GetPlatformByEnvVars();
pb_Context*
pb_InitOpenCLContext(struct pb_Parameters* parameters);
void
pb_ReleasePlatforms();
void
pb_ReleaseContext(pb_Context* c);
void
pb_PrintPlatformInfo(pb_Context* c);
void
perf_init();
//#define MEASURE_KERNEL_TIME
#include <CL/cl.h>
#ifdef MEASURE_KERNEL_TIME
#define clEnqueueNDRangeKernel(q,k,d,o,dg,db,a,b,c) pb_clEnqueueNDRangeKernel((q), (k), (d), (o), (dg), (db), (a), (b), (c))
cl_int
pb_clEnqueueNDRangeKernel(cl_command_queue /* command_queue */,
cl_kernel /* kernel */,
cl_uint /* work_dim */,
const size_t * /* global_work_offset */,
const size_t * /* global_work_size */,
const size_t * /* local_work_size */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */);
#endif
enum { T_FLOAT, T_DOUBLE, T_SHORT, T_INT, T_UCHAR };
void pb_sig_float(char*, float*, int);
void pb_sig_double(char*, double*, int);
void pb_sig_short(char*, short*, int);
void pb_sig_int(char*, int*, int);
void pb_sig_uchar(char*, unsigned char*, unsigned int);
void pb_sig_clmem(char*, cl_command_queue, cl_mem, int);
#ifdef __cplusplus
}
#endif
#endif //PARBOIL_HEADER

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,139 @@
/***************************************************************************
*cr
*cr (C) Copyright 2008-2010 The Board of Trustees of the
*cr University of Illinois
*cr All Rights Reserved
*cr
***************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "atom.h"
#define LINELEN 96
#define INITLEN 20
Atoms *read_atom_file(const char *fname)
{
FILE *file;
char line[LINELEN];
Atom *atom; /* Atom array */
int len = INITLEN; /* Size of atom array */
int cnt = 0; /* Number of atoms read */
/* allocate initial atom array */
atom = (Atom *) malloc(len * sizeof(Atom));
if (NULL==atom) {
fprintf(stderr, "can't allocate memory\n");
return NULL;
}
int i;
for (i = 0; i < len; ++i) {
atom[i].x = i+0;
atom[i].y = i+1;
atom[i].z = i+2;
atom[i].q = 1;
}
#if 0
/* open atom "pqr" file */
file = fopen(fname, "r");
if (NULL==file) {
fprintf(stderr, "can't open file \"%s\" for reading\n", fname);
return NULL;
}
/* loop to read pqr file line by line */
while (fgets(line, LINELEN, file) != NULL) {
if (strncmp(line, "ATOM ", 6) != 0 && strncmp(line, "HETATM", 6) != 0) {
continue; /* skip anything that isn't an atom record */
}
if (cnt==len) { /* extend atom array */
void *tmp = realloc(atom, 2*len*sizeof(Atom));
if (NULL==tmp) {
fprintf(stderr, "can't allocate more memory\n");
return NULL;
}
atom = (Atom *) tmp;
len *= 2;
}
/* read position coordinates and charge from atom record */
if (sscanf(line, "%*s %*d %*s %*s %*d %f %f %f %f", &(atom[cnt].x),
&(atom[cnt].y), &(atom[cnt].z), &(atom[cnt].q)) != 4) {
fprintf(stderr, "atom record %d does not have expected format\n", cnt+1);
return NULL;
}
cnt++; /* count atoms as we store them */
}
/* verify EOF and close file */
if ( !feof(file) ) {
fprintf(stderr, "did not find EOF\n");
return NULL;
}
if (fclose(file)) {
fprintf(stderr, "can't close file\n");
return NULL;
}
#endif
/* Build the output data structure */
{
Atoms *out = (Atoms *)malloc(sizeof(Atoms));
if (NULL == out) {
fprintf(stderr, "can't allocate memory\n");
return NULL;
}
out->size = cnt;
out->atoms = atom;
return out;
}
}
void free_atom(Atoms *atom)
{
if (atom) {
free(atom->atoms);
free(atom);
}
}
void
get_atom_extent(Vec3 *out_lo, Vec3 *out_hi, Atoms *atom)
{
Atom *atoms = atom->atoms;
int natoms = atom->size;
Vec3 lo;
Vec3 hi;
int n;
hi.x = lo.x = atoms[0].x;
hi.y = lo.y = atoms[0].y;
hi.z = lo.z = atoms[0].z;
for (n = 1; n < natoms; n++) {
lo.x = fminf(lo.x, atoms[n].x);
hi.x = fmaxf(hi.x, atoms[n].x);
lo.y = fminf(lo.y, atoms[n].y);
hi.y = fmaxf(hi.y, atoms[n].y);
lo.z = fminf(lo.z, atoms[n].z);
hi.z = fmaxf(hi.z, atoms[n].z);
}
*out_lo = lo;
*out_hi = hi;
}

File diff suppressed because it is too large Load Diff

View File

@@ -73,16 +73,18 @@ main (int argc, char *argv[]) {
/* Read command line */
params = pb_ReadParameters(&argc, argv);
/*if ((params->inpFiles[0] == NULL) || (params->inpFiles[1] != NULL))
{
fprintf(stderr, "Expecting one input filename\n");
exit(-1);
}*/
params->inpFiles = (char **)malloc(sizeof(char *) * 2);
params->inpFiles[0] = (char *)malloc(100);
params->inpFiles[1] = NULL;
strncpy(params->inpFiles[0], "32_32_32_dataset.bin", 100);
if ((params->inpFiles[0] == NULL) || (params->inpFiles[1] != NULL))
{
fprintf(stderr, "Expecting one input filename\n");
exit(-1);
}
/* Read in data */
pb_SwitchToTimer(&timers, pb_TimerID_IO);
inputData(params->inpFiles[0],

View File

@@ -187,9 +187,9 @@ int main(int argc, char** argv) {
size_t grid[3] = {(nx-2+tx-1)/tx*tx,ny-2,nz-2};
//size_t grid[3] = {nx-2,ny-2,nz-2};
size_t offset[3] = {1,1,1};
printf("block size in x/y/z = %d %d %d\n",block[0],block[1],block[2]);
printf("grid size in x/y/z = %d %d %d\n",grid[0],grid[1],grid[2]);
printf("block size in x/y/z = %d %d %d\n",block[0],block[1],block[2]);
printf ("blocks = %d\n", (grid[0]/block[0])*(grid[1]/block[1])*(grid[2]*block[2]));
clStatus = clSetKernelArg(clKernel,0,sizeof(float),(void*)&c0);
@@ -204,10 +204,14 @@ int main(int argc, char** argv) {
//main execution
pb_SwitchToTimer(&timers, pb_TimerID_KERNEL);
printf("OK+0\n");
int t;
for(t=0;t<iteration;t++)
{
clStatus = clEnqueueNDRangeKernel(clCommandQueue,clKernel,3,NULL,grid,block,0,NULL,NULL);
printf("OK+0\n");
//printf("iteration %d\n",t)
CHECK_ERROR("clEnqueueNDRangeKernel")
@@ -218,6 +222,8 @@ int main(int argc, char** argv) {
clStatus = clSetKernelArg(clKernel,3,sizeof(cl_mem),(void*)&d_Anext);
}
printf("OK+1\n");
cl_mem d_temp = d_A0;
d_A0 = d_Anext;
d_Anext = d_temp;
@@ -233,6 +239,8 @@ int main(int argc, char** argv) {
clStatus = clReleaseCommandQueue(clCommandQueue);
clStatus = clReleaseContext(clContext);
CHECK_ERROR("clReleaseContext")
printf("OK+2\n");
if (parameters->outFile) {
pb_SwitchToTimer(&timers, pb_TimerID_IO);