mri-q
This commit is contained in:
BIN
benchmarks/opencl/mri-q/32_32_32_dataset.bin
Executable file
BIN
benchmarks/opencl/mri-q/32_32_32_dataset.bin
Executable file
Binary file not shown.
68
benchmarks/opencl/mri-q/Makefile
Normal file
68
benchmarks/opencl/mri-q/Makefile
Normal file
@@ -0,0 +1,68 @@
|
||||
RISCV_TOOL_PATH = $(wildcard ~/dev/riscv-gnu-toolchain/drops)
|
||||
POCL_CC_PATH = $(wildcard ~/dev/pocl/drops_riscv_cc)
|
||||
POCL_INC_PATH = $(wildcard ../include)
|
||||
POCL_LIB_PATH = $(wildcard ../lib)
|
||||
VX_RT_PATH = $(wildcard ../../../runtime)
|
||||
VX_SIMX_PATH = $(wildcard ../../../simX/obj_dir)
|
||||
|
||||
CC = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc
|
||||
CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++
|
||||
DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump
|
||||
HEX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy
|
||||
GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
|
||||
|
||||
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
|
||||
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.s
|
||||
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
|
||||
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
|
||||
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
|
||||
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
|
||||
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c
|
||||
VX_SRCS += $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST)
|
||||
|
||||
VX_CFLAGS = -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld
|
||||
|
||||
CXXFLAGS = -g -O0 -march=rv32im -mabi=ilp32
|
||||
CXXFLAGS += -ffreestanding # program may not begin at main()
|
||||
CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections
|
||||
CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions
|
||||
CXXFLAGS += -I$(POCL_INC_PATH) -I.
|
||||
|
||||
VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
|
||||
QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a
|
||||
|
||||
PROJECT = mri-q
|
||||
|
||||
SRCS = main.cc args.c parboil_opencl.c ocl.c gpu_info.c file.cc computeQ.c
|
||||
|
||||
all: $(PROJECT).dump $(PROJECT).hex
|
||||
|
||||
lib$(PROJECT).a: kernel.cl
|
||||
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl
|
||||
|
||||
$(PROJECT).elf: $(SRCS) lib$(PROJECT).a
|
||||
$(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) $(SRCS) $(VX_LIBS) -o $(PROJECT).elf
|
||||
|
||||
$(PROJECT).qemu: $(SRCS) lib$(PROJECT).a
|
||||
$(CXX) $(CXXFLAGS) $(SRCS) $(QEMU_LIBS) -o $(PROJECT).qemu
|
||||
|
||||
$(PROJECT).hex: $(PROJECT).elf
|
||||
$(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex
|
||||
|
||||
$(PROJECT).dump: $(PROJECT).elf
|
||||
$(DMP) -D $(PROJECT).elf > $(PROJECT).dump
|
||||
|
||||
run: $(PROJECT).hex
|
||||
POCL_DEBUG=all $(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug
|
||||
|
||||
qemu: $(PROJECT).qemu
|
||||
POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -d in_asm -D debug.log $(PROJECT).qemu
|
||||
|
||||
gdb-s: $(PROJECT).qemu
|
||||
POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -g 1234 -d in_asm -D debug.log $(PROJECT).qemu
|
||||
|
||||
gdb-c: $(PROJECT).qemu
|
||||
$(GDB) $(PROJECT).qemu
|
||||
|
||||
clean:
|
||||
rm -rf *.o *.elf *.dump *.hex *.qemu *.log *.debug
|
||||
617
benchmarks/opencl/mri-q/args.c
Normal file
617
benchmarks/opencl/mri-q/args.c
Normal file
@@ -0,0 +1,617 @@
|
||||
|
||||
#include <parboil.h>
|
||||
#include <errno.h>
|
||||
#include <limits.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
/*****************************************************************************/
|
||||
/* Memory management routines */
|
||||
|
||||
/* Free an array of owned strings. */
|
||||
void
|
||||
pb_FreeStringArray(char **string_array)
|
||||
{
|
||||
char **p;
|
||||
|
||||
if (!string_array) return;
|
||||
for (p = string_array; *p; p++) free(*p);
|
||||
free(string_array);
|
||||
}
|
||||
|
||||
struct pb_PlatformParam *
|
||||
pb_PlatformParam(char *name, char *version)
|
||||
{
|
||||
if (name == NULL) {
|
||||
fprintf(stderr, "pb_PlatformParam: Invalid argument\n");
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
struct pb_PlatformParam *ret =
|
||||
(struct pb_PlatformParam *)malloc(sizeof (struct pb_PlatformParam));
|
||||
|
||||
ret->name = name;
|
||||
ret->version = version;
|
||||
return ret;
|
||||
}
|
||||
|
||||
void
|
||||
pb_FreePlatformParam(struct pb_PlatformParam *p)
|
||||
{
|
||||
if (p == NULL) return;
|
||||
|
||||
free(p->name);
|
||||
free(p->version);
|
||||
free(p);
|
||||
}
|
||||
|
||||
struct pb_DeviceParam *
|
||||
pb_DeviceParam_index(int index)
|
||||
{
|
||||
struct pb_DeviceParam *ret =
|
||||
(struct pb_DeviceParam *)malloc(sizeof (struct pb_DeviceParam));
|
||||
ret->criterion = pb_Device_INDEX;
|
||||
ret->index = index;
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct pb_DeviceParam *
|
||||
pb_DeviceParam_cpu(void)
|
||||
{
|
||||
struct pb_DeviceParam *ret =
|
||||
(struct pb_DeviceParam *)malloc(sizeof (struct pb_DeviceParam));
|
||||
ret->criterion = pb_Device_CPU;
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct pb_DeviceParam *
|
||||
pb_DeviceParam_gpu(void)
|
||||
{
|
||||
struct pb_DeviceParam *ret =
|
||||
(struct pb_DeviceParam *)malloc(sizeof (struct pb_DeviceParam));
|
||||
ret->criterion = pb_Device_GPU;
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct pb_DeviceParam *
|
||||
pb_DeviceParam_accelerator(void)
|
||||
{
|
||||
struct pb_DeviceParam *ret =
|
||||
(struct pb_DeviceParam *)malloc(sizeof (struct pb_DeviceParam));
|
||||
ret->criterion = pb_Device_ACCELERATOR;
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct pb_DeviceParam *
|
||||
pb_DeviceParam_name(char *name)
|
||||
{
|
||||
struct pb_DeviceParam *ret =
|
||||
(struct pb_DeviceParam *)malloc(sizeof (struct pb_DeviceParam));
|
||||
ret->criterion = pb_Device_NAME;
|
||||
ret->name = name;
|
||||
return ret;
|
||||
}
|
||||
|
||||
void
|
||||
pb_FreeDeviceParam(struct pb_DeviceParam *p)
|
||||
{
|
||||
if (p == NULL) return;
|
||||
|
||||
switch(p->criterion) {
|
||||
case pb_Device_NAME:
|
||||
free(p->name);
|
||||
break;
|
||||
case pb_Device_INDEX:
|
||||
case pb_Device_CPU:
|
||||
case pb_Device_ACCELERATOR:
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "pb_FreeDeviceParam: Invalid argument\n");
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
pb_FreeParameters(struct pb_Parameters *p)
|
||||
{
|
||||
free(p->outFile);
|
||||
pb_FreeStringArray(p->inpFiles);
|
||||
pb_FreePlatformParam(p->platform);
|
||||
pb_FreeDeviceParam(p->device);
|
||||
free(p);
|
||||
}
|
||||
|
||||
/*****************************************************************************/
|
||||
|
||||
/* Parse a comma-delimited list of strings into an
|
||||
* array of strings. */
|
||||
static char **
|
||||
read_string_array(char *in)
|
||||
{
|
||||
char **ret;
|
||||
int i;
|
||||
int count; /* Number of items in the input */
|
||||
char *substring; /* Current substring within 'in' */
|
||||
|
||||
/* Count the number of items in the string */
|
||||
count = 1;
|
||||
for (i = 0; in[i]; i++) if (in[i] == ',') count++;
|
||||
|
||||
/* Allocate storage */
|
||||
ret = (char **)malloc((count + 1) * sizeof(char *));
|
||||
|
||||
/* Create copies of the strings from the list */
|
||||
substring = in;
|
||||
for (i = 0; i < count; i++) {
|
||||
char *substring_end;
|
||||
int substring_length;
|
||||
|
||||
/* Find length of substring */
|
||||
for (substring_end = substring;
|
||||
(*substring_end != ',') && (*substring_end != 0);
|
||||
substring_end++);
|
||||
|
||||
substring_length = substring_end - substring;
|
||||
|
||||
/* Allocate memory and copy the substring */
|
||||
ret[i] = (char *)malloc(substring_length + 1);
|
||||
memcpy(ret[i], substring, substring_length);
|
||||
ret[i][substring_length] = 0;
|
||||
|
||||
/* go to next substring */
|
||||
substring = substring_end + 1;
|
||||
}
|
||||
ret[i] = NULL; /* Write the sentinel value */
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void
|
||||
report_parse_error(const char *str)
|
||||
{
|
||||
fputs(str, stderr);
|
||||
}
|
||||
|
||||
/* Interpret a string as a 'pb_DeviceParam' value.
|
||||
* Return a pointer to a new value, or NULL on failure.
|
||||
*/
|
||||
static struct pb_DeviceParam *
|
||||
read_device_param(char *str)
|
||||
{
|
||||
/* Try different ways of interpreting 'device_string' until one works */
|
||||
|
||||
/* If argument is an integer, then interpret it as a device index */
|
||||
errno = 0;
|
||||
char *end;
|
||||
long device_int = strtol(str, &end, 10);
|
||||
if (!errno) {
|
||||
/* Negative numbers are not valid */
|
||||
if (device_int < 0 || device_int > INT_MAX) return NULL;
|
||||
|
||||
return pb_DeviceParam_index(device_int);
|
||||
}
|
||||
|
||||
/* Match against predefined strings */
|
||||
if (strcmp(str, "CPU") == 0)
|
||||
return pb_DeviceParam_cpu();
|
||||
if (strcmp(str, "GPU") == 0)
|
||||
return pb_DeviceParam_gpu();
|
||||
if (strcmp(str, "ACCELERATOR") == 0)
|
||||
return pb_DeviceParam_accelerator();
|
||||
|
||||
/* Assume any other string is a device name */
|
||||
return pb_DeviceParam_name(strdup(str));
|
||||
}
|
||||
|
||||
/* Interpret a string as a 'pb_PlatformParam' value.
|
||||
* Return a pointer to a new value, or NULL on failure.
|
||||
*/
|
||||
static struct pb_PlatformParam *
|
||||
read_platform_param(char *str)
|
||||
{
|
||||
int separator_index; /* Index of the '-' character separating
|
||||
* name and version number. It's -1 if
|
||||
* there's no '-' character. */
|
||||
|
||||
/* Find the last occurrence of '-' in 'str' */
|
||||
{
|
||||
char *cur;
|
||||
separator_index = -1;
|
||||
for (cur = str; *cur; cur++) {
|
||||
if (*cur == '-') separator_index = cur - str;
|
||||
}
|
||||
}
|
||||
|
||||
/* The platform name is either the entire string, or all characters before
|
||||
* the separator */
|
||||
int name_length = separator_index == -1 ? strlen(str) : separator_index;
|
||||
char *name_str = (char *)malloc(name_length + 1);
|
||||
memcpy(name_str, str, name_length);
|
||||
name_str[name_length] = 0;
|
||||
|
||||
/* The version is either NULL, or all characters after the separator */
|
||||
char *version_str;
|
||||
if (separator_index == -1) {
|
||||
version_str = NULL;
|
||||
}
|
||||
else {
|
||||
const char *version_input_str = str + separator_index + 1;
|
||||
int version_length = strlen(version_input_str);
|
||||
|
||||
version_str = (char *)malloc(version_length + 1);
|
||||
memcpy(version_str, version_input_str, version_length);
|
||||
version_str[version_length] = 0;
|
||||
}
|
||||
|
||||
/* Create output structure */
|
||||
return pb_PlatformParam(name_str, version_str);
|
||||
}
|
||||
|
||||
/****************************************************************************/
|
||||
/* Argument parsing state */
|
||||
|
||||
/* Argument parsing state.
|
||||
*
|
||||
* Arguments that are interpreted by the argument parser are removed from
|
||||
* the list. Variables 'argc' and 'argn' do not count arguments that have
|
||||
* been removed.
|
||||
*
|
||||
* During argument parsing, the array of arguments is compacted, overwriting
|
||||
* the erased arguments. Variable 'argv_put' points to the array element
|
||||
* where the next argument will be written. Variable 'argv_get' points to
|
||||
* the array element where the next argument will be read from.
|
||||
*/
|
||||
struct argparse {
|
||||
int argc; /* Number of arguments. Mutable. */
|
||||
int argn; /* Current argument index. */
|
||||
char **argv_get; /* Argument value being read. */
|
||||
char **argv_put; /* Argument value being written.
|
||||
* argv_put <= argv_get. */
|
||||
};
|
||||
|
||||
static void
|
||||
initialize_argparse(struct argparse *ap, int argc, char **argv)
|
||||
{
|
||||
ap->argc = argc;
|
||||
ap->argn = 0;
|
||||
ap->argv_get = ap->argv_put = argv;
|
||||
}
|
||||
|
||||
/* Finish argument parsing, without processing the remaining arguments.
|
||||
* Write new argument count into _argc. */
|
||||
static void
|
||||
finalize_argparse(struct argparse *ap, int *_argc, char **argv)
|
||||
{
|
||||
/* Move the remaining arguments */
|
||||
for(; ap->argn < ap->argc; ap->argn++)
|
||||
*ap->argv_put++ = *ap->argv_get++;
|
||||
|
||||
/* Update the argument count */
|
||||
*_argc = ap->argc;
|
||||
|
||||
/* Insert a terminating NULL */
|
||||
argv[ap->argc] = NULL;
|
||||
}
|
||||
|
||||
/* Delete the current argument. The argument will not be visible
|
||||
* when argument parsing is done. */
|
||||
static void
|
||||
delete_argument(struct argparse *ap)
|
||||
{
|
||||
if (ap->argn >= ap->argc) {
|
||||
fprintf(stderr, "delete_argument\n");
|
||||
}
|
||||
ap->argc--;
|
||||
ap->argv_get++;
|
||||
}
|
||||
|
||||
/* Go to the next argument. Also, move the current argument to its
|
||||
* final location in argv. */
|
||||
static void
|
||||
next_argument(struct argparse *ap)
|
||||
{
|
||||
if (ap->argn >= ap->argc) {
|
||||
fprintf(stderr, "next_argument\n");
|
||||
}
|
||||
/* Move argument to its new location. */
|
||||
*ap->argv_put++ = *ap->argv_get++;
|
||||
ap->argn++;
|
||||
}
|
||||
|
||||
static int
|
||||
is_end_of_arguments(struct argparse *ap)
|
||||
{
|
||||
return ap->argn == ap->argc;
|
||||
}
|
||||
|
||||
/* Get the current argument */
|
||||
static char *
|
||||
get_argument(struct argparse *ap)
|
||||
{
|
||||
return *ap->argv_get;
|
||||
}
|
||||
|
||||
/* Get the current argument, and also delete it */
|
||||
static char *
|
||||
consume_argument(struct argparse *ap)
|
||||
{
|
||||
char *ret = get_argument(ap);
|
||||
delete_argument(ap);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/****************************************************************************/
|
||||
|
||||
/* The result of parsing a command-line argument */
|
||||
typedef enum {
|
||||
ARGPARSE_OK, /* Success */
|
||||
ARGPARSE_ERROR, /* Error */
|
||||
ARGPARSE_DONE /* Success, and do not continue parsing */
|
||||
} result;
|
||||
|
||||
typedef result parse_action(struct argparse *ap, struct pb_Parameters *params);
|
||||
|
||||
|
||||
/* A command-line option */
|
||||
struct option {
|
||||
char short_name; /* If not 0, the one-character
|
||||
* name of this option */
|
||||
const char *long_name; /* If not NULL, the long name of this option */
|
||||
parse_action *action; /* What to do when this option occurs.
|
||||
* Sentinel value is NULL.
|
||||
*/
|
||||
};
|
||||
|
||||
/* Output file
|
||||
*
|
||||
* -o FILE
|
||||
*/
|
||||
static result
|
||||
parse_output_file(struct argparse *ap, struct pb_Parameters *params)
|
||||
{
|
||||
if (is_end_of_arguments(ap))
|
||||
{
|
||||
report_parse_error("Expecting file name after '-o'\n");
|
||||
return ARGPARSE_ERROR;
|
||||
}
|
||||
|
||||
/* Replace the output file name */
|
||||
free(params->outFile);
|
||||
params->outFile = strdup(consume_argument(ap));
|
||||
|
||||
return ARGPARSE_OK;
|
||||
}
|
||||
|
||||
/* Input files
|
||||
*
|
||||
* -i FILE,FILE,...
|
||||
*/
|
||||
static result
|
||||
parse_input_files(struct argparse *ap, struct pb_Parameters *params)
|
||||
{
|
||||
if (is_end_of_arguments(ap))
|
||||
{
|
||||
report_parse_error("Expecting file name after '-i'\n");
|
||||
return ARGPARSE_ERROR;
|
||||
}
|
||||
|
||||
/* Replace the input file list */
|
||||
pb_FreeStringArray(params->inpFiles);
|
||||
params->inpFiles = read_string_array(consume_argument(ap));
|
||||
return ARGPARSE_OK;
|
||||
}
|
||||
|
||||
/* End of options
|
||||
*
|
||||
* --
|
||||
*/
|
||||
|
||||
static result
|
||||
parse_end_options(struct argparse *ap, struct pb_Parameters *params)
|
||||
{
|
||||
return ARGPARSE_DONE;
|
||||
}
|
||||
|
||||
/* OpenCL device
|
||||
*
|
||||
* --device X
|
||||
*/
|
||||
|
||||
static result
|
||||
parse_device(struct argparse *ap, struct pb_Parameters *params)
|
||||
{
|
||||
/* Read the next argument, which specifies a device */
|
||||
|
||||
if (is_end_of_arguments(ap))
|
||||
{
|
||||
report_parse_error("Expecting device specification after '--device'\n");
|
||||
return ARGPARSE_ERROR;
|
||||
}
|
||||
|
||||
char *device_string = consume_argument(ap);
|
||||
struct pb_DeviceParam *device_param = read_device_param(device_string);
|
||||
|
||||
if (!device_param) {
|
||||
report_parse_error("Unrecognized device specification format on command line\n");
|
||||
return ARGPARSE_ERROR;
|
||||
}
|
||||
|
||||
/* Save the result */
|
||||
pb_FreeDeviceParam(params->device);
|
||||
params->device = device_param;
|
||||
|
||||
return ARGPARSE_OK;
|
||||
}
|
||||
|
||||
static result
|
||||
parse_platform(struct argparse *ap, struct pb_Parameters *params)
|
||||
{
|
||||
/* Read the next argument, which specifies a platform */
|
||||
|
||||
if (is_end_of_arguments(ap))
|
||||
{
|
||||
report_parse_error("Expecting device specification after '--platform'\n");
|
||||
return ARGPARSE_ERROR;
|
||||
}
|
||||
|
||||
char *platform_string = consume_argument(ap);
|
||||
struct pb_PlatformParam *platform_param = read_platform_param(platform_string);
|
||||
|
||||
if (!platform_param) {
|
||||
report_parse_error("Unrecognized platform specification format on command line\n");
|
||||
return ARGPARSE_ERROR;
|
||||
}
|
||||
|
||||
/* Save the result */
|
||||
pb_FreePlatformParam(params->platform);
|
||||
params->platform = platform_param;
|
||||
|
||||
return ARGPARSE_OK;
|
||||
}
|
||||
|
||||
|
||||
static struct option options[] = {
|
||||
{ 'o', NULL, &parse_output_file },
|
||||
{ 'i', NULL, &parse_input_files },
|
||||
{ '-', NULL, &parse_end_options },
|
||||
{ 0, "device", &parse_device },
|
||||
{ 0, "platform", &parse_platform },
|
||||
{ 0, NULL, NULL }
|
||||
};
|
||||
|
||||
static int
|
||||
is_last_option(struct option *op)
|
||||
{
|
||||
return op->action == NULL;
|
||||
}
|
||||
|
||||
/****************************************************************************/
|
||||
|
||||
/* Parse command-line parameters.
|
||||
* Return zero on error, nonzero otherwise.
|
||||
* On error, the other outputs may be invalid.
|
||||
*
|
||||
* The information collected from parameters is used to update
|
||||
* 'ret'. 'ret' should be initialized.
|
||||
*
|
||||
* '_argc' and 'argv' are updated to contain only the unprocessed arguments.
|
||||
*/
|
||||
static int
|
||||
pb_ParseParameters (struct pb_Parameters *ret, int *_argc, char **argv)
|
||||
{
|
||||
char *err_message;
|
||||
struct argparse ap;
|
||||
|
||||
/* Each argument */
|
||||
initialize_argparse(&ap, *_argc, argv);
|
||||
while(!is_end_of_arguments(&ap)) {
|
||||
result arg_result; /* Result of parsing this option */
|
||||
char *arg = get_argument(&ap);
|
||||
|
||||
/* Process this argument */
|
||||
if (arg[0] == '-') {
|
||||
/* Single-character flag */
|
||||
if ((arg[1] != 0) && (arg[2] == 0)) {
|
||||
delete_argument(&ap); /* This argument is consumed here */
|
||||
|
||||
/* Find a matching short option */
|
||||
struct option *op;
|
||||
for (op = options; !is_last_option(op); op++) {
|
||||
if (op->short_name == arg[1]) {
|
||||
arg_result = (*op->action)(&ap, ret);
|
||||
goto option_was_processed;
|
||||
}
|
||||
}
|
||||
|
||||
/* No option matches */
|
||||
report_parse_error("Unexpected command-line parameter\n");
|
||||
arg_result = ARGPARSE_ERROR;
|
||||
goto option_was_processed;
|
||||
}
|
||||
|
||||
/* Long flag */
|
||||
if (arg[1] == '-') {
|
||||
delete_argument(&ap); /* This argument is consumed here */
|
||||
|
||||
/* Find a matching long option */
|
||||
struct option *op;
|
||||
for (op = options; !is_last_option(op); op++) {
|
||||
if (op->long_name && strcmp(&arg[2], op->long_name) == 0) {
|
||||
arg_result = (*op->action)(&ap, ret);
|
||||
goto option_was_processed;
|
||||
}
|
||||
}
|
||||
|
||||
/* No option matches */
|
||||
report_parse_error("Unexpected command-line parameter\n");
|
||||
arg_result = ARGPARSE_ERROR;
|
||||
goto option_was_processed;
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* Other arguments are ignored */
|
||||
next_argument(&ap);
|
||||
arg_result = ARGPARSE_OK;
|
||||
goto option_was_processed;
|
||||
}
|
||||
|
||||
option_was_processed:
|
||||
/* Decide what to do next based on 'arg_result' */
|
||||
switch(arg_result) {
|
||||
case ARGPARSE_OK:
|
||||
/* Continue processing */
|
||||
break;
|
||||
|
||||
case ARGPARSE_ERROR:
|
||||
/* Error exit from the function */
|
||||
return 0;
|
||||
|
||||
case ARGPARSE_DONE:
|
||||
/* Normal exit from the argument parsing loop */
|
||||
goto end_of_options;
|
||||
}
|
||||
} /* end for each argument */
|
||||
|
||||
/* If all arguments were processed, then normal exit from the loop */
|
||||
|
||||
end_of_options:
|
||||
finalize_argparse(&ap, _argc, argv);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*****************************************************************************/
|
||||
/* Other exported functions */
|
||||
|
||||
struct pb_Parameters *
|
||||
pb_ReadParameters(int *_argc, char **argv)
|
||||
{
|
||||
struct pb_Parameters *ret =
|
||||
(struct pb_Parameters *)malloc(sizeof(struct pb_Parameters));
|
||||
|
||||
/* Initialize the parameters structure */
|
||||
ret->outFile = NULL;
|
||||
ret->inpFiles = (char **)malloc(sizeof(char *));
|
||||
ret->inpFiles[0] = NULL;
|
||||
ret->platform = NULL;
|
||||
ret->device = NULL;
|
||||
|
||||
/* Read parameters and update _argc, argv */
|
||||
if (!pb_ParseParameters(ret, _argc, argv)) {
|
||||
/* Parse error */
|
||||
pb_FreeParameters(ret);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int
|
||||
pb_Parameters_CountInputs(struct pb_Parameters *p)
|
||||
{
|
||||
int n;
|
||||
|
||||
for (n = 0; p->inpFiles[n]; n++);
|
||||
return n;
|
||||
}
|
||||
|
||||
118
benchmarks/opencl/mri-q/computeQ.c
Normal file
118
benchmarks/opencl/mri-q/computeQ.c
Normal file
@@ -0,0 +1,118 @@
|
||||
/***************************************************************************
|
||||
*cr
|
||||
*cr (C) Copyright 2007 The Board of Trustees of the
|
||||
*cr University of Illinois
|
||||
*cr All Rights Reserved
|
||||
*cr
|
||||
***************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <malloc.h>
|
||||
#include <CL/cl.h>
|
||||
#include "ocl.h"
|
||||
#include "macros.h"
|
||||
#include "computeQ.h"
|
||||
#include "parboil.h"
|
||||
|
||||
#define NC 1
|
||||
|
||||
void computePhiMag_GPU(int numK,cl_mem phiR_d,cl_mem phiI_d,cl_mem phiMag_d,clPrmtr* clPrm)
|
||||
{
|
||||
int phiMagBlocks = numK / KERNEL_PHI_MAG_THREADS_PER_BLOCK;
|
||||
if (numK % KERNEL_PHI_MAG_THREADS_PER_BLOCK)
|
||||
phiMagBlocks++;
|
||||
|
||||
size_t DimPhiMagBlock = KERNEL_PHI_MAG_THREADS_PER_BLOCK;
|
||||
size_t DimPhiMagGrid = phiMagBlocks*KERNEL_PHI_MAG_THREADS_PER_BLOCK;
|
||||
|
||||
cl_int clStatus;
|
||||
clStatus = clSetKernelArg(clPrm->clKernel,0,sizeof(cl_mem),&phiR_d);
|
||||
clStatus = clSetKernelArg(clPrm->clKernel,1,sizeof(cl_mem),&phiI_d);
|
||||
clStatus = clSetKernelArg(clPrm->clKernel,2,sizeof(cl_mem),&phiMag_d);
|
||||
clStatus = clSetKernelArg(clPrm->clKernel,3,sizeof(int),&numK);
|
||||
CHECK_ERROR("clSetKernelArg")
|
||||
|
||||
clStatus = clEnqueueNDRangeKernel(clPrm->clCommandQueue,clPrm->clKernel,1,NULL,&DimPhiMagGrid,&DimPhiMagBlock,0,NULL,NULL);
|
||||
CHECK_ERROR("clEnqueueNDRangeKernel")
|
||||
}
|
||||
|
||||
static
|
||||
unsigned long long int
|
||||
readElapsedTime(cl_event internal)
|
||||
{
|
||||
cl_int status;
|
||||
cl_ulong t_begin, t_end;
|
||||
status = clGetEventProfilingInfo(internal, CL_PROFILING_COMMAND_START,
|
||||
sizeof(cl_ulong), &t_begin, NULL);
|
||||
if (status != CL_SUCCESS) return 0;
|
||||
status = clGetEventProfilingInfo(internal, CL_PROFILING_COMMAND_END,
|
||||
sizeof(cl_ulong), &t_end, NULL);
|
||||
if (status != CL_SUCCESS) return 0;
|
||||
return (unsigned long long int)(t_end - t_begin);
|
||||
}
|
||||
|
||||
|
||||
void computeQ_GPU (int numK,int numX,
|
||||
cl_mem x_d, cl_mem y_d, cl_mem z_d,
|
||||
struct kValues* kVals,
|
||||
cl_mem Qr_d, cl_mem Qi_d,
|
||||
clPrmtr* clPrm)
|
||||
{
|
||||
int QGrids = numK / KERNEL_Q_K_ELEMS_PER_GRID;
|
||||
if (numK % KERNEL_Q_K_ELEMS_PER_GRID)
|
||||
QGrids++;
|
||||
int QBlocks = numX / KERNEL_Q_THREADS_PER_BLOCK;
|
||||
if (numX % KERNEL_Q_THREADS_PER_BLOCK)
|
||||
QBlocks++;
|
||||
|
||||
size_t DimQBlock = KERNEL_Q_THREADS_PER_BLOCK/NC;
|
||||
size_t DimQGrid = QBlocks*KERNEL_Q_THREADS_PER_BLOCK/NC;
|
||||
|
||||
cl_int clStatus;
|
||||
cl_mem ck;
|
||||
ck = clCreateBuffer(clPrm->clContext,CL_MEM_READ_WRITE,KERNEL_Q_K_ELEMS_PER_GRID*sizeof(struct kValues),NULL,&clStatus);
|
||||
|
||||
int QGrid;
|
||||
for (QGrid = 0; QGrid < QGrids; QGrid++) {
|
||||
// Put the tile of K values into constant mem
|
||||
int QGridBase = QGrid * KERNEL_Q_K_ELEMS_PER_GRID;
|
||||
struct kValues* kValsTile = kVals + QGridBase;
|
||||
int numElems = MIN(KERNEL_Q_K_ELEMS_PER_GRID, numK - QGridBase);
|
||||
|
||||
clStatus = clEnqueueWriteBuffer(clPrm->clCommandQueue,ck,CL_TRUE,0,numElems*sizeof(struct kValues),kValsTile,0,NULL,NULL);
|
||||
CHECK_ERROR("clEnqueueWriteBuffer")
|
||||
|
||||
clStatus = clSetKernelArg(clPrm->clKernel,0,sizeof(int),&numK);
|
||||
clStatus = clSetKernelArg(clPrm->clKernel,1,sizeof(int),&QGridBase);
|
||||
clStatus = clSetKernelArg(clPrm->clKernel,2,sizeof(cl_mem),&x_d);
|
||||
clStatus = clSetKernelArg(clPrm->clKernel,3,sizeof(cl_mem),&y_d);
|
||||
clStatus = clSetKernelArg(clPrm->clKernel,4,sizeof(cl_mem),&z_d);
|
||||
clStatus = clSetKernelArg(clPrm->clKernel,5,sizeof(cl_mem),&Qr_d);
|
||||
clStatus = clSetKernelArg(clPrm->clKernel,6,sizeof(cl_mem),&Qi_d);
|
||||
clStatus = clSetKernelArg(clPrm->clKernel,7,sizeof(cl_mem),&ck);
|
||||
CHECK_ERROR("clSetKernelArg")
|
||||
|
||||
printf ("Grid: %d, Block: %d\n", DimQGrid, DimQBlock);
|
||||
|
||||
#define TIMED_EXECUTION
|
||||
#ifdef TIMED_EXECUTION
|
||||
cl_event e;
|
||||
clStatus = clEnqueueNDRangeKernel(clPrm->clCommandQueue,clPrm->clKernel,1,NULL,&DimQGrid,&DimQBlock,0,NULL,&e);
|
||||
CHECK_ERROR("clEnqueueNDRangeKernel")
|
||||
clWaitForEvents(1, &e);
|
||||
printf ("%llu\n", readElapsedTime(e));
|
||||
#else
|
||||
clStatus = clEnqueueNDRangeKernel(clPrm->clCommandQueue,clPrm->clKernel,1,NULL,&DimQGrid,&DimQBlock,0,NULL,NULL);
|
||||
CHECK_ERROR("clEnqueueNDRangeKernel")
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
void createDataStructsCPU(int numK, int numX, float** phiMag,
|
||||
float** Qr, float** Qi)
|
||||
{
|
||||
*phiMag = (float* ) memalign(16, numK * sizeof(float));
|
||||
*Qr = (float*) memalign(16, numX * sizeof (float));
|
||||
*Qi = (float*) memalign(16, numX * sizeof (float));
|
||||
}
|
||||
|
||||
14
benchmarks/opencl/mri-q/computeQ.h
Normal file
14
benchmarks/opencl/mri-q/computeQ.h
Normal file
@@ -0,0 +1,14 @@
|
||||
#ifndef __COMPUTEQ__
|
||||
#define __COMPUTEQ__
|
||||
|
||||
void computePhiMag_GPU(int numK,cl_mem phiR_d,cl_mem phiI_d,cl_mem phiMag_d,clPrmtr* clPrm);
|
||||
void computeQ_GPU (int numK,int numX,
|
||||
cl_mem x_d, cl_mem y_d, cl_mem z_d,
|
||||
struct kValues* kVals,
|
||||
cl_mem Qr_d, cl_mem Qi_d,
|
||||
clPrmtr* clPrm);
|
||||
|
||||
void createDataStructsCPU(int numK, int numX, float** phiMag,
|
||||
float** Qr, float** Qi);
|
||||
|
||||
#endif
|
||||
78
benchmarks/opencl/mri-q/file.cc
Normal file
78
benchmarks/opencl/mri-q/file.cc
Normal file
@@ -0,0 +1,78 @@
|
||||
/***************************************************************************
|
||||
*cr
|
||||
*cr (C) Copyright 2007 The Board of Trustees of the
|
||||
*cr University of Illinois
|
||||
*cr All Rights Reserved
|
||||
*cr
|
||||
***************************************************************************/
|
||||
|
||||
//#include <endian.h>
|
||||
#include <stdlib.h>
|
||||
#include <malloc.h>
|
||||
#include <stdio.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
#include "file.h"
|
||||
|
||||
#if __BYTE_ORDER != __LITTLE_ENDIAN
|
||||
# error "File I/O is not implemented for this system: wrong endianness."
|
||||
#endif
|
||||
|
||||
extern "C"
|
||||
void inputData(char* fName, int* _numK, int* _numX,
|
||||
float** kx, float** ky, float** kz,
|
||||
float** x, float** y, float** z,
|
||||
float** phiR, float** phiI)
|
||||
{
|
||||
int numK, numX;
|
||||
FILE* fid = fopen(fName, "r");
|
||||
|
||||
if (fid == NULL)
|
||||
{
|
||||
fprintf(stderr, "Cannot open input file\n");
|
||||
exit(-1);
|
||||
}
|
||||
fread (&numK, sizeof (int), 1, fid);
|
||||
*_numK = numK;
|
||||
fread (&numX, sizeof (int), 1, fid);
|
||||
*_numX = numX;
|
||||
*kx = (float *) memalign(16, numK * sizeof (float));
|
||||
fread (*kx, sizeof (float), numK, fid);
|
||||
*ky = (float *) memalign(16, numK * sizeof (float));
|
||||
fread (*ky, sizeof (float), numK, fid);
|
||||
*kz = (float *) memalign(16, numK * sizeof (float));
|
||||
fread (*kz, sizeof (float), numK, fid);
|
||||
*x = (float *) memalign(16, numX * sizeof (float));
|
||||
fread (*x, sizeof (float), numX, fid);
|
||||
*y = (float *) memalign(16, numX * sizeof (float));
|
||||
fread (*y, sizeof (float), numX, fid);
|
||||
*z = (float *) memalign(16, numX * sizeof (float));
|
||||
fread (*z, sizeof (float), numX, fid);
|
||||
*phiR = (float *) memalign(16, numK * sizeof (float));
|
||||
fread (*phiR, sizeof (float), numK, fid);
|
||||
*phiI = (float *) memalign(16, numK * sizeof (float));
|
||||
fread (*phiI, sizeof (float), numK, fid);
|
||||
fclose (fid);
|
||||
}
|
||||
|
||||
extern "C"
|
||||
void outputData(char* fName, float* outR, float* outI, int numX)
|
||||
{
|
||||
FILE* fid = fopen(fName, "w");
|
||||
uint32_t tmp32;
|
||||
|
||||
if (fid == NULL)
|
||||
{
|
||||
fprintf(stderr, "Cannot open output file\n");
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
/* Write the data size */
|
||||
tmp32 = numX;
|
||||
fwrite(&tmp32, sizeof(uint32_t), 1, fid);
|
||||
|
||||
/* Write the reconstructed data */
|
||||
fwrite (outR, sizeof (float), numX, fid);
|
||||
fwrite (outI, sizeof (float), numX, fid);
|
||||
fclose (fid);
|
||||
}
|
||||
22
benchmarks/opencl/mri-q/file.h
Normal file
22
benchmarks/opencl/mri-q/file.h
Normal file
@@ -0,0 +1,22 @@
|
||||
/***************************************************************************
|
||||
*cr
|
||||
*cr (C) Copyright 2007 The Board of Trustees of the
|
||||
*cr University of Illinois
|
||||
*cr All Rights Reserved
|
||||
*cr
|
||||
***************************************************************************/
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
void inputData(char* fName, int* _numK, int* _numX,
|
||||
float** kx, float** ky, float** kz,
|
||||
float** x, float** y, float** z,
|
||||
float** phiR, float** phiI);
|
||||
|
||||
void outputData(char* fName, float* outR, float* outI, int numX);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
55
benchmarks/opencl/mri-q/gpu_info.c
Normal file
55
benchmarks/opencl/mri-q/gpu_info.c
Normal file
@@ -0,0 +1,55 @@
|
||||
/***************************************************************************
|
||||
*cr
|
||||
*cr (C) Copyright 2010 The Board of Trustees of the
|
||||
*cr University of Illinois
|
||||
*cr All Rights Reserved
|
||||
*cr
|
||||
***************************************************************************/
|
||||
//#include <endian.h>
|
||||
#include <stdlib.h>
|
||||
#include <malloc.h>
|
||||
#include <stdio.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
#include "gpu_info.h"
|
||||
|
||||
void compute_active_thread(size_t *thread,
|
||||
size_t *grid,
|
||||
int task,
|
||||
int pad,
|
||||
int major,
|
||||
int minor,
|
||||
int sm)
|
||||
{
|
||||
int max_thread;
|
||||
int max_block=8;
|
||||
if(major==1)
|
||||
{
|
||||
if(minor>=2)
|
||||
max_thread=1024;
|
||||
else
|
||||
max_thread=768;
|
||||
}
|
||||
else if(major==2)
|
||||
max_thread=1536;
|
||||
else
|
||||
//newer GPU //keep using 2.0
|
||||
max_thread=1536;
|
||||
|
||||
int _grid;
|
||||
int _thread;
|
||||
|
||||
if(task*pad>sm*max_thread)
|
||||
{
|
||||
_thread=max_thread/max_block;
|
||||
_grid = ((task*pad+_thread-1)/_thread)*_thread;
|
||||
}
|
||||
else
|
||||
{
|
||||
_thread=pad;
|
||||
_grid=task*pad;
|
||||
}
|
||||
|
||||
thread[0]=_thread;
|
||||
grid[0]=_grid;
|
||||
}
|
||||
20
benchmarks/opencl/mri-q/gpu_info.h
Normal file
20
benchmarks/opencl/mri-q/gpu_info.h
Normal file
@@ -0,0 +1,20 @@
|
||||
/***************************************************************************
|
||||
*cr
|
||||
*cr (C) Copyright 2010 The Board of Trustees of the
|
||||
*cr University of Illinois
|
||||
*cr All Rights Reserved
|
||||
*cr
|
||||
***************************************************************************/
|
||||
|
||||
#ifndef __GPUINFOH__
|
||||
#define __GPUINFOH__
|
||||
|
||||
void compute_active_thread(size_t *thread,
|
||||
size_t *grid,
|
||||
int task,
|
||||
int pad,
|
||||
int major,
|
||||
int minor,
|
||||
int sm);
|
||||
|
||||
#endif
|
||||
51
benchmarks/opencl/mri-q/kernel.cl
Normal file
51
benchmarks/opencl/mri-q/kernel.cl
Normal file
@@ -0,0 +1,51 @@
|
||||
#include "macros.h"
|
||||
|
||||
__kernel void
|
||||
ComputePhiMag_GPU(__global float* phiR, __global float* phiI, __global float* phiMag, int numK) {
|
||||
int indexK = get_global_id(0);
|
||||
float real = indexK;
|
||||
float imag = indexK;
|
||||
if (indexK < numK) {
|
||||
/*float*/ real = phiR[indexK];
|
||||
/*float*/ imag = phiI[indexK];
|
||||
phiMag[indexK] = real*real + imag*imag;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void
|
||||
ComputeQ_GPU(int numK, int kGlobalIndex,
|
||||
__global float* x, __global float* y, __global float* z,
|
||||
__global float* Qr, __global float* Qi, __global struct kValues* ck)
|
||||
{
|
||||
float sX;
|
||||
float sY;
|
||||
float sZ;
|
||||
float sQr;
|
||||
float sQi;
|
||||
|
||||
// Determine the element of the X arrays computed by this thread
|
||||
int xIndex = get_group_id(0)*KERNEL_Q_THREADS_PER_BLOCK + get_local_id(0);
|
||||
|
||||
// Read block's X values from global mem to shared mem
|
||||
sX = x[xIndex];
|
||||
sY = y[xIndex];
|
||||
sZ = z[xIndex];
|
||||
sQr = Qr[xIndex];
|
||||
sQi = Qi[xIndex];
|
||||
|
||||
int kIndex = 0;
|
||||
for (; (kIndex < KERNEL_Q_K_ELEMS_PER_GRID); kIndex++) {
|
||||
if (kGlobalIndex < numK) {
|
||||
float expArg;
|
||||
expArg = PIx2 * (ck[kIndex].Kx * sX +
|
||||
ck[kIndex].Ky * sY +
|
||||
ck[kIndex].Kz * sZ);
|
||||
sQr = sQr + ck[kIndex].PhiMag * cos(expArg); // native_cos(expArg);
|
||||
sQi = sQi + ck[kIndex].PhiMag * sin(expArg); // native_sin(expArg);
|
||||
}
|
||||
kGlobalIndex++;
|
||||
}
|
||||
|
||||
Qr[xIndex] = sQr;
|
||||
Qi[xIndex] = sQi;
|
||||
}
|
||||
BIN
benchmarks/opencl/mri-q/libmri-q.a
Normal file
BIN
benchmarks/opencl/mri-q/libmri-q.a
Normal file
Binary file not shown.
BIN
benchmarks/opencl/mri-q/libsgemm.a
Normal file
BIN
benchmarks/opencl/mri-q/libsgemm.a
Normal file
Binary file not shown.
21
benchmarks/opencl/mri-q/macros.h
Normal file
21
benchmarks/opencl/mri-q/macros.h
Normal file
@@ -0,0 +1,21 @@
|
||||
#ifndef __MACROS__
|
||||
#define __MACROS__
|
||||
|
||||
#define PI 3.1415926535897932384626433832795029f
|
||||
#define PIx2 6.2831853071795864769252867665590058f
|
||||
|
||||
#define MIN(X,Y) ((X) < (Y) ? (X) : (Y))
|
||||
#define K_ELEMS_PER_GRID 2048
|
||||
|
||||
#define KERNEL_PHI_MAG_THREADS_PER_BLOCK 256
|
||||
#define KERNEL_Q_THREADS_PER_BLOCK 256
|
||||
#define KERNEL_Q_K_ELEMS_PER_GRID 1024
|
||||
|
||||
struct kValues {
|
||||
float Kx;
|
||||
float Ky;
|
||||
float Kz;
|
||||
float PhiMag;
|
||||
};
|
||||
|
||||
#endif
|
||||
293
benchmarks/opencl/mri-q/main.cc
Normal file
293
benchmarks/opencl/mri-q/main.cc
Normal file
@@ -0,0 +1,293 @@
|
||||
/***************************************************************************
|
||||
*cr
|
||||
*cr (C) Copyright 2007 The Board of Trustees of the
|
||||
*cr University of Illinois
|
||||
*cr All Rights Reserved
|
||||
*cr
|
||||
***************************************************************************/
|
||||
|
||||
/*
|
||||
* C code for creating the Q data structure for fast convolution-based
|
||||
* Hessian multiplication for arbitrary k-space trajectories.
|
||||
*
|
||||
* Inputs:
|
||||
* kx - VECTOR of kx values, same length as ky and kz
|
||||
* ky - VECTOR of ky values, same length as kx and kz
|
||||
* kz - VECTOR of kz values, same length as kx and ky
|
||||
* x - VECTOR of x values, same length as y and z
|
||||
* y - VECTOR of y values, same length as x and z
|
||||
* z - VECTOR of z values, same length as x and y
|
||||
* phi - VECTOR of the Fourier transform of the spatial basis
|
||||
* function, evaluated at [kx, ky, kz]. Same length as kx, ky, and kz.
|
||||
*
|
||||
* recommended g++ options:
|
||||
* -O3 -lm -ffast-math -funroll-all-loops
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <sys/time.h>
|
||||
#include <parboil.h>
|
||||
#include <CL/cl.h>
|
||||
|
||||
#include "ocl.h"
|
||||
#include "file.h"
|
||||
#include "macros.h"
|
||||
#include "computeQ.h"
|
||||
|
||||
static void
|
||||
setupMemoryGPU(int num, int size, cl_mem* dev_ptr, float* host_ptr,clPrmtr* clPrm)
|
||||
{
|
||||
cl_int clStatus;
|
||||
*dev_ptr = clCreateBuffer(clPrm->clContext,CL_MEM_READ_ONLY,num*size,NULL,&clStatus);
|
||||
CHECK_ERROR("clCreateBuffer");
|
||||
clStatus = clEnqueueWriteBuffer(clPrm->clCommandQueue,*dev_ptr,CL_TRUE,0,num*size,host_ptr,0,NULL,NULL);
|
||||
CHECK_ERROR("clEnequeueWriteBuffer");
|
||||
}
|
||||
|
||||
static void
|
||||
cleanupMemoryGPU(int num, int size, cl_mem* dev_ptr, float* host_ptr, clPrmtr* clPrm)
|
||||
{
|
||||
cl_int clStatus;
|
||||
clStatus = clEnqueueReadBuffer(clPrm->clCommandQueue,*dev_ptr,CL_TRUE,0,num*size,host_ptr,0,NULL,NULL);
|
||||
CHECK_ERROR("clEnqueueReadBuffer")
|
||||
clStatus = clReleaseMemObject(*dev_ptr);
|
||||
CHECK_ERROR("clReleaseMemObject")
|
||||
}
|
||||
|
||||
int
|
||||
main (int argc, char *argv[]) {
|
||||
int numX, numK; /* Number of X and K values */
|
||||
int original_numK; /* Number of K values in input file */
|
||||
float *kx, *ky, *kz; /* K trajectory (3D vectors) */
|
||||
float *x, *y, *z; /* X coordinates (3D vectors) */
|
||||
float *phiR, *phiI; /* Phi values (complex) */
|
||||
float *phiMag; /* Magnitude of Phi */
|
||||
float *Qr, *Qi; /* Q signal (complex) */
|
||||
|
||||
struct kValues* kVals;
|
||||
|
||||
struct pb_Parameters *params;
|
||||
struct pb_TimerSet timers;
|
||||
|
||||
pb_InitializeTimerSet(&timers);
|
||||
|
||||
/* Read command line */
|
||||
params = pb_ReadParameters(&argc, argv);
|
||||
/*if ((params->inpFiles[0] == NULL) || (params->inpFiles[1] != NULL))
|
||||
{
|
||||
fprintf(stderr, "Expecting one input filename\n");
|
||||
exit(-1);
|
||||
}*/
|
||||
params->inpFiles = (char **)malloc(sizeof(char *) * 2);
|
||||
params->inpFiles[0] = (char *)malloc(100);
|
||||
params->inpFiles[1] = NULL;
|
||||
strncpy(params->inpFiles[0], "32_32_32_dataset.bin", 100);
|
||||
|
||||
/* Read in data */
|
||||
pb_SwitchToTimer(&timers, pb_TimerID_IO);
|
||||
inputData(params->inpFiles[0],
|
||||
&original_numK, &numX,
|
||||
&kx, &ky, &kz,
|
||||
&x, &y, &z,
|
||||
&phiR, &phiI);
|
||||
|
||||
printf("OK\n");
|
||||
|
||||
/* Reduce the number of k-space samples if a number is given
|
||||
* on the command line */
|
||||
if (argc < 2)
|
||||
numK = original_numK;
|
||||
else
|
||||
{
|
||||
int inputK;
|
||||
char *end;
|
||||
inputK = strtol(argv[1], &end, 10);
|
||||
if (end == argv[1])
|
||||
{
|
||||
fprintf(stderr, "Expecting an integer parameter\n");
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
numK = MIN(inputK, original_numK);
|
||||
}
|
||||
|
||||
printf("%d pixels in output; %d samples in trajectory; using %d samples\n",
|
||||
numX, original_numK, numK);
|
||||
|
||||
pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
|
||||
|
||||
clPrmtr clPrm;
|
||||
|
||||
pb_Context* pb_context;
|
||||
pb_context = pb_InitOpenCLContext(params);
|
||||
if (pb_context == NULL) {
|
||||
fprintf (stderr, "Error: No OpenCL platform/device can be found.");
|
||||
return -1;
|
||||
}
|
||||
|
||||
cl_int clStatus;
|
||||
cl_device_id clDevice = (cl_device_id) pb_context->clDeviceId;
|
||||
cl_platform_id clPlatform = (cl_platform_id) pb_context->clPlatformId;
|
||||
clPrm.clContext = (cl_context) pb_context->clContext;
|
||||
|
||||
clPrm.clCommandQueue = clCreateCommandQueue(clPrm.clContext,clDevice,CL_QUEUE_PROFILING_ENABLE,&clStatus);
|
||||
CHECK_ERROR("clCreateCommandQueue")
|
||||
|
||||
pb_SetOpenCL(&(clPrm.clContext), &(clPrm.clCommandQueue));
|
||||
|
||||
printf("OK\n");
|
||||
|
||||
//const char* clSource[] = {readFile("src/opencl_base/kernels.cl")};
|
||||
//cl_program clProgram = clCreateProgramWithSource(clPrm.clContext,1,clSource,NULL,&clStatus);
|
||||
cl_program clProgram = clCreateProgramWithBuiltInKernels(
|
||||
clPrm.clContext, 1, &clDevice, "ComputePhiMag_GPU;ComputeQ_GPU", &clStatus);
|
||||
CHECK_ERROR("clCreateProgramWithSource")
|
||||
|
||||
char options[50];
|
||||
sprintf(options,"-I src/opencl_nvidia");
|
||||
clStatus = clBuildProgram(clProgram,0,NULL,options,NULL,NULL);
|
||||
if (clStatus != CL_SUCCESS) {
|
||||
char buf[4096];
|
||||
clGetProgramBuildInfo(clProgram, clDevice, CL_PROGRAM_BUILD_LOG, 4096, buf, NULL);
|
||||
printf ("%s\n", buf);
|
||||
CHECK_ERROR("clBuildProgram")
|
||||
}
|
||||
|
||||
/* Create CPU data structures */
|
||||
createDataStructsCPU(numK, numX, &phiMag, &Qr, &Qi);
|
||||
|
||||
/* GPU section 1 (precompute PhiMag) */
|
||||
{
|
||||
clPrm.clKernel = clCreateKernel(clProgram,"ComputePhiMag_GPU",&clStatus);
|
||||
CHECK_ERROR("clCreateKernel")
|
||||
|
||||
/* Mirror several data structures on the device */
|
||||
cl_mem phiR_d;
|
||||
cl_mem phiI_d;
|
||||
cl_mem phiMag_d;
|
||||
|
||||
pb_SwitchToTimer(&timers, pb_TimerID_COPY);
|
||||
|
||||
setupMemoryGPU(numK,sizeof(float),&phiR_d,phiR,&clPrm);
|
||||
setupMemoryGPU(numK,sizeof(float),&phiI_d,phiI,&clPrm);
|
||||
phiMag_d = clCreateBuffer(clPrm.clContext,CL_MEM_WRITE_ONLY,numK*sizeof(float),NULL,&clStatus);
|
||||
CHECK_ERROR("clCreateBuffer")
|
||||
|
||||
clStatus = clFinish(clPrm.clCommandQueue);
|
||||
CHECK_ERROR("clFinish")
|
||||
|
||||
pb_SwitchToTimer(&timers, pb_TimerID_KERNEL);
|
||||
|
||||
computePhiMag_GPU(numK, phiR_d, phiI_d, phiMag_d, &clPrm);
|
||||
|
||||
clStatus = clFinish(clPrm.clCommandQueue);
|
||||
CHECK_ERROR("clFinish")
|
||||
|
||||
pb_SwitchToTimer(&timers, pb_TimerID_COPY);
|
||||
|
||||
cleanupMemoryGPU(numK,sizeof(float),&phiMag_d,phiMag,&clPrm);
|
||||
|
||||
clStatus = clReleaseMemObject(phiR_d);
|
||||
CHECK_ERROR("clReleaseMemObject")
|
||||
clStatus = clReleaseMemObject(phiI_d);
|
||||
CHECK_ERROR("clReleaseMemObject")
|
||||
}
|
||||
|
||||
pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
|
||||
|
||||
kVals = (struct kValues*)calloc(numK, sizeof (struct kValues));
|
||||
|
||||
int k;
|
||||
for (k = 0; k < numK; k++) {
|
||||
kVals[k].Kx = kx[k];
|
||||
kVals[k].Ky = ky[k];
|
||||
kVals[k].Kz = kz[k];
|
||||
kVals[k].PhiMag = phiMag[k];
|
||||
}
|
||||
|
||||
free(phiMag);
|
||||
|
||||
clStatus = clReleaseKernel(clPrm.clKernel);
|
||||
|
||||
/* GPU section 2 */
|
||||
{
|
||||
clPrm.clKernel = clCreateKernel(clProgram,"ComputeQ_GPU",&clStatus);
|
||||
CHECK_ERROR("clCreateKernel")
|
||||
|
||||
cl_mem x_d;
|
||||
cl_mem y_d;
|
||||
cl_mem z_d;
|
||||
cl_mem Qr_d;
|
||||
cl_mem Qi_d;
|
||||
|
||||
pb_SwitchToTimer(&timers, pb_TimerID_COPY);
|
||||
|
||||
setupMemoryGPU(numX,sizeof(float),&x_d,x,&clPrm);
|
||||
setupMemoryGPU(numX,sizeof(float),&y_d,y,&clPrm);
|
||||
setupMemoryGPU(numX,sizeof(float),&z_d,z,&clPrm);
|
||||
|
||||
Qr_d = clCreateBuffer(clPrm.clContext,CL_MEM_READ_WRITE,numX*sizeof(float),NULL,&clStatus);
|
||||
CHECK_ERROR("clCreateBuffer")
|
||||
clMemSet(&clPrm,Qr_d,0,numX*sizeof(float));
|
||||
Qi_d = clCreateBuffer(clPrm.clContext,CL_MEM_READ_WRITE,numX*sizeof(float),NULL,&clStatus);
|
||||
CHECK_ERROR("clCreateBuffer")
|
||||
clMemSet(&clPrm,Qi_d,0,numX*sizeof(float));
|
||||
|
||||
clStatus = clFinish(clPrm.clCommandQueue);
|
||||
CHECK_ERROR("clFinish")
|
||||
|
||||
pb_SwitchToTimer(&timers, pb_TimerID_KERNEL);
|
||||
|
||||
computeQ_GPU(numK, numX, x_d, y_d, z_d, kVals, Qr_d, Qi_d, &clPrm);
|
||||
|
||||
clStatus = clFinish(clPrm.clCommandQueue);
|
||||
CHECK_ERROR("clFinish")
|
||||
|
||||
pb_SwitchToTimer(&timers, pb_TimerID_COPY);
|
||||
|
||||
clStatus = clReleaseMemObject(x_d);
|
||||
CHECK_ERROR("clReleaseMemObject")
|
||||
clStatus = clReleaseMemObject(y_d);
|
||||
CHECK_ERROR("clReleaseMemObject")
|
||||
clStatus = clReleaseMemObject(z_d);
|
||||
CHECK_ERROR("clReleaseMemObject")
|
||||
cleanupMemoryGPU(numX,sizeof(float),&Qr_d,Qr,&clPrm);
|
||||
cleanupMemoryGPU(numX,sizeof(float),&Qi_d,Qi,&clPrm);
|
||||
}
|
||||
|
||||
pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
|
||||
|
||||
if (params->outFile)
|
||||
{
|
||||
/* Write Q to file */
|
||||
pb_SwitchToTimer(&timers, pb_TimerID_IO);
|
||||
outputData(params->outFile, Qr, Qi, numX);
|
||||
pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
|
||||
}
|
||||
|
||||
free (kx);
|
||||
free (ky);
|
||||
free (kz);
|
||||
free (x);
|
||||
free (y);
|
||||
free (z);
|
||||
free (phiR);
|
||||
free (phiI);
|
||||
free (kVals);
|
||||
free (Qr);
|
||||
free (Qi);
|
||||
|
||||
//free((void*)clSource[0]);
|
||||
|
||||
clStatus = clReleaseKernel(clPrm.clKernel);
|
||||
clStatus = clReleaseProgram(clProgram);
|
||||
clStatus = clReleaseCommandQueue(clPrm.clCommandQueue);
|
||||
clStatus = clReleaseContext(clPrm.clContext);
|
||||
|
||||
pb_SwitchToTimer(&timers, pb_TimerID_NONE);
|
||||
pb_PrintTimerSet(&timers);
|
||||
|
||||
pb_FreeParameters(params);
|
||||
|
||||
return 0;
|
||||
}
|
||||
50
benchmarks/opencl/mri-q/ocl copy.c
Normal file
50
benchmarks/opencl/mri-q/ocl copy.c
Normal file
@@ -0,0 +1,50 @@
|
||||
#include <CL/cl.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "ocl.h"
|
||||
|
||||
char* readFile(const char* fileName)
|
||||
{
|
||||
FILE* fp;
|
||||
fp = fopen(fileName,"r");
|
||||
if(fp == NULL)
|
||||
{
|
||||
printf("Error 1!\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
fseek(fp,0,SEEK_END);
|
||||
long size = ftell(fp);
|
||||
rewind(fp);
|
||||
|
||||
char* buffer = (char*)malloc(sizeof(char)*(size+1));
|
||||
if(buffer == NULL)
|
||||
{
|
||||
printf("Error 2!\n");
|
||||
fclose(fp);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
size_t res = fread(buffer,1,size,fp);
|
||||
if(res != size)
|
||||
{
|
||||
printf("Error 3!\n");
|
||||
fclose(fp);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
buffer[size] = 0;
|
||||
fclose(fp);
|
||||
return buffer;
|
||||
}
|
||||
|
||||
void clMemSet(cl_command_queue clCommandQueue, cl_mem buf, int val, size_t size)
|
||||
{
|
||||
cl_int clStatus;
|
||||
char* temp = (char*)malloc(size);
|
||||
memset(temp,val,size);
|
||||
clStatus = clEnqueueWriteBuffer(clCommandQueue,buf,CL_TRUE,0,size,temp,0,NULL,NULL);
|
||||
CHECK_ERROR("clEnqueueWriteBuffer")
|
||||
free(temp);
|
||||
}
|
||||
21
benchmarks/opencl/mri-q/ocl copy.h
Normal file
21
benchmarks/opencl/mri-q/ocl copy.h
Normal file
@@ -0,0 +1,21 @@
|
||||
#ifndef __OCLH__
|
||||
#define __OCLH__
|
||||
|
||||
typedef struct {
|
||||
cl_uint major;
|
||||
cl_uint minor;
|
||||
cl_uint multiProcessorCount;
|
||||
} OpenCLDeviceProp;
|
||||
|
||||
void clMemSet(cl_command_queue, cl_mem, int, size_t);
|
||||
char* readFile(const char*);
|
||||
|
||||
#define CHECK_ERROR(errorMessage) \
|
||||
if(clStatus != CL_SUCCESS) \
|
||||
{ \
|
||||
printf("Error: %s!\n",errorMessage); \
|
||||
printf("Line: %d\n",__LINE__); \
|
||||
exit(1); \
|
||||
}
|
||||
|
||||
#endif
|
||||
50
benchmarks/opencl/mri-q/ocl.c
Normal file
50
benchmarks/opencl/mri-q/ocl.c
Normal file
@@ -0,0 +1,50 @@
|
||||
#include <CL/cl.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "ocl.h"
|
||||
#include <parboil.h>
|
||||
|
||||
char* readFile(const char* fileName)
|
||||
{
|
||||
FILE* fp;
|
||||
fp = fopen(fileName,"r");
|
||||
if(fp == NULL)
|
||||
{
|
||||
printf("Error 1!\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
fseek(fp,0,SEEK_END);
|
||||
long size = ftell(fp);
|
||||
rewind(fp);
|
||||
|
||||
char* buffer = (char*)malloc(sizeof(char)*(size+1));
|
||||
if(buffer == NULL)
|
||||
{
|
||||
printf("Error 2!\n");
|
||||
fclose(fp);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
size_t res = fread(buffer,1,size,fp);
|
||||
if(res != size)
|
||||
{
|
||||
printf("Error 3!\n");
|
||||
fclose(fp);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
buffer[size] = 0;
|
||||
fclose(fp);
|
||||
return buffer;
|
||||
}
|
||||
|
||||
void clMemSet(clPrmtr* clPrm, cl_mem buf, int val, size_t size)
|
||||
{
|
||||
cl_int clStatus;
|
||||
char* temp = (char*)malloc(size);
|
||||
memset(temp,val,size);
|
||||
clStatus = clEnqueueWriteBuffer(clPrm->clCommandQueue,buf,CL_TRUE,0,size,temp,0,NULL,NULL);
|
||||
CHECK_ERROR("clEnqueueWriteBuffer")
|
||||
free(temp);
|
||||
}
|
||||
23
benchmarks/opencl/mri-q/ocl.h
Normal file
23
benchmarks/opencl/mri-q/ocl.h
Normal file
@@ -0,0 +1,23 @@
|
||||
#ifndef __OCLH__
|
||||
#define __OCLH__
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
typedef struct {
|
||||
cl_context clContext;
|
||||
cl_command_queue clCommandQueue;
|
||||
cl_kernel clKernel;
|
||||
} clPrmtr;
|
||||
|
||||
void clMemSet(clPrmtr*, cl_mem, int, size_t);
|
||||
char* readFile(const char*);
|
||||
|
||||
#define CHECK_ERROR(errorMessage) \
|
||||
if(clStatus != CL_SUCCESS) \
|
||||
{ \
|
||||
printf("Error: %s!\n",errorMessage); \
|
||||
printf("Line: %d\n",__LINE__); \
|
||||
exit(1); \
|
||||
}
|
||||
|
||||
#endif
|
||||
348
benchmarks/opencl/mri-q/parboil.h
Normal file
348
benchmarks/opencl/mri-q/parboil.h
Normal file
@@ -0,0 +1,348 @@
|
||||
/*
|
||||
* (c) 2010 The Board of Trustees of the University of Illinois.
|
||||
*/
|
||||
#ifndef PARBOIL_HEADER
|
||||
#define PARBOIL_HEADER
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <unistd.h>
|
||||
|
||||
/* A platform as specified by the user on the command line */
|
||||
struct pb_PlatformParam {
|
||||
char *name; /* The platform name. This string is owned. */
|
||||
char *version; /* The platform version; may be NULL.
|
||||
* This string is owned. */
|
||||
};
|
||||
|
||||
/* Create a PlatformParam from the given strings.
|
||||
* 'name' must not be NULL. 'version' may be NULL.
|
||||
* If not NULL, the strings should have been allocated by malloc(),
|
||||
* and they will be owned by the returned object.
|
||||
*/
|
||||
struct pb_PlatformParam *
|
||||
pb_PlatformParam(char *name, char *version);
|
||||
|
||||
void
|
||||
pb_FreePlatformParam(struct pb_PlatformParam *);
|
||||
|
||||
/* A criterion for how to select a device */
|
||||
enum pb_DeviceSelectionCriterion {
|
||||
pb_Device_INDEX, /* Enumerate the devices and select one
|
||||
* by its number */
|
||||
pb_Device_CPU, /* Select a CPU device */
|
||||
pb_Device_GPU, /* Select a GPU device */
|
||||
pb_Device_ACCELERATOR, /* Select an accelerator device */
|
||||
pb_Device_NAME /* Select a device by name */
|
||||
};
|
||||
|
||||
/* A device as specified by the user on the command line */
|
||||
struct pb_DeviceParam {
|
||||
enum pb_DeviceSelectionCriterion criterion;
|
||||
union {
|
||||
int index; /* If criterion == pb_Device_INDEX,
|
||||
* the index of the device */
|
||||
char *name; /* If criterion == pb_Device_NAME,
|
||||
* the name of the device.
|
||||
* This string is owned. */
|
||||
};
|
||||
};
|
||||
|
||||
struct pb_DeviceParam *
|
||||
pb_DeviceParam_index(int index);
|
||||
|
||||
struct pb_DeviceParam *
|
||||
pb_DeviceParam_cpu(void);
|
||||
|
||||
struct pb_DeviceParam *
|
||||
pb_DeviceParam_gpu(void);
|
||||
|
||||
struct pb_DeviceParam *
|
||||
pb_DeviceParam_accelerator(void);
|
||||
|
||||
/* Create a by-name device selection criterion.
|
||||
* The string should have been allocated by malloc(), and it will will be
|
||||
* owned by the returned object.
|
||||
*/
|
||||
struct pb_DeviceParam *
|
||||
pb_DeviceParam_name(char *name);
|
||||
|
||||
void
|
||||
pb_FreeDeviceParam(struct pb_DeviceParam *);
|
||||
|
||||
/* Command line parameters for benchmarks */
|
||||
struct pb_Parameters {
|
||||
char *outFile; /* If not NULL, the raw output of the
|
||||
* computation should be saved to this
|
||||
* file. The string is owned. */
|
||||
char **inpFiles; /* A NULL-terminated array of strings
|
||||
* holding the input file(s) for the
|
||||
* computation. The array and strings
|
||||
* are owned. */
|
||||
struct pb_PlatformParam *platform; /* If not NULL, the platform
|
||||
* specified on the command line. */
|
||||
struct pb_DeviceParam *device; /* If not NULL, the device
|
||||
* specified on the command line. */
|
||||
};
|
||||
|
||||
/* Read command-line parameters.
|
||||
*
|
||||
* The argc and argv parameters to main are read, and any parameters
|
||||
* interpreted by this function are removed from the argument list.
|
||||
*
|
||||
* A new instance of struct pb_Parameters is returned.
|
||||
* If there is an error, then an error message is printed on stderr
|
||||
* and NULL is returned.
|
||||
*/
|
||||
struct pb_Parameters *
|
||||
pb_ReadParameters(int *_argc, char **argv);
|
||||
|
||||
/* Free an instance of struct pb_Parameters.
|
||||
*/
|
||||
void
|
||||
pb_FreeParameters(struct pb_Parameters *p);
|
||||
|
||||
void
|
||||
pb_FreeStringArray(char **);
|
||||
|
||||
/* Count the number of input files in a pb_Parameters instance.
|
||||
*/
|
||||
int
|
||||
pb_Parameters_CountInputs(struct pb_Parameters *p);
|
||||
|
||||
/* A time or duration. */
|
||||
//#if _POSIX_VERSION >= 200112L
|
||||
typedef unsigned long long pb_Timestamp; /* time in microseconds */
|
||||
//#else
|
||||
//# error "Timestamps not implemented"
|
||||
//#endif
|
||||
|
||||
enum pb_TimerState {
|
||||
pb_Timer_STOPPED,
|
||||
pb_Timer_RUNNING,
|
||||
};
|
||||
|
||||
struct pb_Timer {
|
||||
enum pb_TimerState state;
|
||||
pb_Timestamp elapsed; /* Amount of time elapsed so far */
|
||||
pb_Timestamp init; /* Beginning of the current time interval,
|
||||
* if state is RUNNING. End of the last
|
||||
* recorded time interfal otherwise. */
|
||||
};
|
||||
|
||||
/* Reset a timer.
|
||||
* Use this to initialize a timer or to clear
|
||||
* its elapsed time. The reset timer is stopped.
|
||||
*/
|
||||
void
|
||||
pb_ResetTimer(struct pb_Timer *timer);
|
||||
|
||||
/* Start a timer. The timer is set to RUNNING mode and
|
||||
* time elapsed while the timer is running is added to
|
||||
* the timer.
|
||||
* The timer should not already be running.
|
||||
*/
|
||||
void
|
||||
pb_StartTimer(struct pb_Timer *timer);
|
||||
|
||||
/* Stop a timer.
|
||||
* This stops adding elapsed time to the timer.
|
||||
* The timer should not already be stopped.
|
||||
*/
|
||||
void
|
||||
pb_StopTimer(struct pb_Timer *timer);
|
||||
|
||||
/* Get the elapsed time in seconds. */
|
||||
double
|
||||
pb_GetElapsedTime(struct pb_Timer *timer);
|
||||
|
||||
/* Execution time is assigned to one of these categories. */
|
||||
enum pb_TimerID {
|
||||
pb_TimerID_NONE = 0,
|
||||
pb_TimerID_IO, /* Time spent in input/output */
|
||||
pb_TimerID_KERNEL, /* Time spent computing on the device,
|
||||
* recorded asynchronously */
|
||||
pb_TimerID_COPY, /* Time spent synchronously moving data
|
||||
* to/from device and allocating/freeing
|
||||
* memory on the device */
|
||||
pb_TimerID_DRIVER, /* Time spent in the host interacting with the
|
||||
* driver, primarily for recording the time
|
||||
* spent queueing asynchronous operations */
|
||||
pb_TimerID_COPY_ASYNC, /* Time spent in asynchronous transfers */
|
||||
pb_TimerID_COMPUTE, /* Time for all program execution other
|
||||
* than parsing command line arguments,
|
||||
* I/O, kernel, and copy */
|
||||
pb_TimerID_OVERLAP, /* Time double-counted in asynchronous and
|
||||
* host activity: automatically filled in,
|
||||
* not intended for direct usage */
|
||||
pb_TimerID_LAST /* Number of timer IDs */
|
||||
};
|
||||
|
||||
/* Dynamic list of asynchronously tracked times between events */
|
||||
struct pb_async_time_marker_list {
|
||||
char *label; // actually just a pointer to a string
|
||||
enum pb_TimerID timerID; /* The ID to which the interval beginning
|
||||
* with this marker should be attributed */
|
||||
void * marker;
|
||||
//cudaEvent_t marker; /* The driver event for this marker */
|
||||
struct pb_async_time_marker_list *next;
|
||||
};
|
||||
|
||||
struct pb_SubTimer {
|
||||
char *label;
|
||||
struct pb_Timer timer;
|
||||
struct pb_SubTimer *next;
|
||||
};
|
||||
|
||||
struct pb_SubTimerList {
|
||||
struct pb_SubTimer *current;
|
||||
struct pb_SubTimer *subtimer_list;
|
||||
};
|
||||
|
||||
/* A set of timers for recording execution times. */
|
||||
struct pb_TimerSet {
|
||||
enum pb_TimerID current;
|
||||
struct pb_async_time_marker_list* async_markers;
|
||||
pb_Timestamp async_begin;
|
||||
pb_Timestamp wall_begin;
|
||||
struct pb_Timer timers[pb_TimerID_LAST];
|
||||
struct pb_SubTimerList *sub_timer_list[pb_TimerID_LAST];
|
||||
};
|
||||
|
||||
/* Reset all timers in the set. */
|
||||
void
|
||||
pb_InitializeTimerSet(struct pb_TimerSet *timers);
|
||||
|
||||
void
|
||||
pb_AddSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID pb_Category);
|
||||
|
||||
/* Select which timer the next interval of time should be accounted
|
||||
* to. The selected timer is started and other timers are stopped.
|
||||
* Using pb_TimerID_NONE stops all timers. */
|
||||
void
|
||||
pb_SwitchToTimer(struct pb_TimerSet *timers, enum pb_TimerID timer);
|
||||
|
||||
void
|
||||
pb_SwitchToSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID category);
|
||||
|
||||
/* Print timer values to standard output. */
|
||||
void
|
||||
pb_PrintTimerSet(struct pb_TimerSet *timers);
|
||||
|
||||
/* Release timer resources */
|
||||
void
|
||||
pb_DestroyTimerSet(struct pb_TimerSet * timers);
|
||||
|
||||
void
|
||||
pb_SetOpenCL(void *clContextPtr, void *clCommandQueuePtr);
|
||||
|
||||
|
||||
typedef struct pb_Device_tag {
|
||||
char* name;
|
||||
void* clDevice;
|
||||
int id;
|
||||
unsigned int in_use;
|
||||
unsigned int available;
|
||||
} pb_Device;
|
||||
|
||||
struct pb_Context_tag;
|
||||
typedef struct pb_Context_tag pb_Context;
|
||||
|
||||
typedef struct pb_Platform_tag {
|
||||
char* name;
|
||||
char* version;
|
||||
void* clPlatform;
|
||||
unsigned int in_use;
|
||||
pb_Context** contexts;
|
||||
pb_Device** devices;
|
||||
} pb_Platform;
|
||||
|
||||
struct pb_Context_tag {
|
||||
void* clPlatformId;
|
||||
void* clContext;
|
||||
void* clDeviceId;
|
||||
pb_Platform* pb_platform;
|
||||
pb_Device* pb_device;
|
||||
};
|
||||
|
||||
// verbosely print out list of platforms and their devices to the console.
|
||||
pb_Platform**
|
||||
pb_GetPlatforms();
|
||||
|
||||
// Choose a platform according to the given platform specification
|
||||
pb_Platform*
|
||||
pb_GetPlatform(struct pb_PlatformParam *platform);
|
||||
|
||||
// choose a platform: by name, name & version
|
||||
pb_Platform*
|
||||
pb_GetPlatformByName(const char* name);
|
||||
|
||||
pb_Platform*
|
||||
pb_GetPlatformByNameAndVersion(const char* name, const char* version);
|
||||
|
||||
// Choose a device according to the given device specification
|
||||
pb_Device*
|
||||
pb_GetDevice(pb_Platform* pb_platform, struct pb_DeviceParam *device);
|
||||
|
||||
pb_Device**
|
||||
pb_GetDevices(pb_Platform* pb_platform);
|
||||
|
||||
// choose a device by name.
|
||||
pb_Device*
|
||||
pb_GetDeviceByName(pb_Platform* pb_platform, const char* name);
|
||||
|
||||
pb_Platform*
|
||||
pb_GetPlatformByEnvVars();
|
||||
|
||||
pb_Context*
|
||||
pb_InitOpenCLContext(struct pb_Parameters* parameters);
|
||||
|
||||
void
|
||||
pb_ReleasePlatforms();
|
||||
|
||||
void
|
||||
pb_ReleaseContext(pb_Context* c);
|
||||
|
||||
void
|
||||
pb_PrintPlatformInfo(pb_Context* c);
|
||||
|
||||
void
|
||||
perf_init();
|
||||
|
||||
//#define MEASURE_KERNEL_TIME
|
||||
|
||||
#include <CL/cl.h>
|
||||
|
||||
#ifdef MEASURE_KERNEL_TIME
|
||||
#define clEnqueueNDRangeKernel(q,k,d,o,dg,db,a,b,c) pb_clEnqueueNDRangeKernel((q), (k), (d), (o), (dg), (db), (a), (b), (c))
|
||||
cl_int
|
||||
pb_clEnqueueNDRangeKernel(cl_command_queue /* command_queue */,
|
||||
cl_kernel /* kernel */,
|
||||
cl_uint /* work_dim */,
|
||||
const size_t * /* global_work_offset */,
|
||||
const size_t * /* global_work_size */,
|
||||
const size_t * /* local_work_size */,
|
||||
cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event * /* event_wait_list */,
|
||||
cl_event * /* event */);
|
||||
#endif
|
||||
|
||||
enum { T_FLOAT, T_DOUBLE, T_SHORT, T_INT, T_UCHAR };
|
||||
void pb_sig_float(char*, float*, int);
|
||||
void pb_sig_double(char*, double*, int);
|
||||
void pb_sig_short(char*, short*, int);
|
||||
void pb_sig_int(char*, int*, int);
|
||||
void pb_sig_uchar(char*, unsigned char*, unsigned int);
|
||||
void pb_sig_clmem(char*, cl_command_queue, cl_mem, int);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif //PARBOIL_HEADER
|
||||
|
||||
1394
benchmarks/opencl/mri-q/parboil_opencl.c
Normal file
1394
benchmarks/opencl/mri-q/parboil_opencl.c
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,427 +0,0 @@
|
||||
/*
|
||||
* (c) 2007 The Board of Trustees of the University of Illinois.
|
||||
*/
|
||||
|
||||
#include <parboil.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#if _POSIX_VERSION >= 200112L
|
||||
# include <sys/time.h>
|
||||
#endif
|
||||
|
||||
|
||||
/*****************************************************************************/
|
||||
/* Timer routines */
|
||||
|
||||
static void
|
||||
accumulate_time(pb_Timestamp *accum,
|
||||
pb_Timestamp start,
|
||||
pb_Timestamp end)
|
||||
{
|
||||
#if _POSIX_VERSION >= 200112L
|
||||
*accum += end - start;
|
||||
#else
|
||||
# error "Timestamps not implemented for this system"
|
||||
#endif
|
||||
}
|
||||
|
||||
#if _POSIX_VERSION >= 200112L
|
||||
static pb_Timestamp get_time()
|
||||
{
|
||||
struct timeval tv;
|
||||
gettimeofday(&tv, NULL);
|
||||
return (pb_Timestamp) (tv.tv_sec * 1000000LL + tv.tv_usec);
|
||||
}
|
||||
#else
|
||||
# error "no supported time libraries are available on this platform"
|
||||
#endif
|
||||
|
||||
void
|
||||
pb_ResetTimer(struct pb_Timer *timer)
|
||||
{
|
||||
timer->state = pb_Timer_STOPPED;
|
||||
|
||||
#if _POSIX_VERSION >= 200112L
|
||||
timer->elapsed = 0;
|
||||
#else
|
||||
# error "pb_ResetTimer: not implemented for this system"
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
pb_StartTimer(struct pb_Timer *timer)
|
||||
{
|
||||
if (timer->state != pb_Timer_STOPPED) {
|
||||
fputs("Ignoring attempt to start a running timer\n", stderr);
|
||||
return;
|
||||
}
|
||||
|
||||
timer->state = pb_Timer_RUNNING;
|
||||
|
||||
#if _POSIX_VERSION >= 200112L
|
||||
{
|
||||
struct timeval tv;
|
||||
gettimeofday(&tv, NULL);
|
||||
timer->init = tv.tv_sec * 1000000LL + tv.tv_usec;
|
||||
}
|
||||
#else
|
||||
# error "pb_StartTimer: not implemented for this system"
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
pb_StartTimerAndSubTimer(struct pb_Timer *timer, struct pb_Timer *subtimer)
|
||||
{
|
||||
unsigned int numNotStopped = 0x3; // 11
|
||||
if (timer->state != pb_Timer_STOPPED) {
|
||||
fputs("Warning: Timer was not stopped\n", stderr);
|
||||
numNotStopped &= 0x1; // Zero out 2^1
|
||||
}
|
||||
if (subtimer->state != pb_Timer_STOPPED) {
|
||||
fputs("Warning: Subtimer was not stopped\n", stderr);
|
||||
numNotStopped &= 0x2; // Zero out 2^0
|
||||
}
|
||||
if (numNotStopped == 0x0) {
|
||||
fputs("Ignoring attempt to start running timer and subtimer\n", stderr);
|
||||
return;
|
||||
}
|
||||
|
||||
timer->state = pb_Timer_RUNNING;
|
||||
subtimer->state = pb_Timer_RUNNING;
|
||||
|
||||
#if _POSIX_VERSION >= 200112L
|
||||
{
|
||||
struct timeval tv;
|
||||
gettimeofday(&tv, NULL);
|
||||
|
||||
if (numNotStopped & 0x2) {
|
||||
timer->init = tv.tv_sec * 1000000LL + tv.tv_usec;
|
||||
}
|
||||
|
||||
if (numNotStopped & 0x1) {
|
||||
subtimer->init = tv.tv_sec * 1000000LL + tv.tv_usec;
|
||||
}
|
||||
}
|
||||
#else
|
||||
# error "pb_StartTimer: not implemented for this system"
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
pb_StopTimer(struct pb_Timer *timer)
|
||||
{
|
||||
|
||||
pb_Timestamp fini;
|
||||
|
||||
if (timer->state != pb_Timer_RUNNING) {
|
||||
fputs("Ignoring attempt to stop a stopped timer\n", stderr);
|
||||
return;
|
||||
}
|
||||
|
||||
timer->state = pb_Timer_STOPPED;
|
||||
|
||||
#if _POSIX_VERSION >= 200112L
|
||||
{
|
||||
struct timeval tv;
|
||||
gettimeofday(&tv, NULL);
|
||||
fini = tv.tv_sec * 1000000LL + tv.tv_usec;
|
||||
}
|
||||
#else
|
||||
# error "pb_StopTimer: not implemented for this system"
|
||||
#endif
|
||||
|
||||
accumulate_time(&timer->elapsed, timer->init, fini);
|
||||
timer->init = fini;
|
||||
|
||||
}
|
||||
|
||||
void pb_StopTimerAndSubTimer(struct pb_Timer *timer, struct pb_Timer *subtimer) {
|
||||
|
||||
pb_Timestamp fini;
|
||||
|
||||
unsigned int numNotRunning = 0x3; // 0b11
|
||||
if (timer->state != pb_Timer_RUNNING) {
|
||||
fputs("Warning: Timer was not running\n", stderr);
|
||||
numNotRunning &= 0x1; // Zero out 2^1
|
||||
}
|
||||
if (subtimer->state != pb_Timer_RUNNING) {
|
||||
fputs("Warning: Subtimer was not running\n", stderr);
|
||||
numNotRunning &= 0x2; // Zero out 2^0
|
||||
}
|
||||
if (numNotRunning == 0x0) {
|
||||
fputs("Ignoring attempt to stop stopped timer and subtimer\n", stderr);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
timer->state = pb_Timer_STOPPED;
|
||||
subtimer->state = pb_Timer_STOPPED;
|
||||
|
||||
#if _POSIX_VERSION >= 200112L
|
||||
{
|
||||
struct timeval tv;
|
||||
gettimeofday(&tv, NULL);
|
||||
fini = tv.tv_sec * 1000000LL + tv.tv_usec;
|
||||
}
|
||||
#else
|
||||
# error "pb_StopTimer: not implemented for this system"
|
||||
#endif
|
||||
|
||||
if (numNotRunning & 0x2) {
|
||||
accumulate_time(&timer->elapsed, timer->init, fini);
|
||||
timer->init = fini;
|
||||
}
|
||||
|
||||
if (numNotRunning & 0x1) {
|
||||
accumulate_time(&subtimer->elapsed, subtimer->init, fini);
|
||||
subtimer->init = fini;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* Get the elapsed time in seconds. */
|
||||
double
|
||||
pb_GetElapsedTime(struct pb_Timer *timer)
|
||||
{
|
||||
double ret;
|
||||
|
||||
if (timer->state != pb_Timer_STOPPED) {
|
||||
fputs("Elapsed time from a running timer is inaccurate\n", stderr);
|
||||
}
|
||||
|
||||
#if _POSIX_VERSION >= 200112L
|
||||
ret = timer->elapsed / 1e6;
|
||||
#else
|
||||
# error "pb_GetElapsedTime: not implemented for this system"
|
||||
#endif
|
||||
return ret;
|
||||
}
|
||||
|
||||
void
|
||||
pb_InitializeTimerSet(struct pb_TimerSet *timers)
|
||||
{
|
||||
int n;
|
||||
|
||||
timers->wall_begin = get_time();
|
||||
|
||||
timers->current = pb_TimerID_NONE;
|
||||
|
||||
timers->async_markers = NULL;
|
||||
|
||||
|
||||
for (n = 0; n < pb_TimerID_LAST; n++) {
|
||||
pb_ResetTimer(&timers->timers[n]);
|
||||
timers->sub_timer_list[n] = NULL; // free first?
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
pb_AddSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID pb_Category) {
|
||||
|
||||
struct pb_SubTimer *subtimer = (struct pb_SubTimer *) malloc
|
||||
(sizeof(struct pb_SubTimer));
|
||||
|
||||
int len = strlen(label);
|
||||
|
||||
subtimer->label = (char *) malloc (sizeof(char)*(len+1));
|
||||
sprintf(subtimer->label, "%s\0", label);
|
||||
|
||||
pb_ResetTimer(&subtimer->timer);
|
||||
subtimer->next = NULL;
|
||||
|
||||
struct pb_SubTimerList *subtimerlist = timers->sub_timer_list[pb_Category];
|
||||
if (subtimerlist == NULL) {
|
||||
subtimerlist = (struct pb_SubTimerList *) malloc
|
||||
(sizeof(struct pb_SubTimerList));
|
||||
subtimerlist->subtimer_list = subtimer;
|
||||
timers->sub_timer_list[pb_Category] = subtimerlist;
|
||||
} else {
|
||||
// Append to list
|
||||
struct pb_SubTimer *element = subtimerlist->subtimer_list;
|
||||
while (element->next != NULL) {
|
||||
element = element->next;
|
||||
}
|
||||
element->next = subtimer;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
pb_SwitchToSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID category)
|
||||
{
|
||||
|
||||
// switchToSub( NULL, NONE
|
||||
// switchToSub( NULL, some
|
||||
// switchToSub( some, some
|
||||
// switchToSub( some, NONE -- tries to find "some" in NONE's sublist, which won't be printed
|
||||
|
||||
struct pb_Timer *topLevelToStop = NULL;
|
||||
if (timers->current != category && timers->current != pb_TimerID_NONE) {
|
||||
// Switching to subtimer in a different category needs to stop the top-level current, different categoried timer.
|
||||
// NONE shouldn't have a timer associated with it, so exclude from branch
|
||||
topLevelToStop = &timers->timers[timers->current];
|
||||
}
|
||||
|
||||
struct pb_SubTimerList *subtimerlist = timers->sub_timer_list[timers->current];
|
||||
struct pb_SubTimer *curr = (subtimerlist == NULL) ? NULL : subtimerlist->current;
|
||||
|
||||
if (timers->current != pb_TimerID_NONE) {
|
||||
if (curr != NULL && topLevelToStop != NULL) {
|
||||
pb_StopTimerAndSubTimer(topLevelToStop, &curr->timer);
|
||||
} else if (curr != NULL) {
|
||||
pb_StopTimer(&curr->timer);
|
||||
} else {
|
||||
pb_StopTimer(topLevelToStop);
|
||||
}
|
||||
}
|
||||
|
||||
subtimerlist = timers->sub_timer_list[category];
|
||||
struct pb_SubTimer *subtimer = NULL;
|
||||
|
||||
if (label != NULL) {
|
||||
subtimer = subtimerlist->subtimer_list;
|
||||
while (subtimer != NULL) {
|
||||
if (strcmp(subtimer->label, label) == 0) {
|
||||
break;
|
||||
} else {
|
||||
subtimer = subtimer->next;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (category != pb_TimerID_NONE) {
|
||||
|
||||
if (subtimerlist != NULL) {
|
||||
subtimerlist->current = subtimer;
|
||||
}
|
||||
|
||||
if (category != timers->current && subtimer != NULL) {
|
||||
pb_StartTimerAndSubTimer(&timers->timers[category], &subtimer->timer);
|
||||
} else if (subtimer != NULL) {
|
||||
// Same category, different non-NULL subtimer
|
||||
pb_StartTimer(&subtimer->timer);
|
||||
} else{
|
||||
// Different category, but no subtimer (not found or specified as NULL) -- unprefered way of setting topLevel timer
|
||||
pb_StartTimer(&timers->timers[category]);
|
||||
}
|
||||
}
|
||||
|
||||
timers->current = category;
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
pb_SwitchToTimer(struct pb_TimerSet *timers, enum pb_TimerID timer)
|
||||
{
|
||||
/* Stop the currently running timer */
|
||||
/*if (timers->current != pb_TimerID_NONE) {
|
||||
struct pb_SubTimer *currSubTimer = NULL;
|
||||
struct pb_SubTimerList *subtimerlist = timers->sub_timer_list[timers->current];
|
||||
|
||||
if ( subtimerlist != NULL) {
|
||||
currSubTimer = timers->sub_timer_list[timers->current]->current;
|
||||
}
|
||||
if ( currSubTimer!= NULL) {
|
||||
pb_StopTimerAndSubTimer(&timers->timers[timers->current], &currSubTimer->timer);
|
||||
} else {
|
||||
pb_StopTimer(&timers->timers[timers->current]);
|
||||
}
|
||||
}
|
||||
|
||||
timers->current = timer;
|
||||
|
||||
if (timer != pb_TimerID_NONE) {
|
||||
pb_StartTimer(&timers->timers[timer]);
|
||||
}*/
|
||||
}
|
||||
|
||||
void
|
||||
pb_PrintTimerSet(struct pb_TimerSet *timers)
|
||||
{
|
||||
|
||||
pb_Timestamp wall_end = get_time();
|
||||
|
||||
struct pb_Timer *t = timers->timers;
|
||||
struct pb_SubTimer* sub = NULL;
|
||||
|
||||
int maxSubLength;
|
||||
|
||||
const char *categories[] = {
|
||||
"IO", "Kernel", "Copy", "Driver", "Copy Async", "Compute"
|
||||
};
|
||||
|
||||
const int maxCategoryLength = 10;
|
||||
|
||||
int i;
|
||||
for(i = 1; i < pb_TimerID_LAST-1; ++i) { // exclude NONE and OVRELAP from this format
|
||||
if(pb_GetElapsedTime(&t[i]) != 0) {
|
||||
|
||||
// Print Category Timer
|
||||
printf("%-*s: %f\n", maxCategoryLength, categories[i-1], pb_GetElapsedTime(&t[i]));
|
||||
|
||||
if (timers->sub_timer_list[i] != NULL) {
|
||||
sub = timers->sub_timer_list[i]->subtimer_list;
|
||||
maxSubLength = 0;
|
||||
while (sub != NULL) {
|
||||
// Find longest SubTimer label
|
||||
if (strlen(sub->label) > maxSubLength) {
|
||||
maxSubLength = strlen(sub->label);
|
||||
}
|
||||
sub = sub->next;
|
||||
}
|
||||
|
||||
// Fit to Categories
|
||||
if (maxSubLength <= maxCategoryLength) {
|
||||
maxSubLength = maxCategoryLength;
|
||||
}
|
||||
|
||||
sub = timers->sub_timer_list[i]->subtimer_list;
|
||||
|
||||
// Print SubTimers
|
||||
while (sub != NULL) {
|
||||
printf(" -%-*s: %f\n", maxSubLength, sub->label, pb_GetElapsedTime(&sub->timer));
|
||||
sub = sub->next;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(pb_GetElapsedTime(&t[pb_TimerID_OVERLAP]) != 0)
|
||||
printf("CPU/Kernel Overlap: %f\n", pb_GetElapsedTime(&t[pb_TimerID_OVERLAP]));
|
||||
|
||||
float walltime = (wall_end - timers->wall_begin)/ 1e6;
|
||||
printf("Timer Wall Time: %f\n", walltime);
|
||||
|
||||
}
|
||||
|
||||
void pb_DestroyTimerSet(struct pb_TimerSet * timers)
|
||||
{
|
||||
/* clean up all of the async event markers */
|
||||
struct pb_async_time_marker_list ** event = &(timers->async_markers);
|
||||
while( *event != NULL) {
|
||||
struct pb_async_time_marker_list ** next = &((*event)->next);
|
||||
free(*event);
|
||||
(*event) = NULL;
|
||||
event = next;
|
||||
}
|
||||
|
||||
int i = 0;
|
||||
for(i = 0; i < pb_TimerID_LAST; ++i) {
|
||||
if (timers->sub_timer_list[i] != NULL) {
|
||||
struct pb_SubTimer *subtimer = timers->sub_timer_list[i]->subtimer_list;
|
||||
struct pb_SubTimer *prev = NULL;
|
||||
while (subtimer != NULL) {
|
||||
free(subtimer->label);
|
||||
prev = subtimer;
|
||||
subtimer = subtimer->next;
|
||||
free(prev);
|
||||
}
|
||||
free(timers->sub_timer_list[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,427 +0,0 @@
|
||||
/*
|
||||
* (c) 2007 The Board of Trustees of the University of Illinois.
|
||||
*/
|
||||
|
||||
#include <parboil.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#if _POSIX_VERSION >= 200112L
|
||||
# include <sys/time.h>
|
||||
#endif
|
||||
|
||||
|
||||
/*****************************************************************************/
|
||||
/* Timer routines */
|
||||
|
||||
static void
|
||||
accumulate_time(pb_Timestamp *accum,
|
||||
pb_Timestamp start,
|
||||
pb_Timestamp end)
|
||||
{
|
||||
#if _POSIX_VERSION >= 200112L
|
||||
*accum += end - start;
|
||||
#else
|
||||
# error "Timestamps not implemented for this system"
|
||||
#endif
|
||||
}
|
||||
|
||||
#if _POSIX_VERSION >= 200112L
|
||||
static pb_Timestamp get_time()
|
||||
{
|
||||
struct timeval tv;
|
||||
gettimeofday(&tv, NULL);
|
||||
return (pb_Timestamp) (tv.tv_sec * 1000000LL + tv.tv_usec);
|
||||
}
|
||||
#else
|
||||
# error "no supported time libraries are available on this platform"
|
||||
#endif
|
||||
|
||||
void
|
||||
pb_ResetTimer(struct pb_Timer *timer)
|
||||
{
|
||||
timer->state = pb_Timer_STOPPED;
|
||||
|
||||
#if _POSIX_VERSION >= 200112L
|
||||
timer->elapsed = 0;
|
||||
#else
|
||||
# error "pb_ResetTimer: not implemented for this system"
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
pb_StartTimer(struct pb_Timer *timer)
|
||||
{
|
||||
if (timer->state != pb_Timer_STOPPED) {
|
||||
fputs("Ignoring attempt to start a running timer\n", stderr);
|
||||
return;
|
||||
}
|
||||
|
||||
timer->state = pb_Timer_RUNNING;
|
||||
|
||||
#if _POSIX_VERSION >= 200112L
|
||||
{
|
||||
struct timeval tv;
|
||||
gettimeofday(&tv, NULL);
|
||||
timer->init = tv.tv_sec * 1000000LL + tv.tv_usec;
|
||||
}
|
||||
#else
|
||||
# error "pb_StartTimer: not implemented for this system"
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
pb_StartTimerAndSubTimer(struct pb_Timer *timer, struct pb_Timer *subtimer)
|
||||
{
|
||||
unsigned int numNotStopped = 0x3; // 11
|
||||
if (timer->state != pb_Timer_STOPPED) {
|
||||
fputs("Warning: Timer was not stopped\n", stderr);
|
||||
numNotStopped &= 0x1; // Zero out 2^1
|
||||
}
|
||||
if (subtimer->state != pb_Timer_STOPPED) {
|
||||
fputs("Warning: Subtimer was not stopped\n", stderr);
|
||||
numNotStopped &= 0x2; // Zero out 2^0
|
||||
}
|
||||
if (numNotStopped == 0x0) {
|
||||
fputs("Ignoring attempt to start running timer and subtimer\n", stderr);
|
||||
return;
|
||||
}
|
||||
|
||||
timer->state = pb_Timer_RUNNING;
|
||||
subtimer->state = pb_Timer_RUNNING;
|
||||
|
||||
#if _POSIX_VERSION >= 200112L
|
||||
{
|
||||
struct timeval tv;
|
||||
gettimeofday(&tv, NULL);
|
||||
|
||||
if (numNotStopped & 0x2) {
|
||||
timer->init = tv.tv_sec * 1000000LL + tv.tv_usec;
|
||||
}
|
||||
|
||||
if (numNotStopped & 0x1) {
|
||||
subtimer->init = tv.tv_sec * 1000000LL + tv.tv_usec;
|
||||
}
|
||||
}
|
||||
#else
|
||||
# error "pb_StartTimer: not implemented for this system"
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
pb_StopTimer(struct pb_Timer *timer)
|
||||
{
|
||||
|
||||
pb_Timestamp fini;
|
||||
|
||||
if (timer->state != pb_Timer_RUNNING) {
|
||||
fputs("Ignoring attempt to stop a stopped timer\n", stderr);
|
||||
return;
|
||||
}
|
||||
|
||||
timer->state = pb_Timer_STOPPED;
|
||||
|
||||
#if _POSIX_VERSION >= 200112L
|
||||
{
|
||||
struct timeval tv;
|
||||
gettimeofday(&tv, NULL);
|
||||
fini = tv.tv_sec * 1000000LL + tv.tv_usec;
|
||||
}
|
||||
#else
|
||||
# error "pb_StopTimer: not implemented for this system"
|
||||
#endif
|
||||
|
||||
accumulate_time(&timer->elapsed, timer->init, fini);
|
||||
timer->init = fini;
|
||||
|
||||
}
|
||||
|
||||
void pb_StopTimerAndSubTimer(struct pb_Timer *timer, struct pb_Timer *subtimer) {
|
||||
|
||||
pb_Timestamp fini;
|
||||
|
||||
unsigned int numNotRunning = 0x3; // 0b11
|
||||
if (timer->state != pb_Timer_RUNNING) {
|
||||
fputs("Warning: Timer was not running\n", stderr);
|
||||
numNotRunning &= 0x1; // Zero out 2^1
|
||||
}
|
||||
if (subtimer->state != pb_Timer_RUNNING) {
|
||||
fputs("Warning: Subtimer was not running\n", stderr);
|
||||
numNotRunning &= 0x2; // Zero out 2^0
|
||||
}
|
||||
if (numNotRunning == 0x0) {
|
||||
fputs("Ignoring attempt to stop stopped timer and subtimer\n", stderr);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
timer->state = pb_Timer_STOPPED;
|
||||
subtimer->state = pb_Timer_STOPPED;
|
||||
|
||||
#if _POSIX_VERSION >= 200112L
|
||||
{
|
||||
struct timeval tv;
|
||||
gettimeofday(&tv, NULL);
|
||||
fini = tv.tv_sec * 1000000LL + tv.tv_usec;
|
||||
}
|
||||
#else
|
||||
# error "pb_StopTimer: not implemented for this system"
|
||||
#endif
|
||||
|
||||
if (numNotRunning & 0x2) {
|
||||
accumulate_time(&timer->elapsed, timer->init, fini);
|
||||
timer->init = fini;
|
||||
}
|
||||
|
||||
if (numNotRunning & 0x1) {
|
||||
accumulate_time(&subtimer->elapsed, subtimer->init, fini);
|
||||
subtimer->init = fini;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* Get the elapsed time in seconds. */
|
||||
double
|
||||
pb_GetElapsedTime(struct pb_Timer *timer)
|
||||
{
|
||||
double ret;
|
||||
|
||||
if (timer->state != pb_Timer_STOPPED) {
|
||||
fputs("Elapsed time from a running timer is inaccurate\n", stderr);
|
||||
}
|
||||
|
||||
#if _POSIX_VERSION >= 200112L
|
||||
ret = timer->elapsed / 1e6;
|
||||
#else
|
||||
# error "pb_GetElapsedTime: not implemented for this system"
|
||||
#endif
|
||||
return ret;
|
||||
}
|
||||
|
||||
void
|
||||
pb_InitializeTimerSet(struct pb_TimerSet *timers)
|
||||
{
|
||||
int n;
|
||||
|
||||
timers->wall_begin = get_time();
|
||||
|
||||
timers->current = pb_TimerID_NONE;
|
||||
|
||||
timers->async_markers = NULL;
|
||||
|
||||
|
||||
for (n = 0; n < pb_TimerID_LAST; n++) {
|
||||
pb_ResetTimer(&timers->timers[n]);
|
||||
timers->sub_timer_list[n] = NULL; // free first?
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
pb_AddSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID pb_Category) {
|
||||
|
||||
struct pb_SubTimer *subtimer = (struct pb_SubTimer *) malloc
|
||||
(sizeof(struct pb_SubTimer));
|
||||
|
||||
int len = strlen(label);
|
||||
|
||||
subtimer->label = (char *) malloc (sizeof(char)*(len+1));
|
||||
sprintf(subtimer->label, "%s\0", label);
|
||||
|
||||
pb_ResetTimer(&subtimer->timer);
|
||||
subtimer->next = NULL;
|
||||
|
||||
struct pb_SubTimerList *subtimerlist = timers->sub_timer_list[pb_Category];
|
||||
if (subtimerlist == NULL) {
|
||||
subtimerlist = (struct pb_SubTimerList *) malloc
|
||||
(sizeof(struct pb_SubTimerList));
|
||||
subtimerlist->subtimer_list = subtimer;
|
||||
timers->sub_timer_list[pb_Category] = subtimerlist;
|
||||
} else {
|
||||
// Append to list
|
||||
struct pb_SubTimer *element = subtimerlist->subtimer_list;
|
||||
while (element->next != NULL) {
|
||||
element = element->next;
|
||||
}
|
||||
element->next = subtimer;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
pb_SwitchToSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID category)
|
||||
{
|
||||
|
||||
// switchToSub( NULL, NONE
|
||||
// switchToSub( NULL, some
|
||||
// switchToSub( some, some
|
||||
// switchToSub( some, NONE -- tries to find "some" in NONE's sublist, which won't be printed
|
||||
|
||||
struct pb_Timer *topLevelToStop = NULL;
|
||||
if (timers->current != category && timers->current != pb_TimerID_NONE) {
|
||||
// Switching to subtimer in a different category needs to stop the top-level current, different categoried timer.
|
||||
// NONE shouldn't have a timer associated with it, so exclude from branch
|
||||
topLevelToStop = &timers->timers[timers->current];
|
||||
}
|
||||
|
||||
struct pb_SubTimerList *subtimerlist = timers->sub_timer_list[timers->current];
|
||||
struct pb_SubTimer *curr = (subtimerlist == NULL) ? NULL : subtimerlist->current;
|
||||
|
||||
if (timers->current != pb_TimerID_NONE) {
|
||||
if (curr != NULL && topLevelToStop != NULL) {
|
||||
pb_StopTimerAndSubTimer(topLevelToStop, &curr->timer);
|
||||
} else if (curr != NULL) {
|
||||
pb_StopTimer(&curr->timer);
|
||||
} else {
|
||||
pb_StopTimer(topLevelToStop);
|
||||
}
|
||||
}
|
||||
|
||||
subtimerlist = timers->sub_timer_list[category];
|
||||
struct pb_SubTimer *subtimer = NULL;
|
||||
|
||||
if (label != NULL) {
|
||||
subtimer = subtimerlist->subtimer_list;
|
||||
while (subtimer != NULL) {
|
||||
if (strcmp(subtimer->label, label) == 0) {
|
||||
break;
|
||||
} else {
|
||||
subtimer = subtimer->next;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (category != pb_TimerID_NONE) {
|
||||
|
||||
if (subtimerlist != NULL) {
|
||||
subtimerlist->current = subtimer;
|
||||
}
|
||||
|
||||
if (category != timers->current && subtimer != NULL) {
|
||||
pb_StartTimerAndSubTimer(&timers->timers[category], &subtimer->timer);
|
||||
} else if (subtimer != NULL) {
|
||||
// Same category, different non-NULL subtimer
|
||||
pb_StartTimer(&subtimer->timer);
|
||||
} else{
|
||||
// Different category, but no subtimer (not found or specified as NULL) -- unprefered way of setting topLevel timer
|
||||
pb_StartTimer(&timers->timers[category]);
|
||||
}
|
||||
}
|
||||
|
||||
timers->current = category;
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
pb_SwitchToTimer(struct pb_TimerSet *timers, enum pb_TimerID timer)
|
||||
{
|
||||
/* Stop the currently running timer */
|
||||
/*if (timers->current != pb_TimerID_NONE) {
|
||||
struct pb_SubTimer *currSubTimer = NULL;
|
||||
struct pb_SubTimerList *subtimerlist = timers->sub_timer_list[timers->current];
|
||||
|
||||
if ( subtimerlist != NULL) {
|
||||
currSubTimer = timers->sub_timer_list[timers->current]->current;
|
||||
}
|
||||
if ( currSubTimer!= NULL) {
|
||||
pb_StopTimerAndSubTimer(&timers->timers[timers->current], &currSubTimer->timer);
|
||||
} else {
|
||||
pb_StopTimer(&timers->timers[timers->current]);
|
||||
}
|
||||
}
|
||||
|
||||
timers->current = timer;
|
||||
|
||||
if (timer != pb_TimerID_NONE) {
|
||||
pb_StartTimer(&timers->timers[timer]);
|
||||
}*/
|
||||
}
|
||||
|
||||
void
|
||||
pb_PrintTimerSet(struct pb_TimerSet *timers)
|
||||
{
|
||||
|
||||
pb_Timestamp wall_end = get_time();
|
||||
|
||||
struct pb_Timer *t = timers->timers;
|
||||
struct pb_SubTimer* sub = NULL;
|
||||
|
||||
int maxSubLength;
|
||||
|
||||
const char *categories[] = {
|
||||
"IO", "Kernel", "Copy", "Driver", "Copy Async", "Compute"
|
||||
};
|
||||
|
||||
const int maxCategoryLength = 10;
|
||||
|
||||
int i;
|
||||
for(i = 1; i < pb_TimerID_LAST-1; ++i) { // exclude NONE and OVRELAP from this format
|
||||
if(pb_GetElapsedTime(&t[i]) != 0) {
|
||||
|
||||
// Print Category Timer
|
||||
printf("%-*s: %f\n", maxCategoryLength, categories[i-1], pb_GetElapsedTime(&t[i]));
|
||||
|
||||
if (timers->sub_timer_list[i] != NULL) {
|
||||
sub = timers->sub_timer_list[i]->subtimer_list;
|
||||
maxSubLength = 0;
|
||||
while (sub != NULL) {
|
||||
// Find longest SubTimer label
|
||||
if (strlen(sub->label) > maxSubLength) {
|
||||
maxSubLength = strlen(sub->label);
|
||||
}
|
||||
sub = sub->next;
|
||||
}
|
||||
|
||||
// Fit to Categories
|
||||
if (maxSubLength <= maxCategoryLength) {
|
||||
maxSubLength = maxCategoryLength;
|
||||
}
|
||||
|
||||
sub = timers->sub_timer_list[i]->subtimer_list;
|
||||
|
||||
// Print SubTimers
|
||||
while (sub != NULL) {
|
||||
printf(" -%-*s: %f\n", maxSubLength, sub->label, pb_GetElapsedTime(&sub->timer));
|
||||
sub = sub->next;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(pb_GetElapsedTime(&t[pb_TimerID_OVERLAP]) != 0)
|
||||
printf("CPU/Kernel Overlap: %f\n", pb_GetElapsedTime(&t[pb_TimerID_OVERLAP]));
|
||||
|
||||
float walltime = (wall_end - timers->wall_begin)/ 1e6;
|
||||
printf("Timer Wall Time: %f\n", walltime);
|
||||
|
||||
}
|
||||
|
||||
void pb_DestroyTimerSet(struct pb_TimerSet * timers)
|
||||
{
|
||||
/* clean up all of the async event markers */
|
||||
struct pb_async_time_marker_list ** event = &(timers->async_markers);
|
||||
while( *event != NULL) {
|
||||
struct pb_async_time_marker_list ** next = &((*event)->next);
|
||||
free(*event);
|
||||
(*event) = NULL;
|
||||
event = next;
|
||||
}
|
||||
|
||||
int i = 0;
|
||||
for(i = 0; i < pb_TimerID_LAST; ++i) {
|
||||
if (timers->sub_timer_list[i] != NULL) {
|
||||
struct pb_SubTimer *subtimer = timers->sub_timer_list[i]->subtimer_list;
|
||||
struct pb_SubTimer *prev = NULL;
|
||||
while (subtimer != NULL) {
|
||||
free(subtimer->label);
|
||||
prev = subtimer;
|
||||
subtimer = subtimer->next;
|
||||
free(prev);
|
||||
}
|
||||
free(timers->sub_timer_list[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,427 +0,0 @@
|
||||
/*
|
||||
* (c) 2007 The Board of Trustees of the University of Illinois.
|
||||
*/
|
||||
|
||||
#include <parboil.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#if _POSIX_VERSION >= 200112L
|
||||
# include <sys/time.h>
|
||||
#endif
|
||||
|
||||
|
||||
/*****************************************************************************/
|
||||
/* Timer routines */
|
||||
|
||||
static void
|
||||
accumulate_time(pb_Timestamp *accum,
|
||||
pb_Timestamp start,
|
||||
pb_Timestamp end)
|
||||
{
|
||||
#if _POSIX_VERSION >= 200112L
|
||||
*accum += end - start;
|
||||
#else
|
||||
# error "Timestamps not implemented for this system"
|
||||
#endif
|
||||
}
|
||||
|
||||
#if _POSIX_VERSION >= 200112L
|
||||
static pb_Timestamp get_time()
|
||||
{
|
||||
struct timeval tv;
|
||||
gettimeofday(&tv, NULL);
|
||||
return (pb_Timestamp) (tv.tv_sec * 1000000LL + tv.tv_usec);
|
||||
}
|
||||
#else
|
||||
# error "no supported time libraries are available on this platform"
|
||||
#endif
|
||||
|
||||
void
|
||||
pb_ResetTimer(struct pb_Timer *timer)
|
||||
{
|
||||
timer->state = pb_Timer_STOPPED;
|
||||
|
||||
#if _POSIX_VERSION >= 200112L
|
||||
timer->elapsed = 0;
|
||||
#else
|
||||
# error "pb_ResetTimer: not implemented for this system"
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
pb_StartTimer(struct pb_Timer *timer)
|
||||
{
|
||||
if (timer->state != pb_Timer_STOPPED) {
|
||||
fputs("Ignoring attempt to start a running timer\n", stderr);
|
||||
return;
|
||||
}
|
||||
|
||||
timer->state = pb_Timer_RUNNING;
|
||||
|
||||
#if _POSIX_VERSION >= 200112L
|
||||
{
|
||||
struct timeval tv;
|
||||
gettimeofday(&tv, NULL);
|
||||
timer->init = tv.tv_sec * 1000000LL + tv.tv_usec;
|
||||
}
|
||||
#else
|
||||
# error "pb_StartTimer: not implemented for this system"
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
pb_StartTimerAndSubTimer(struct pb_Timer *timer, struct pb_Timer *subtimer)
|
||||
{
|
||||
unsigned int numNotStopped = 0x3; // 11
|
||||
if (timer->state != pb_Timer_STOPPED) {
|
||||
fputs("Warning: Timer was not stopped\n", stderr);
|
||||
numNotStopped &= 0x1; // Zero out 2^1
|
||||
}
|
||||
if (subtimer->state != pb_Timer_STOPPED) {
|
||||
fputs("Warning: Subtimer was not stopped\n", stderr);
|
||||
numNotStopped &= 0x2; // Zero out 2^0
|
||||
}
|
||||
if (numNotStopped == 0x0) {
|
||||
fputs("Ignoring attempt to start running timer and subtimer\n", stderr);
|
||||
return;
|
||||
}
|
||||
|
||||
timer->state = pb_Timer_RUNNING;
|
||||
subtimer->state = pb_Timer_RUNNING;
|
||||
|
||||
#if _POSIX_VERSION >= 200112L
|
||||
{
|
||||
struct timeval tv;
|
||||
gettimeofday(&tv, NULL);
|
||||
|
||||
if (numNotStopped & 0x2) {
|
||||
timer->init = tv.tv_sec * 1000000LL + tv.tv_usec;
|
||||
}
|
||||
|
||||
if (numNotStopped & 0x1) {
|
||||
subtimer->init = tv.tv_sec * 1000000LL + tv.tv_usec;
|
||||
}
|
||||
}
|
||||
#else
|
||||
# error "pb_StartTimer: not implemented for this system"
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
pb_StopTimer(struct pb_Timer *timer)
|
||||
{
|
||||
|
||||
pb_Timestamp fini;
|
||||
|
||||
if (timer->state != pb_Timer_RUNNING) {
|
||||
fputs("Ignoring attempt to stop a stopped timer\n", stderr);
|
||||
return;
|
||||
}
|
||||
|
||||
timer->state = pb_Timer_STOPPED;
|
||||
|
||||
#if _POSIX_VERSION >= 200112L
|
||||
{
|
||||
struct timeval tv;
|
||||
gettimeofday(&tv, NULL);
|
||||
fini = tv.tv_sec * 1000000LL + tv.tv_usec;
|
||||
}
|
||||
#else
|
||||
# error "pb_StopTimer: not implemented for this system"
|
||||
#endif
|
||||
|
||||
accumulate_time(&timer->elapsed, timer->init, fini);
|
||||
timer->init = fini;
|
||||
|
||||
}
|
||||
|
||||
void pb_StopTimerAndSubTimer(struct pb_Timer *timer, struct pb_Timer *subtimer) {
|
||||
|
||||
pb_Timestamp fini;
|
||||
|
||||
unsigned int numNotRunning = 0x3; // 0b11
|
||||
if (timer->state != pb_Timer_RUNNING) {
|
||||
fputs("Warning: Timer was not running\n", stderr);
|
||||
numNotRunning &= 0x1; // Zero out 2^1
|
||||
}
|
||||
if (subtimer->state != pb_Timer_RUNNING) {
|
||||
fputs("Warning: Subtimer was not running\n", stderr);
|
||||
numNotRunning &= 0x2; // Zero out 2^0
|
||||
}
|
||||
if (numNotRunning == 0x0) {
|
||||
fputs("Ignoring attempt to stop stopped timer and subtimer\n", stderr);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
timer->state = pb_Timer_STOPPED;
|
||||
subtimer->state = pb_Timer_STOPPED;
|
||||
|
||||
#if _POSIX_VERSION >= 200112L
|
||||
{
|
||||
struct timeval tv;
|
||||
gettimeofday(&tv, NULL);
|
||||
fini = tv.tv_sec * 1000000LL + tv.tv_usec;
|
||||
}
|
||||
#else
|
||||
# error "pb_StopTimer: not implemented for this system"
|
||||
#endif
|
||||
|
||||
if (numNotRunning & 0x2) {
|
||||
accumulate_time(&timer->elapsed, timer->init, fini);
|
||||
timer->init = fini;
|
||||
}
|
||||
|
||||
if (numNotRunning & 0x1) {
|
||||
accumulate_time(&subtimer->elapsed, subtimer->init, fini);
|
||||
subtimer->init = fini;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* Get the elapsed time in seconds. */
|
||||
double
|
||||
pb_GetElapsedTime(struct pb_Timer *timer)
|
||||
{
|
||||
double ret;
|
||||
|
||||
if (timer->state != pb_Timer_STOPPED) {
|
||||
fputs("Elapsed time from a running timer is inaccurate\n", stderr);
|
||||
}
|
||||
|
||||
#if _POSIX_VERSION >= 200112L
|
||||
ret = timer->elapsed / 1e6;
|
||||
#else
|
||||
# error "pb_GetElapsedTime: not implemented for this system"
|
||||
#endif
|
||||
return ret;
|
||||
}
|
||||
|
||||
void
|
||||
pb_InitializeTimerSet(struct pb_TimerSet *timers)
|
||||
{
|
||||
int n;
|
||||
|
||||
timers->wall_begin = get_time();
|
||||
|
||||
timers->current = pb_TimerID_NONE;
|
||||
|
||||
timers->async_markers = NULL;
|
||||
|
||||
|
||||
for (n = 0; n < pb_TimerID_LAST; n++) {
|
||||
pb_ResetTimer(&timers->timers[n]);
|
||||
timers->sub_timer_list[n] = NULL; // free first?
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
pb_AddSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID pb_Category) {
|
||||
|
||||
struct pb_SubTimer *subtimer = (struct pb_SubTimer *) malloc
|
||||
(sizeof(struct pb_SubTimer));
|
||||
|
||||
int len = strlen(label);
|
||||
|
||||
subtimer->label = (char *) malloc (sizeof(char)*(len+1));
|
||||
sprintf(subtimer->label, "%s\0", label);
|
||||
|
||||
pb_ResetTimer(&subtimer->timer);
|
||||
subtimer->next = NULL;
|
||||
|
||||
struct pb_SubTimerList *subtimerlist = timers->sub_timer_list[pb_Category];
|
||||
if (subtimerlist == NULL) {
|
||||
subtimerlist = (struct pb_SubTimerList *) malloc
|
||||
(sizeof(struct pb_SubTimerList));
|
||||
subtimerlist->subtimer_list = subtimer;
|
||||
timers->sub_timer_list[pb_Category] = subtimerlist;
|
||||
} else {
|
||||
// Append to list
|
||||
struct pb_SubTimer *element = subtimerlist->subtimer_list;
|
||||
while (element->next != NULL) {
|
||||
element = element->next;
|
||||
}
|
||||
element->next = subtimer;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
pb_SwitchToSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID category)
|
||||
{
|
||||
|
||||
// switchToSub( NULL, NONE
|
||||
// switchToSub( NULL, some
|
||||
// switchToSub( some, some
|
||||
// switchToSub( some, NONE -- tries to find "some" in NONE's sublist, which won't be printed
|
||||
|
||||
struct pb_Timer *topLevelToStop = NULL;
|
||||
if (timers->current != category && timers->current != pb_TimerID_NONE) {
|
||||
// Switching to subtimer in a different category needs to stop the top-level current, different categoried timer.
|
||||
// NONE shouldn't have a timer associated with it, so exclude from branch
|
||||
topLevelToStop = &timers->timers[timers->current];
|
||||
}
|
||||
|
||||
struct pb_SubTimerList *subtimerlist = timers->sub_timer_list[timers->current];
|
||||
struct pb_SubTimer *curr = (subtimerlist == NULL) ? NULL : subtimerlist->current;
|
||||
|
||||
if (timers->current != pb_TimerID_NONE) {
|
||||
if (curr != NULL && topLevelToStop != NULL) {
|
||||
pb_StopTimerAndSubTimer(topLevelToStop, &curr->timer);
|
||||
} else if (curr != NULL) {
|
||||
pb_StopTimer(&curr->timer);
|
||||
} else {
|
||||
pb_StopTimer(topLevelToStop);
|
||||
}
|
||||
}
|
||||
|
||||
subtimerlist = timers->sub_timer_list[category];
|
||||
struct pb_SubTimer *subtimer = NULL;
|
||||
|
||||
if (label != NULL) {
|
||||
subtimer = subtimerlist->subtimer_list;
|
||||
while (subtimer != NULL) {
|
||||
if (strcmp(subtimer->label, label) == 0) {
|
||||
break;
|
||||
} else {
|
||||
subtimer = subtimer->next;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (category != pb_TimerID_NONE) {
|
||||
|
||||
if (subtimerlist != NULL) {
|
||||
subtimerlist->current = subtimer;
|
||||
}
|
||||
|
||||
if (category != timers->current && subtimer != NULL) {
|
||||
pb_StartTimerAndSubTimer(&timers->timers[category], &subtimer->timer);
|
||||
} else if (subtimer != NULL) {
|
||||
// Same category, different non-NULL subtimer
|
||||
pb_StartTimer(&subtimer->timer);
|
||||
} else{
|
||||
// Different category, but no subtimer (not found or specified as NULL) -- unprefered way of setting topLevel timer
|
||||
pb_StartTimer(&timers->timers[category]);
|
||||
}
|
||||
}
|
||||
|
||||
timers->current = category;
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
pb_SwitchToTimer(struct pb_TimerSet *timers, enum pb_TimerID timer)
|
||||
{
|
||||
/* Stop the currently running timer */
|
||||
/*if (timers->current != pb_TimerID_NONE) {
|
||||
struct pb_SubTimer *currSubTimer = NULL;
|
||||
struct pb_SubTimerList *subtimerlist = timers->sub_timer_list[timers->current];
|
||||
|
||||
if ( subtimerlist != NULL) {
|
||||
currSubTimer = timers->sub_timer_list[timers->current]->current;
|
||||
}
|
||||
if ( currSubTimer!= NULL) {
|
||||
pb_StopTimerAndSubTimer(&timers->timers[timers->current], &currSubTimer->timer);
|
||||
} else {
|
||||
pb_StopTimer(&timers->timers[timers->current]);
|
||||
}
|
||||
}
|
||||
|
||||
timers->current = timer;
|
||||
|
||||
if (timer != pb_TimerID_NONE) {
|
||||
pb_StartTimer(&timers->timers[timer]);
|
||||
}*/
|
||||
}
|
||||
|
||||
void
|
||||
pb_PrintTimerSet(struct pb_TimerSet *timers)
|
||||
{
|
||||
|
||||
pb_Timestamp wall_end = get_time();
|
||||
|
||||
struct pb_Timer *t = timers->timers;
|
||||
struct pb_SubTimer* sub = NULL;
|
||||
|
||||
int maxSubLength;
|
||||
|
||||
const char *categories[] = {
|
||||
"IO", "Kernel", "Copy", "Driver", "Copy Async", "Compute"
|
||||
};
|
||||
|
||||
const int maxCategoryLength = 10;
|
||||
|
||||
int i;
|
||||
for(i = 1; i < pb_TimerID_LAST-1; ++i) { // exclude NONE and OVRELAP from this format
|
||||
if(pb_GetElapsedTime(&t[i]) != 0) {
|
||||
|
||||
// Print Category Timer
|
||||
printf("%-*s: %f\n", maxCategoryLength, categories[i-1], pb_GetElapsedTime(&t[i]));
|
||||
|
||||
if (timers->sub_timer_list[i] != NULL) {
|
||||
sub = timers->sub_timer_list[i]->subtimer_list;
|
||||
maxSubLength = 0;
|
||||
while (sub != NULL) {
|
||||
// Find longest SubTimer label
|
||||
if (strlen(sub->label) > maxSubLength) {
|
||||
maxSubLength = strlen(sub->label);
|
||||
}
|
||||
sub = sub->next;
|
||||
}
|
||||
|
||||
// Fit to Categories
|
||||
if (maxSubLength <= maxCategoryLength) {
|
||||
maxSubLength = maxCategoryLength;
|
||||
}
|
||||
|
||||
sub = timers->sub_timer_list[i]->subtimer_list;
|
||||
|
||||
// Print SubTimers
|
||||
while (sub != NULL) {
|
||||
printf(" -%-*s: %f\n", maxSubLength, sub->label, pb_GetElapsedTime(&sub->timer));
|
||||
sub = sub->next;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(pb_GetElapsedTime(&t[pb_TimerID_OVERLAP]) != 0)
|
||||
printf("CPU/Kernel Overlap: %f\n", pb_GetElapsedTime(&t[pb_TimerID_OVERLAP]));
|
||||
|
||||
float walltime = (wall_end - timers->wall_begin)/ 1e6;
|
||||
printf("Timer Wall Time: %f\n", walltime);
|
||||
|
||||
}
|
||||
|
||||
void pb_DestroyTimerSet(struct pb_TimerSet * timers)
|
||||
{
|
||||
/* clean up all of the async event markers */
|
||||
struct pb_async_time_marker_list ** event = &(timers->async_markers);
|
||||
while( *event != NULL) {
|
||||
struct pb_async_time_marker_list ** next = &((*event)->next);
|
||||
free(*event);
|
||||
(*event) = NULL;
|
||||
event = next;
|
||||
}
|
||||
|
||||
int i = 0;
|
||||
for(i = 0; i < pb_TimerID_LAST; ++i) {
|
||||
if (timers->sub_timer_list[i] != NULL) {
|
||||
struct pb_SubTimer *subtimer = timers->sub_timer_list[i]->subtimer_list;
|
||||
struct pb_SubTimer *prev = NULL;
|
||||
while (subtimer != NULL) {
|
||||
free(subtimer->label);
|
||||
prev = subtimer;
|
||||
subtimer = subtimer->next;
|
||||
free(prev);
|
||||
}
|
||||
free(timers->sub_timer_list[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user