Files
kernels/tests/opencl/mri-q/main.cc
2021-06-13 17:42:04 -07:00

296 lines
8.5 KiB
C++

/***************************************************************************
*cr
*cr (C) Copyright 2007 The Board of Trustees of the
*cr University of Illinois
*cr All Rights Reserved
*cr
***************************************************************************/
/*
* C code for creating the Q data structure for fast convolution-based
* Hessian multiplication for arbitrary k-space trajectories.
*
* Inputs:
* kx - VECTOR of kx values, same length as ky and kz
* ky - VECTOR of ky values, same length as kx and kz
* kz - VECTOR of kz values, same length as kx and ky
* x - VECTOR of x values, same length as y and z
* y - VECTOR of y values, same length as x and z
* z - VECTOR of z values, same length as x and y
* phi - VECTOR of the Fourier transform of the spatial basis
* function, evaluated at [kx, ky, kz]. Same length as kx, ky, and kz.
*
* recommended g++ options:
* -O3 -lm -ffast-math -funroll-all-loops
*/
#include <stdio.h>
#include <sys/time.h>
#include <parboil.h>
#include <CL/cl.h>
#include "ocl.h"
#include "file.h"
#include "macros.h"
#include "computeQ.h"
static void
setupMemoryGPU(int num, int size, cl_mem* dev_ptr, float* host_ptr,clPrmtr* clPrm)
{
cl_int clStatus;
*dev_ptr = clCreateBuffer(clPrm->clContext,CL_MEM_READ_ONLY,num*size,NULL,&clStatus);
CHECK_ERROR("clCreateBuffer");
clStatus = clEnqueueWriteBuffer(clPrm->clCommandQueue,*dev_ptr,CL_TRUE,0,num*size,host_ptr,0,NULL,NULL);
CHECK_ERROR("clEnequeueWriteBuffer");
}
static void
cleanupMemoryGPU(int num, int size, cl_mem* dev_ptr, float* host_ptr, clPrmtr* clPrm)
{
cl_int clStatus;
clStatus = clEnqueueReadBuffer(clPrm->clCommandQueue,*dev_ptr,CL_TRUE,0,num*size,host_ptr,0,NULL,NULL);
CHECK_ERROR("clEnqueueReadBuffer")
clStatus = clReleaseMemObject(*dev_ptr);
CHECK_ERROR("clReleaseMemObject")
}
int
main (int argc, char *argv[]) {
int numX, numK; /* Number of X and K values */
int original_numK; /* Number of K values in input file */
float *kx, *ky, *kz; /* K trajectory (3D vectors) */
float *x, *y, *z; /* X coordinates (3D vectors) */
float *phiR, *phiI; /* Phi values (complex) */
float *phiMag; /* Magnitude of Phi */
float *Qr, *Qi; /* Q signal (complex) */
struct kValues* kVals;
struct pb_Parameters *params;
struct pb_TimerSet timers;
pb_InitializeTimerSet(&timers);
/* Read command line */
params = pb_ReadParameters(&argc, argv);
params->inpFiles = (char **)malloc(sizeof(char *) * 2);
params->inpFiles[0] = (char *)malloc(100);
params->inpFiles[1] = NULL;
strncpy(params->inpFiles[0], "32_32_32_dataset.bin", 100);
if ((params->inpFiles[0] == NULL) || (params->inpFiles[1] != NULL))
{
fprintf(stderr, "Expecting one input filename\n");
exit(-1);
}
/* Read in data */
pb_SwitchToTimer(&timers, pb_TimerID_IO);
inputData(params->inpFiles[0],
&original_numK, &numX,
&kx, &ky, &kz,
&x, &y, &z,
&phiR, &phiI);
printf("OK\n");
/* Reduce the number of k-space samples if a number is given
* on the command line */
if (argc < 2)
numK = original_numK;
else
{
int inputK;
char *end;
inputK = strtol(argv[1], &end, 10);
if (end == argv[1])
{
fprintf(stderr, "Expecting an integer parameter\n");
exit(-1);
}
numK = MIN(inputK, original_numK);
}
printf("%d pixels in output; %d samples in trajectory; using %d samples\n",
numX, original_numK, numK);
pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
clPrmtr clPrm;
pb_Context* pb_context;
pb_context = pb_InitOpenCLContext(params);
if (pb_context == NULL) {
fprintf (stderr, "Error: No OpenCL platform/device can be found.");
return -1;
}
cl_int clStatus;
cl_device_id clDevice = (cl_device_id) pb_context->clDeviceId;
cl_platform_id clPlatform = (cl_platform_id) pb_context->clPlatformId;
clPrm.clContext = (cl_context) pb_context->clContext;
clPrm.clCommandQueue = clCreateCommandQueue(clPrm.clContext,clDevice,CL_QUEUE_PROFILING_ENABLE,&clStatus);
CHECK_ERROR("clCreateCommandQueue")
pb_SetOpenCL(&(clPrm.clContext), &(clPrm.clCommandQueue));
printf("OK\n");
//const char* clSource[] = {readFile("src/opencl_base/kernels.cl")};
//cl_program clProgram = clCreateProgramWithSource(clPrm.clContext,1,clSource,NULL,&clStatus);
cl_program clProgram = clCreateProgramWithBuiltInKernels(
clPrm.clContext, 1, &clDevice, "ComputePhiMag_GPU;ComputeQ_GPU", &clStatus);
CHECK_ERROR("clCreateProgramWithSource")
char options[50];
sprintf(options,"-I src/opencl_nvidia");
clStatus = clBuildProgram(clProgram,0,NULL,options,NULL,NULL);
if (clStatus != CL_SUCCESS) {
char buf[4096];
clGetProgramBuildInfo(clProgram, clDevice, CL_PROGRAM_BUILD_LOG, 4096, buf, NULL);
printf ("%s\n", buf);
CHECK_ERROR("clBuildProgram")
}
/* Create CPU data structures */
createDataStructsCPU(numK, numX, &phiMag, &Qr, &Qi);
/* GPU section 1 (precompute PhiMag) */
{
clPrm.clKernel = clCreateKernel(clProgram,"ComputePhiMag_GPU",&clStatus);
CHECK_ERROR("clCreateKernel")
/* Mirror several data structures on the device */
cl_mem phiR_d;
cl_mem phiI_d;
cl_mem phiMag_d;
pb_SwitchToTimer(&timers, pb_TimerID_COPY);
setupMemoryGPU(numK,sizeof(float),&phiR_d,phiR,&clPrm);
setupMemoryGPU(numK,sizeof(float),&phiI_d,phiI,&clPrm);
phiMag_d = clCreateBuffer(clPrm.clContext,CL_MEM_WRITE_ONLY,numK*sizeof(float),NULL,&clStatus);
CHECK_ERROR("clCreateBuffer")
clStatus = clFinish(clPrm.clCommandQueue);
CHECK_ERROR("clFinish")
pb_SwitchToTimer(&timers, pb_TimerID_KERNEL);
computePhiMag_GPU(numK, phiR_d, phiI_d, phiMag_d, &clPrm);
clStatus = clFinish(clPrm.clCommandQueue);
CHECK_ERROR("clFinish")
pb_SwitchToTimer(&timers, pb_TimerID_COPY);
cleanupMemoryGPU(numK,sizeof(float),&phiMag_d,phiMag,&clPrm);
clStatus = clReleaseMemObject(phiR_d);
CHECK_ERROR("clReleaseMemObject")
clStatus = clReleaseMemObject(phiI_d);
CHECK_ERROR("clReleaseMemObject")
}
pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
kVals = (struct kValues*)calloc(numK, sizeof (struct kValues));
int k;
for (k = 0; k < numK; k++) {
kVals[k].Kx = kx[k];
kVals[k].Ky = ky[k];
kVals[k].Kz = kz[k];
kVals[k].PhiMag = phiMag[k];
}
free(phiMag);
clStatus = clReleaseKernel(clPrm.clKernel);
/* GPU section 2 */
{
clPrm.clKernel = clCreateKernel(clProgram,"ComputeQ_GPU",&clStatus);
CHECK_ERROR("clCreateKernel")
cl_mem x_d;
cl_mem y_d;
cl_mem z_d;
cl_mem Qr_d;
cl_mem Qi_d;
pb_SwitchToTimer(&timers, pb_TimerID_COPY);
setupMemoryGPU(numX,sizeof(float),&x_d,x,&clPrm);
setupMemoryGPU(numX,sizeof(float),&y_d,y,&clPrm);
setupMemoryGPU(numX,sizeof(float),&z_d,z,&clPrm);
Qr_d = clCreateBuffer(clPrm.clContext,CL_MEM_READ_WRITE,numX*sizeof(float),NULL,&clStatus);
CHECK_ERROR("clCreateBuffer")
clMemSet(&clPrm,Qr_d,0,numX*sizeof(float));
Qi_d = clCreateBuffer(clPrm.clContext,CL_MEM_READ_WRITE,numX*sizeof(float),NULL,&clStatus);
CHECK_ERROR("clCreateBuffer")
clMemSet(&clPrm,Qi_d,0,numX*sizeof(float));
clStatus = clFinish(clPrm.clCommandQueue);
CHECK_ERROR("clFinish")
pb_SwitchToTimer(&timers, pb_TimerID_KERNEL);
computeQ_GPU(numK, numX, x_d, y_d, z_d, kVals, Qr_d, Qi_d, &clPrm);
clStatus = clFinish(clPrm.clCommandQueue);
CHECK_ERROR("clFinish")
pb_SwitchToTimer(&timers, pb_TimerID_COPY);
clStatus = clReleaseMemObject(x_d);
CHECK_ERROR("clReleaseMemObject")
clStatus = clReleaseMemObject(y_d);
CHECK_ERROR("clReleaseMemObject")
clStatus = clReleaseMemObject(z_d);
CHECK_ERROR("clReleaseMemObject")
cleanupMemoryGPU(numX,sizeof(float),&Qr_d,Qr,&clPrm);
cleanupMemoryGPU(numX,sizeof(float),&Qi_d,Qi,&clPrm);
}
pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
if (params->outFile)
{
/* Write Q to file */
pb_SwitchToTimer(&timers, pb_TimerID_IO);
outputData(params->outFile, Qr, Qi, numX);
pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
}
free (kx);
free (ky);
free (kz);
free (x);
free (y);
free (z);
free (phiR);
free (phiI);
free (kVals);
free (Qr);
free (Qi);
//free((void*)clSource[0]);
clStatus = clReleaseKernel(clPrm.clKernel);
clStatus = clReleaseProgram(clProgram);
clStatus = clReleaseCommandQueue(clPrm.clCommandQueue);
clStatus = clReleaseContext(clPrm.clContext);
pb_SwitchToTimer(&timers, pb_TimerID_NONE);
pb_PrintTimerSet(&timers);
pb_FreeParameters(params);
return 0;
}