project directories reorganization
This commit is contained in:
297
benchmarks/old_opencl/bfs/main.cc
Executable file
297
benchmarks/old_opencl/bfs/main.cc
Executable file
@@ -0,0 +1,297 @@
|
||||
//--by Jianbin Fang
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#ifdef PROFILING
|
||||
#include "timer.h"
|
||||
#endif
|
||||
|
||||
#include "CLHelper.h"
|
||||
#include "util.h"
|
||||
|
||||
#define MAX_THREADS_PER_BLOCK 256
|
||||
|
||||
// Per-vertex record of the graph.
//
// A vertex does not store its neighbours directly; it names a slice of the
// flat edge array instead. The neighbours of vertex v are the entries
//   edges[v.starting] ... edges[v.starting + v.no_of_edges - 1].
//
// NOTE(review): arrays of Node are copied to the device by raw byte size
// (no_of_nodes * sizeof(Node)), so the field order and types presumably have
// to match the kernel-side definition -- confirm before changing the layout.
struct Node {
  int starting;     // index of the first edge-array entry owned by this vertex
  int no_of_edges;  // number of consecutive edge-array entries owned by it
};
|
||||
|
||||
//----------------------------------------------------------
|
||||
//--bfs on cpu
|
||||
//--programmer: jianbin
|
||||
//--date: 26/01/2011
|
||||
//--note: width is changed to the new_width
|
||||
//----------------------------------------------------------
|
||||
void run_bfs_cpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size,
|
||||
int *h_graph_edges, char *h_graph_mask,
|
||||
char *h_updating_graph_mask, char *h_graph_visited,
|
||||
int *h_cost_ref) {
|
||||
char stop;
|
||||
int k = 0;
|
||||
do {
|
||||
// if no thread changes this value then the loop stops
|
||||
stop = false;
|
||||
for (int tid = 0; tid < no_of_nodes; tid++) {
|
||||
if (h_graph_mask[tid] == true) {
|
||||
h_graph_mask[tid] = false;
|
||||
for (int i = h_graph_nodes[tid].starting;
|
||||
i < (h_graph_nodes[tid].no_of_edges + h_graph_nodes[tid].starting);
|
||||
i++) {
|
||||
int id =
|
||||
h_graph_edges[i]; //--cambine: node id is connected with node tid
|
||||
if (!h_graph_visited[id]) { //--cambine: if node id has not been
|
||||
//visited, enter the body below
|
||||
h_cost_ref[id] = h_cost_ref[tid] + 1;
|
||||
h_updating_graph_mask[id] = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (int tid = 0; tid < no_of_nodes; tid++) {
|
||||
if (h_updating_graph_mask[tid] == true) {
|
||||
h_graph_mask[tid] = true;
|
||||
h_graph_visited[tid] = true;
|
||||
stop = true;
|
||||
h_updating_graph_mask[tid] = false;
|
||||
}
|
||||
}
|
||||
k++;
|
||||
} while (stop);
|
||||
}
|
||||
//----------------------------------------------------------
|
||||
//--breadth first search on GPUs
|
||||
//----------------------------------------------------------
|
||||
void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size,
|
||||
int *h_graph_edges, char *h_graph_mask,
|
||||
char *h_updating_graph_mask, char *h_graph_visited,
|
||||
int *h_cost) throw(std::string) {
|
||||
|
||||
// int number_elements = height*width;
|
||||
char h_over;
|
||||
cl_mem d_graph_nodes, d_graph_edges, d_graph_mask, d_updating_graph_mask,
|
||||
d_graph_visited, d_cost, d_over;
|
||||
try {
|
||||
//--1 transfer data from host to device
|
||||
_clInit();
|
||||
d_graph_nodes = _clMalloc(no_of_nodes * sizeof(Node), h_graph_nodes);
|
||||
d_graph_edges = _clMalloc(edge_list_size * sizeof(int), h_graph_edges);
|
||||
d_graph_mask = _clMallocRW(no_of_nodes * sizeof(char), h_graph_mask);
|
||||
d_updating_graph_mask =
|
||||
_clMallocRW(no_of_nodes * sizeof(char), h_updating_graph_mask);
|
||||
d_graph_visited = _clMallocRW(no_of_nodes * sizeof(char), h_graph_visited);
|
||||
|
||||
d_cost = _clMallocRW(no_of_nodes * sizeof(int), h_cost);
|
||||
d_over = _clMallocRW(sizeof(char), &h_over);
|
||||
|
||||
_clMemcpyH2D(d_graph_nodes, no_of_nodes * sizeof(Node), h_graph_nodes);
|
||||
_clMemcpyH2D(d_graph_edges, edge_list_size * sizeof(int), h_graph_edges);
|
||||
_clMemcpyH2D(d_graph_mask, no_of_nodes * sizeof(char), h_graph_mask);
|
||||
_clMemcpyH2D(d_updating_graph_mask, no_of_nodes * sizeof(char),
|
||||
h_updating_graph_mask);
|
||||
_clMemcpyH2D(d_graph_visited, no_of_nodes * sizeof(char), h_graph_visited);
|
||||
_clMemcpyH2D(d_cost, no_of_nodes * sizeof(int), h_cost);
|
||||
|
||||
//--2 invoke kernel
|
||||
#ifdef PROFILING
|
||||
timer kernel_timer;
|
||||
double kernel_time = 0.0;
|
||||
kernel_timer.reset();
|
||||
kernel_timer.start();
|
||||
#endif
|
||||
do {
|
||||
h_over = false;
|
||||
_clMemcpyH2D(d_over, sizeof(char), &h_over);
|
||||
//--kernel 0
|
||||
int kernel_id = 0;
|
||||
int kernel_idx = 0;
|
||||
_clSetArgs(kernel_id, kernel_idx++, d_graph_nodes);
|
||||
_clSetArgs(kernel_id, kernel_idx++, d_graph_edges);
|
||||
_clSetArgs(kernel_id, kernel_idx++, d_graph_mask);
|
||||
_clSetArgs(kernel_id, kernel_idx++, d_updating_graph_mask);
|
||||
_clSetArgs(kernel_id, kernel_idx++, d_graph_visited);
|
||||
_clSetArgs(kernel_id, kernel_idx++, d_cost);
|
||||
_clSetArgs(kernel_id, kernel_idx++, &no_of_nodes, sizeof(int));
|
||||
|
||||
// int work_items = no_of_nodes;
|
||||
_clInvokeKernel(kernel_id, no_of_nodes, work_group_size);
|
||||
|
||||
//--kernel 1
|
||||
kernel_id = 1;
|
||||
kernel_idx = 0;
|
||||
_clSetArgs(kernel_id, kernel_idx++, d_graph_mask);
|
||||
_clSetArgs(kernel_id, kernel_idx++, d_updating_graph_mask);
|
||||
_clSetArgs(kernel_id, kernel_idx++, d_graph_visited);
|
||||
_clSetArgs(kernel_id, kernel_idx++, d_over);
|
||||
_clSetArgs(kernel_id, kernel_idx++, &no_of_nodes, sizeof(int));
|
||||
|
||||
// work_items = no_of_nodes;
|
||||
_clInvokeKernel(kernel_id, no_of_nodes, work_group_size);
|
||||
|
||||
_clMemcpyD2H(d_over, sizeof(char), &h_over);
|
||||
} while (h_over);
|
||||
|
||||
_clFinish();
|
||||
#ifdef PROFILING
|
||||
kernel_timer.stop();
|
||||
kernel_time = kernel_timer.getTimeInSeconds();
|
||||
#endif
|
||||
//--3 transfer data from device to host
|
||||
_clMemcpyD2H(d_cost, no_of_nodes * sizeof(int), h_cost);
|
||||
//--statistics
|
||||
#ifdef PROFILING
|
||||
std::cout << "kernel time(s):" << kernel_time << std::endl;
|
||||
#endif
|
||||
//--4 release cl resources.
|
||||
_clFree(d_graph_nodes);
|
||||
_clFree(d_graph_edges);
|
||||
_clFree(d_graph_mask);
|
||||
_clFree(d_updating_graph_mask);
|
||||
_clFree(d_graph_visited);
|
||||
_clFree(d_cost);
|
||||
_clFree(d_over);
|
||||
_clRelease();
|
||||
} catch (std::string msg) {
|
||||
_clFree(d_graph_nodes);
|
||||
_clFree(d_graph_edges);
|
||||
_clFree(d_graph_mask);
|
||||
_clFree(d_updating_graph_mask);
|
||||
_clFree(d_graph_visited);
|
||||
_clFree(d_cost);
|
||||
_clFree(d_over);
|
||||
_clRelease();
|
||||
std::string e_str = "in run_transpose_gpu -> ";
|
||||
e_str += msg;
|
||||
throw(e_str);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
//----------------------------------------------------------
|
||||
//--cambine: main function
|
||||
//--author: created by Jianbin Fang
|
||||
//--date: 25/01/2011
|
||||
//----------------------------------------------------------
|
||||
int main(int argc, char *argv[]) {
|
||||
printf("enter demo main\n");
|
||||
|
||||
int no_of_nodes;
|
||||
int edge_list_size;
|
||||
FILE *fp;
|
||||
Node *h_graph_nodes;
|
||||
char *h_graph_mask, *h_updating_graph_mask, *h_graph_visited;
|
||||
|
||||
try {
|
||||
char *input_f = "graph4096.txt";
|
||||
printf("Reading File\n");
|
||||
// Read in Graph from a file
|
||||
fp = fopen(input_f, "r");
|
||||
if (!fp) {
|
||||
printf("Error Reading graph file\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
printf("Reading File completed!\n");
|
||||
|
||||
int source = 0;
|
||||
|
||||
fscanf(fp, "%d", &no_of_nodes);
|
||||
|
||||
int num_of_blocks = 1;
|
||||
int num_of_threads_per_block = no_of_nodes;
|
||||
|
||||
// Make execution Parameters according to the number of nodes
|
||||
// Distribute threads across multiple Blocks if necessary
|
||||
if (no_of_nodes > MAX_THREADS_PER_BLOCK) {
|
||||
num_of_blocks = (int)ceil(no_of_nodes / (double)MAX_THREADS_PER_BLOCK);
|
||||
num_of_threads_per_block = MAX_THREADS_PER_BLOCK;
|
||||
}
|
||||
work_group_size = num_of_threads_per_block;
|
||||
// allocate host memory
|
||||
h_graph_nodes = (Node *)malloc(sizeof(Node) * no_of_nodes);
|
||||
h_graph_mask = (char *)malloc(sizeof(char) * no_of_nodes);
|
||||
h_updating_graph_mask = (char *)malloc(sizeof(char) * no_of_nodes);
|
||||
h_graph_visited = (char *)malloc(sizeof(char) * no_of_nodes);
|
||||
|
||||
int start, edgeno;
|
||||
// initalize the memory
|
||||
for (int i = 0; i < no_of_nodes; i++) {
|
||||
fscanf(fp, "%d %d", &start, &edgeno);
|
||||
h_graph_nodes[i].starting = start;
|
||||
h_graph_nodes[i].no_of_edges = edgeno;
|
||||
h_graph_mask[i] = false;
|
||||
h_updating_graph_mask[i] = false;
|
||||
h_graph_visited[i] = false;
|
||||
}
|
||||
// read the source node from the file
|
||||
fscanf(fp, "%d", &source);
|
||||
source = 0;
|
||||
// set the source node as true in the mask
|
||||
h_graph_mask[source] = true;
|
||||
h_graph_visited[source] = true;
|
||||
fscanf(fp, "%d", &edge_list_size);
|
||||
int id, cost;
|
||||
int *h_graph_edges = (int *)malloc(sizeof(int) * edge_list_size);
|
||||
for (int i = 0; i < edge_list_size; i++) {
|
||||
fscanf(fp, "%d", &id);
|
||||
fscanf(fp, "%d", &cost);
|
||||
h_graph_edges[i] = id;
|
||||
}
|
||||
|
||||
if (fp)
|
||||
fclose(fp);
|
||||
// allocate mem for the result on host side
|
||||
int *h_cost = (int *)malloc(sizeof(int) * no_of_nodes);
|
||||
int *h_cost_ref = (int *)malloc(sizeof(int) * no_of_nodes);
|
||||
for (int i = 0; i < no_of_nodes; i++) {
|
||||
h_cost[i] = -1;
|
||||
h_cost_ref[i] = -1;
|
||||
}
|
||||
h_cost[source] = 0;
|
||||
h_cost_ref[source] = 0;
|
||||
//---------------------------------------------------------
|
||||
//--gpu entry
|
||||
run_bfs_gpu(no_of_nodes, h_graph_nodes, edge_list_size, h_graph_edges,
|
||||
h_graph_mask, h_updating_graph_mask, h_graph_visited, h_cost);
|
||||
//---------------------------------------------------------
|
||||
//--cpu entry
|
||||
// initalize the memory again
|
||||
for (int i = 0; i < no_of_nodes; i++) {
|
||||
h_graph_mask[i] = false;
|
||||
h_updating_graph_mask[i] = false;
|
||||
h_graph_visited[i] = false;
|
||||
}
|
||||
// set the source node as true in the mask
|
||||
source = 0;
|
||||
h_graph_mask[source] = true;
|
||||
h_graph_visited[source] = true;
|
||||
run_bfs_cpu(no_of_nodes, h_graph_nodes, edge_list_size, h_graph_edges,
|
||||
h_graph_mask, h_updating_graph_mask, h_graph_visited,
|
||||
h_cost_ref);
|
||||
//---------------------------------------------------------
|
||||
//--result varification
|
||||
compare_results<int>(h_cost_ref, h_cost, no_of_nodes);
|
||||
// release host memory
|
||||
free(h_graph_nodes);
|
||||
free(h_graph_mask);
|
||||
free(h_updating_graph_mask);
|
||||
free(h_graph_visited);
|
||||
|
||||
} catch (std::string msg) {
|
||||
std::cout << "--cambine: exception in main ->" << msg << std::endl;
|
||||
// release host memory
|
||||
free(h_graph_nodes);
|
||||
free(h_graph_mask);
|
||||
free(h_updating_graph_mask);
|
||||
free(h_graph_visited);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
Reference in New Issue
Block a user