diff --git a/hw/rtl/core/VX_sfu_unit.sv b/hw/rtl/core/VX_sfu_unit.sv index e94f86fd..fd5dd59f 100644 --- a/hw/rtl/core/VX_sfu_unit.sv +++ b/hw/rtl/core/VX_sfu_unit.sv @@ -170,7 +170,7 @@ module VX_sfu_unit import VX_gpu_pkg::*; #( .NUM_INPUTS (RSP_ARB_SIZE), .DATAW (RSP_ARB_DATAW), .ARBITER ("R"), - .OUT_REG (1) + .OUT_REG (3) ) rsp_arb ( .clk (clk), .reset (commit_reset), @@ -186,7 +186,7 @@ module VX_sfu_unit import VX_gpu_pkg::*; #( VX_gather_unit #( .BLOCK_SIZE (BLOCK_SIZE), .NUM_LANES (NUM_LANES), - .OUT_REG (3) + .OUT_REG (1) ) gather_unit ( .clk (clk), .reset (commit_reset), diff --git a/hw/syn/altera/opae/Makefile b/hw/syn/altera/opae/Makefile index 56008d42..0db2015d 100644 --- a/hw/syn/altera/opae/Makefile +++ b/hw/syn/altera/opae/Makefile @@ -50,9 +50,9 @@ CONFIGS_1c := -DNUM_CLUSTERS=1 -DNUM_CORES=1 CONFIGS_2c := -DNUM_CLUSTERS=1 -DNUM_CORES=2 CONFIGS_4c := -DNUM_CLUSTERS=1 -DNUM_CORES=4 CONFIGS_8c := -DNUM_CLUSTERS=1 -DNUM_CORES=8 -CONFIGS_16c := -DNUM_CLUSTERS=1 -DNUM_CORES=16 -CONFIGS_32c := -DNUM_CLUSTERS=2 -DNUM_CORES=16 -CONFIGS_64c := -DNUM_CLUSTERS=4 -DNUM_CORES=16 +CONFIGS_16c := -DNUM_CLUSTERS=1 -DNUM_CORES=16 -DL2_ENABLE +CONFIGS_32c := -DNUM_CLUSTERS=2 -DNUM_CORES=16 -DL2_ENABLE +CONFIGS_64c := -DNUM_CLUSTERS=4 -DNUM_CORES=16 -DL2_ENABLE CONFIGS += $(CONFIGS_$(NUM_CORES)c) # include paths diff --git a/hw/syn/xilinx/xrt/Makefile b/hw/syn/xilinx/xrt/Makefile index ad8e77a7..c8714779 100644 --- a/hw/syn/xilinx/xrt/Makefile +++ b/hw/syn/xilinx/xrt/Makefile @@ -67,9 +67,9 @@ CONFIGS_1c := -DNUM_CLUSTERS=1 -DNUM_CORES=1 CONFIGS_2c := -DNUM_CLUSTERS=1 -DNUM_CORES=2 CONFIGS_4c := -DNUM_CLUSTERS=1 -DNUM_CORES=4 CONFIGS_8c := -DNUM_CLUSTERS=1 -DNUM_CORES=8 -CONFIGS_16c := -DNUM_CLUSTERS=1 -DNUM_CORES=16 -CONFIGS_32c := -DNUM_CLUSTERS=2 -DNUM_CORES=16 -CONFIGS_64c := -DNUM_CLUSTERS=4 -DNUM_CORES=16 +CONFIGS_16c := -DNUM_CLUSTERS=1 -DNUM_CORES=16 -DL2_ENABLE +CONFIGS_32c := -DNUM_CLUSTERS=2 -DNUM_CORES=16 -DL2_ENABLE +CONFIGS_64c := -DNUM_CLUSTERS=4 -DNUM_CORES=16 -DL2_ENABLE CONFIGS += $(CONFIGS_$(NUM_CORES)c) # include paths diff --git a/hw/unittest/top_modules/Makefile b/hw/unittest/top_modules/Makefile index 2d0319e7..7445381e 100644 --- a/hw/unittest/top_modules/Makefile +++ b/hw/unittest/top_modules/Makefile @@ -56,7 +56,6 @@ PROJECT = top_modules all: build build: $(SRCS) - verilator --build $(VL_FLAGS) --cc VX_cache_cluster_top --top-module VX_cache_cluster_top $^ -CFLAGS '$(CXXFLAGS)' verilator --build $(VL_FLAGS) --cc VX_cache_top --top-module VX_cache_top $^ -CFLAGS '$(CXXFLAGS)' verilator --build $(VL_FLAGS) --cc VX_core_top --top-module VX_core_top $^ -CFLAGS '$(CXXFLAGS)' diff --git a/tests/opencl/Makefile b/tests/opencl/Makefile index acb96ba9..88236559 100644 --- a/tests/opencl/Makefile +++ b/tests/opencl/Makefile @@ -31,16 +31,16 @@ run-simx: $(MAKE) -C dotproduct run-simx $(MAKE) -C kmeans run-simx $(MAKE) -C spmv run-simx - $(MAKE) -C cutcp run-simx $(MAKE) -C stencil run-simx $(MAKE) -C lbm run-simx $(MAKE) -C oclprintf run-simx $(MAKE) -C blackscholes run-simx $(MAKE) -C transpose run-simx $(MAKE) -C convolution run-simx -# $(MAKE) -C matmul run-simx -# $(MAKE) -C vectorhypot run-simx -# $(MAKE) -C mri-q run-simx + $(MAKE) -C cutcp run-simx + $(MAKE) -C matmul run-simx + $(MAKE) -C vectorhypot run-simx + $(MAKE) -C mri-q run-simx run-rtlsim: $(MAKE) -C vecadd run-rtlsim @@ -54,12 +54,12 @@ run-rtlsim: $(MAKE) -C kmeans run-rtlsim $(MAKE) -C spmv run-rtlsim $(MAKE) -C transpose run-rtlsim - $(MAKE) -C cutcp run-rtlsim $(MAKE) -C stencil run-rtlsim $(MAKE) -C lbm run-rtlsim $(MAKE) -C oclprintf run-rtlsim $(MAKE) -C blackscholes run-rtlsim $(MAKE) -C convolution run-rtlsim +# $(MAKE) -C cutcp run-rtlsim # $(MAKE) -C matmul run-rtlsim # $(MAKE) -C vectorhypot run-rtlsim # $(MAKE) -C mri-q run-rtlsim @@ -76,12 +76,12 @@ run-opae: $(MAKE) -C kmeans run-opae $(MAKE) -C spmv run-opae $(MAKE) -C transpose run-opae - $(MAKE) -C cutcp run-opae $(MAKE) -C stencil run-opae $(MAKE) -C lbm run-opae $(MAKE) -C oclprintf run-opae $(MAKE) -C blackscholes run-opae $(MAKE) -C convolution run-opae +# $(MAKE) -C cutcp run-opae # $(MAKE) -C matmul run-opae # $(MAKE) -C vectorhypot run-opae # $(MAKE) -C mri-q run-opae diff --git a/tests/opencl/lbm/main.cc b/tests/opencl/lbm/main.cc index 1d825239..58a930e9 100644 --- a/tests/opencl/lbm/main.cc +++ b/tests/opencl/lbm/main.cc @@ -173,14 +173,10 @@ void MAIN_initialize(const MAIN_Param *param, const OpenCL_Param *prm) { pb_SwitchToTimer(&timers, pb_TimerID_COPY); - printf("OK+\n"); - // Setup DEVICE datastructures OpenCL_LBM_allocateGrid(prm, &OpenCL_srcGrid); OpenCL_LBM_allocateGrid(prm, &OpenCL_dstGrid); - printf("OK-\n"); - // Initialize DEVICE datastructures OpenCL_LBM_initializeGrid(prm, OpenCL_srcGrid, TEMP_srcGrid); OpenCL_LBM_initializeGrid(prm, OpenCL_dstGrid, TEMP_dstGrid); diff --git a/tests/opencl/spmv/convert_dataset.c b/tests/opencl/spmv/convert_dataset.c index 122d8819..aba9c3b3 100644 --- a/tests/opencl/spmv/convert_dataset.c +++ b/tests/opencl/spmv/convert_dataset.c @@ -91,15 +91,11 @@ int coo_to_jds(char *mtx_filename, int pad_rows, int warp_size, int pack_size, if ((f = fopen(mtx_filename, "r")) == NULL) exit(1); - printf("OK**\n"); - if (mm_read_banner(f, &matcode) != 0) { printf("Could not process Matrix Market banner.\n"); exit(1); } - printf("OK**\n"); - /* This is how one can screen matrix types if their application */ /* only supports a subset of the Matrix Market data types. */ diff --git a/tests/opencl/spmv/main.cc b/tests/opencl/spmv/main.cc index 85182322..01aa43cd 100644 --- a/tests/opencl/spmv/main.cc +++ b/tests/opencl/spmv/main.cc @@ -148,7 +148,6 @@ int main(int argc, char **argv) { // &h_data, &h_indices, &h_ptr, // &h_perm, &h_nzcnt); int col_count; - printf("OK--\n"); coo_to_jds(parameters->inpFiles[0], // bcsstk32.mtx, fidapm05.mtx, jgl009.mtx 1, // row padding pad, // warp size @@ -159,8 +158,6 @@ int main(int argc, char **argv) { &h_data, &h_ptr, &h_nzcnt, &h_indices, &h_perm, &col_count, &dim, &len, &nzcnt_len, &depth); - printf("OK++\n"); - // pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); h_Ax_vector = (float *)malloc(sizeof(float) * dim); h_x_vector = (float *)malloc(sizeof(float) * dim); diff --git a/tests/opencl/stencil/main.cc b/tests/opencl/stencil/main.cc index a68bd5a3..cbbed6bc 100644 --- a/tests/opencl/stencil/main.cc +++ b/tests/opencl/stencil/main.cc @@ -157,9 +157,7 @@ int main(int argc, char** argv) { CHECK_ERROR("clBuildProgram") cl_kernel clKernel = clCreateKernel(clProgram,"naive_kernel",&clStatus); - CHECK_ERROR("clCreateKernel") - - printf("OK+\n"); + CHECK_ERROR("clCreateKernel") //host data float *h_A0; @@ -177,15 +175,11 @@ int main(int argc, char** argv) { h_Anext=(float*)malloc(sizeof(float)*size); pb_SwitchToTimer(&timers, pb_TimerID_IO); //FILE *fp = fopen(parameters->inpFiles[0], "rb"); - printf("OK+\n"); read_data(h_A0, nx,ny,nz,NULL); - printf("OK+\n"); - //fclose(fp); - memcpy (h_Anext,h_A0,sizeof(float)*size); + //fclose(fp); + memcpy (h_Anext,h_A0,sizeof(float)*size); pb_SwitchToTimer(&timers, pb_TimerID_COPY); - - printf("OK+\n"); //memory allocation d_A0 = clCreateBuffer(clContext,CL_MEM_READ_WRITE,size*sizeof(float),NULL,&clStatus); @@ -201,18 +195,16 @@ int main(int argc, char** argv) { pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); - printf("OK+\n"); - //only use 1D thread block - int tx = 128; + int tx = 128; size_t block[3] = {tx,1,1}; size_t grid[3] = {(nx-2+tx-1)/tx*tx,ny-2,nz-2}; - //size_t grid[3] = {nx-2,ny-2,nz-2}; - size_t offset[3] = {1,1,1}; - printf("grid size in x/y/z = %d %d %d\n",grid[0],grid[1],grid[2]); + //size_t grid[3] = {nx-2,ny-2,nz-2}; + size_t offset[3] = {1,1,1}; + printf("grid size in x/y/z = %d %d %d\n",grid[0],grid[1],grid[2]); printf("block size in x/y/z = %d %d %d\n",block[0],block[1],block[2]); - printf ("blocks = %d\n", (grid[0]/block[0])*(grid[1]/block[1])*(grid[2]*block[2])); + printf ("blocks = %d\n", (grid[0]/block[0])*(grid[1]/block[1])*(grid[2]*block[2])); clStatus = clSetKernelArg(clKernel,0,sizeof(float),(void*)&c0); clStatus = clSetKernelArg(clKernel,1,sizeof(float),(void*)&c1); @@ -226,14 +218,10 @@ int main(int argc, char** argv) { //main execution pb_SwitchToTimer(&timers, pb_TimerID_KERNEL); - printf("OK+0\n"); - int t; for(t=0;toutFile) { pb_SwitchToTimer(&timers, pb_TimerID_IO);