From fc155e122352a7a09e041ae5d324e0795f5c964d Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 14 Apr 2020 06:35:20 -0400 Subject: [PATCH] project directories reorganization --- benchmarks/new_opencl/bfs/Makefile | 47 - benchmarks/new_opencl/convolution/Makefile | 47 - benchmarks/new_opencl/guassian/Makefile | 47 - benchmarks/new_opencl/kmeans/Makefile | 47 - benchmarks/new_opencl/nearn/Makefile | 47 - benchmarks/new_opencl/saxpy/Makefile | 47 - benchmarks/new_opencl/sfilter/Makefile | 47 - benchmarks/new_opencl/sgemm/Makefile | 47 - benchmarks/new_opencl/transpose/Makefile | 47 - benchmarks/new_opencl/vecadd/Makefile | 47 - .../BlackScholes/BlackScholes.cl | 0 .../BlackScholes/Makefile | 0 .../BlackScholes/main.cpp | 0 .../BlackScholes/oclBlackScholes.pdf | Bin .../BlackScholes/oclBlackScholes_common.h | 0 .../BlackScholes/oclBlackScholes_gold.cpp | 0 .../BlackScholes/oclBlackScholes_launcher.cpp | 0 .../BlackScholes/oclUtils.h | 0 .../BlackScholes/shrQATest.h | 0 .../BlackScholes/shrUtils.h | 0 .../DotProduct/DotProduct.cl | 0 .../DotProduct/Makefile | 0 .../{opencl => old_opencl}/DotProduct/main.cc | 0 .../DotProduct/oclUtils.h | 0 .../DotProduct/shrQATest.h | 0 .../DotProduct/shrUtils.h | 0 .../VectorHypot/Makefile | 0 .../VectorHypot/VectorHypot.cl | 0 .../VectorHypot/main.cc | 0 .../VectorHypot/oclUtils.h | 0 .../VectorHypot/shrQATest.h | 0 .../VectorHypot/shrUtils.h | 0 .../{new_opencl => old_opencl}/bfs/CLHelper.h | 38 +- benchmarks/old_opencl/bfs/Makefile | 68 + .../{new_opencl => old_opencl}/bfs/README | 0 .../bfs/graph4096.txt | 0 .../{new_opencl => old_opencl}/bfs/kernel.cl | 0 .../{opencl => old_opencl}/bfs/libbfs.a | Bin .../{new_opencl => old_opencl}/bfs/main.cc | 2 +- benchmarks/{new_opencl => old_opencl}/bfs/run | 0 .../{new_opencl => old_opencl}/bfs/timer.cc | 0 .../{new_opencl => old_opencl}/bfs/timer.h | 0 .../{new_opencl => old_opencl}/bfs/util.h | 0 benchmarks/old_opencl/convolution/Makefile | 68 + .../convolution/input.bmp | Bin 
.../convolution/kernel.cl | 106 +- .../convolution/main.cpp | 520 ++-- .../convolution/utils.cpp | 0 .../convolution/utils.h | 0 .../{opencl => old_opencl}/cutcp/Makefile | 0 .../{opencl => old_opencl}/cutcp/args.c | 0 .../{opencl => old_opencl}/cutcp/atom.h | 0 .../{opencl => old_opencl}/cutcp/cutcpu.c | 0 .../{opencl => old_opencl}/cutcp/cutoff.c | 0 .../{opencl => old_opencl}/cutcp/cutoff.h | 0 .../{opencl => old_opencl}/cutcp/excl.c | 0 .../{opencl => old_opencl}/cutcp/gpu_info.c | 0 .../{opencl => old_opencl}/cutcp/gpu_info.h | 0 .../{opencl => old_opencl}/cutcp/kernel.cl | 0 .../{opencl => old_opencl}/cutcp/libcutcp.a | Bin .../{opencl => old_opencl}/cutcp/macros.h | 0 .../{opencl => old_opencl}/cutcp/main.cc | 0 benchmarks/{opencl => old_opencl}/cutcp/ocl.c | 0 benchmarks/{opencl => old_opencl}/cutcp/ocl.h | 0 .../{opencl => old_opencl}/cutcp/output.c | 0 .../{opencl => old_opencl}/cutcp/output.h | 0 .../{opencl => old_opencl}/cutcp/parboil.h | 0 .../cutcp/parboil_opencl.c | 0 .../{opencl => old_opencl}/cutcp/readatom.c | 0 .../cutcp/watbox.sl40.pqr | 0 benchmarks/old_opencl/guassian/Makefile | 68 + .../guassian/OriginalParallel.c | 0 .../guassian/README.txt | 0 .../nearn => old_opencl/guassian}/clutils.cpp | 42 +- .../guassian/clutils.h | 0 .../guassian/gaussianElim.h | 0 .../guassian/gettimeofday.cpp | 0 .../guassian/gettimeofday.h | 0 .../guassian/kernel.cl | 0 .../guassian/libgaussian.a | Bin .../guassian/main.cc | 7 +- .../guassian/matrix4.txt | 0 .../{new_opencl => old_opencl}/guassian/run | 0 .../guassian/utils.cpp | 0 .../guassian/utils.h | 0 .../include/CL/cl.h | 0 .../include/CL/cl.hpp | 0 .../include/CL/cl2.hpp | 0 .../include/CL/cl_d3d10.h | 0 .../include/CL/cl_d3d11.h | 0 .../include/CL/cl_dx9_media_sharing.h | 0 .../include/CL/cl_dx9_media_sharing_intel.h | 0 .../include/CL/cl_egl.h | 0 .../include/CL/cl_ext.h | 0 .../include/CL/cl_ext_intel.h | 0 .../include/CL/cl_gl.h | 0 .../include/CL/cl_gl_ext.h | 0 .../include/CL/cl_platform.h | 0 
.../CL/cl_va_api_media_sharing_intel.h | 0 .../include/CL/cl_version.h | 0 .../include/CL/opencl.h | 0 benchmarks/old_opencl/kmeans/Makefile | 79 + .../{new_opencl => old_opencl}/kmeans/README | 0 .../kmeans/cluster.c | 0 .../kmeans/getopt.c | 2366 ++++++++--------- .../kmeans/getopt.h | 382 +-- .../kmeans/kernel.cl | 122 +- .../{opencl => old_opencl}/kmeans/kmeans | Bin .../kmeans/kmeans.h | 0 .../kmeans/kmeans_clustering.c | 352 +-- .../{opencl => old_opencl}/kmeans/libkmeans.a | Bin .../{new_opencl => old_opencl}/kmeans/main.cc | 776 +++--- .../kmeans/read_input.c | 0 .../{new_opencl => old_opencl}/kmeans/rmse.c | 0 benchmarks/{opencl => old_opencl}/kmeans/run | 0 .../lbm/120_120_150_ldc.of | 0 .../{opencl => old_opencl}/lbm/Makefile | 0 benchmarks/{opencl => old_opencl}/lbm/args.c | 0 .../{opencl => old_opencl}/lbm/gpu_info.c | 0 .../{opencl => old_opencl}/lbm/gpu_info.h | 0 .../{opencl => old_opencl}/lbm/kernel.cl | 0 .../lbm/layout_config.h | 0 benchmarks/{opencl => old_opencl}/lbm/lbm.c | 0 benchmarks/{opencl => old_opencl}/lbm/lbm.h | 0 .../{opencl => old_opencl}/lbm/lbm_macros.h | 0 .../{opencl => old_opencl}/lbm/liblbm.a | Bin benchmarks/{opencl => old_opencl}/lbm/main.cc | 0 benchmarks/{opencl => old_opencl}/lbm/main.h | 0 benchmarks/{opencl => old_opencl}/lbm/ocl.c | 0 benchmarks/{opencl => old_opencl}/lbm/ocl.h | 0 .../{opencl => old_opencl}/lbm/parboil.h | 0 .../lbm/parboil_opencl.c | 0 .../{opencl => old_opencl}/lib/libOpenCL.a | Bin .../mri-q/32_32_32_dataset.bin | Bin .../{opencl => old_opencl}/mri-q/Makefile | 0 .../{opencl => old_opencl}/mri-q/args.c | 0 .../{opencl => old_opencl}/mri-q/computeQ.c | 0 .../{opencl => old_opencl}/mri-q/computeQ.h | 0 .../{opencl => old_opencl}/mri-q/file.cc | 0 .../{opencl => old_opencl}/mri-q/file.h | 0 .../{opencl => old_opencl}/mri-q/gpu_info.c | 0 .../{opencl => old_opencl}/mri-q/gpu_info.h | 0 .../{opencl => old_opencl}/mri-q/kernel.cl | 0 .../{opencl => old_opencl}/mri-q/libmri-q.a | Bin .../{opencl => 
old_opencl}/mri-q/libsgemm.a | Bin .../{opencl => old_opencl}/mri-q/macros.h | 0 .../{opencl => old_opencl}/mri-q/main.cc | 0 .../{opencl => old_opencl}/mri-q/ocl copy.c | 0 .../{opencl => old_opencl}/mri-q/ocl copy.h | 0 benchmarks/{opencl => old_opencl}/mri-q/ocl.c | 0 benchmarks/{opencl => old_opencl}/mri-q/ocl.h | 0 .../{opencl => old_opencl}/mri-q/parboil.h | 0 .../mri-q/parboil_opencl.c | 0 benchmarks/old_opencl/nearn/Makefile | 68 + .../nearn/README.txt | 0 .../nearn/cane4_0.db | 0 .../nearn/cane4_1.db | 0 .../nearn/cane4_2.db | 0 .../nearn/cane4_3.db | 0 .../guassian => old_opencl/nearn}/clutils.cpp | 38 +- .../nearn/clutils.h | 0 .../nearn/filelist.txt | 0 .../nearn/gettimeofday.cpp | 0 .../nearn/gettimeofday.h | 0 .../{new_opencl => old_opencl}/nearn/ipoint.h | 0 .../nearn/kernel.cl | 0 .../{opencl => old_opencl}/nearn/libnearn.a | Bin .../{new_opencl => old_opencl}/nearn/main.cc | 0 .../nearn/nearestNeighbor.h | 0 .../{new_opencl => old_opencl}/nearn/run | 0 .../nearn/utils.cpp | 0 .../{new_opencl => old_opencl}/nearn/utils.h | 0 .../{opencl => old_opencl}/reduce0/Makefile | 0 .../{opencl => old_opencl}/reduce0/main.cc | 0 .../reduce0/oclReduction.h | 0 .../reduce0/oclReduction_kernel.cl | 0 .../{opencl => old_opencl}/reduce0/oclUtils.h | 0 .../reduce0/shrQATest.h | 0 .../{opencl => old_opencl}/reduce0/shrUtils.h | 0 .../{opencl => old_opencl}/sad/DESCRIPTION | 0 .../{opencl => old_opencl}/sad/Makefile | 0 .../sad/OpenCL_common.cpp | 0 .../sad/OpenCL_common.h | 0 benchmarks/{opencl => old_opencl}/sad/args.c | 0 benchmarks/{opencl => old_opencl}/sad/file.c | 0 benchmarks/{opencl => old_opencl}/sad/file.h | 0 .../{opencl => old_opencl}/sad/frame.bin | Bin .../{opencl => old_opencl}/sad/gpu_info.c | 0 .../{opencl => old_opencl}/sad/gpu_info.h | 0 benchmarks/{opencl => old_opencl}/sad/image.c | 0 benchmarks/{opencl => old_opencl}/sad/image.h | 0 .../{opencl => old_opencl}/sad/kernel.cl | 0 .../{opencl => old_opencl}/sad/libsad.a | Bin benchmarks/{opencl => 
old_opencl}/sad/main.cc | 0 benchmarks/{opencl => old_opencl}/sad/ocl.c | 0 benchmarks/{opencl => old_opencl}/sad/ocl.h | 0 .../{opencl => old_opencl}/sad/parboil.h | 0 .../sad/parboil_opencl.c | 0 .../{opencl => old_opencl}/sad/reference.bin | Bin benchmarks/{opencl => old_opencl}/sad/sad.h | 0 .../{opencl => old_opencl}/sad/sad_kernel.h | 0 benchmarks/old_opencl/saxpy/Makefile | 68 + .../{new_opencl => old_opencl}/saxpy/README | 0 .../saxpy/kernel.cl | 0 .../{opencl => old_opencl}/saxpy/libsaxpy.a | Bin .../{new_opencl => old_opencl}/saxpy/main.cc | 40 +- benchmarks/old_opencl/sfilter/Makefile | 68 + .../{new_opencl => old_opencl}/sfilter/README | 0 .../sfilter/kernel.cl | 0 .../sfilter/libsfilter.a | Bin .../sfilter/main.cc | 38 +- benchmarks/old_opencl/sgemm/Makefile | 68 + .../{new_opencl => old_opencl}/sgemm/README | 0 .../sgemm/kernel.cl | 0 .../{opencl => old_opencl}/sgemm/libsgemm.a | Bin .../{new_opencl => old_opencl}/sgemm/main.cc | 40 +- .../{opencl => old_opencl}/spmv/1138_bus.mtx | 0 .../{opencl => old_opencl}/spmv/DESCRIPTION | 0 .../{opencl => old_opencl}/spmv/Makefile | 0 benchmarks/{opencl => old_opencl}/spmv/args.c | 0 .../spmv/convert_dataset.c | 0 .../spmv/convert_dataset.h | 0 benchmarks/{opencl => old_opencl}/spmv/file.c | 0 benchmarks/{opencl => old_opencl}/spmv/file.h | 0 .../{opencl => old_opencl}/spmv/gpu_info.c | 0 .../{opencl => old_opencl}/spmv/gpu_info.h | 0 .../spmv/input/1138_bus.mtx | 0 .../spmv/input/1138_bus.mtx.bin | Bin .../spmv/input/DESCRIPTION | 0 .../spmv/input/vector.bin | Bin .../{opencl => old_opencl}/spmv/kernel.cl | 0 .../{opencl => old_opencl}/spmv/libspmv.a | Bin .../{opencl => old_opencl}/spmv/main.cc | 0 benchmarks/{opencl => old_opencl}/spmv/mmio.c | 0 benchmarks/{opencl => old_opencl}/spmv/mmio.h | 0 benchmarks/{opencl => old_opencl}/spmv/ocl.c | 0 benchmarks/{opencl => old_opencl}/spmv/ocl.h | 0 .../{opencl => old_opencl}/spmv/parboil.h | 0 .../spmv/parboil_opencl.c | 0 .../{opencl => 
old_opencl}/spmv/perf_util.c | 0 .../{opencl => old_opencl}/spmv/perf_util.h | 0 .../{opencl => old_opencl}/spmv/perfmon.c | 0 .../{opencl => old_opencl}/spmv/perfmon.h | 0 .../{opencl => old_opencl}/spmv/stub.cc | 0 .../{opencl => old_opencl}/spmv/vector.bin | Bin .../stencil/128x128x32.bin | Bin .../{opencl => old_opencl}/stencil/Makefile | 0 .../{opencl => old_opencl}/stencil/args.c | 0 .../{opencl => old_opencl}/stencil/file.c | 0 .../{opencl => old_opencl}/stencil/file.h | 0 .../{opencl => old_opencl}/stencil/gpu_info.c | 0 .../{opencl => old_opencl}/stencil/gpu_info.h | 0 .../{opencl => old_opencl}/stencil/kernel.cl | 0 .../stencil/libstencil.a | Bin .../{opencl => old_opencl}/stencil/main.cc | 0 .../{opencl => old_opencl}/stencil/ocl.c | 0 .../{opencl => old_opencl}/stencil/ocl.h | 0 .../{opencl => old_opencl}/stencil/parboil.h | 0 .../stencil/parboil_opencl.c | 0 benchmarks/old_opencl/transpose/Makefile | 66 + .../transpose/main.cc | 32 +- .../transpose/oclUtils.h | 394 +-- .../transpose/shrQATest.h | 474 ++-- .../transpose/shrUtils.h | 1282 ++++----- .../transpose/transpose.cl | 0 .../transpose/transpose_gold.cpp | 0 benchmarks/old_opencl/vecadd/Makefile | 68 + .../{new_opencl => old_opencl}/vecadd/README | 0 .../vecadd/kernel.cl | 0 .../{opencl => old_opencl}/vecadd/libvecadd.a | Bin .../{new_opencl => old_opencl}/vecadd/main.cc | 66 +- benchmarks/opencl/bfs/CLHelper.h | 38 +- benchmarks/opencl/bfs/Makefile | 83 +- .../{new_opencl => opencl}/bfs/kernel.pocl | Bin benchmarks/opencl/bfs/main.cc | 2 +- .../compiler/bin/poclcc | Bin .../compiler/lib/libOpenCL.so | 0 .../compiler/lib/libOpenCL.so.2 | 0 .../compiler/lib/libOpenCL.so.2.5.0 | Bin .../share/pocl/include/_builtin_renames.h | 0 .../share/pocl/include/_clang_opencl.h | 0 .../share/pocl/include/_enable_all_exts.h | 0 .../compiler/share/pocl/include/_kernel.h | 0 .../compiler/share/pocl/include/_kernel_c.h | 0 .../share/pocl/include/_kernel_constants.h | 0 .../share/pocl/include/opencl-c-base.h | 0 
.../compiler/share/pocl/include/opencl-c.h | 0 .../compiler/share/pocl/include/pocl.h | 0 .../compiler/share/pocl/include/pocl_device.h | 0 .../share/pocl/include/pocl_image_types.h | 0 .../compiler/share/pocl/include/pocl_spir.h | 0 .../compiler/share/pocl/include/pocl_types.h | 0 ...nel-riscv32-unknown-unknown-elf-skylake.bc | Bin .../convolution/.gitignore | 0 benchmarks/opencl/convolution/Makefile | 83 +- benchmarks/opencl/convolution/kernel.cl | 106 +- benchmarks/opencl/convolution/main.cpp | 520 ++-- benchmarks/opencl/guassian/Makefile | 85 +- benchmarks/opencl/guassian/clutils.cpp | 36 +- .../guassian/kernel.pocl | Bin benchmarks/opencl/guassian/main.cc | 7 +- .../{new_opencl => opencl}/kmeans/.gitignore | 0 benchmarks/opencl/kmeans/Makefile | 96 +- benchmarks/opencl/kmeans/getopt.c | 2366 ++++++++--------- benchmarks/opencl/kmeans/getopt.h | 382 +-- benchmarks/opencl/kmeans/kernel.cl | 122 +- .../{new_opencl => opencl}/kmeans/kernel.pocl | Bin benchmarks/opencl/kmeans/kmeans_clustering.c | 352 +-- benchmarks/opencl/kmeans/main.cc | 776 +++--- .../{new_opencl => opencl}/lib/libOpenCL.so | Bin .../{new_opencl => opencl}/lib/libOpenCL.so.2 | Bin .../lib/libOpenCL.so.2.5.0 | Bin benchmarks/opencl/nearn/Makefile | 83 +- benchmarks/opencl/nearn/clutils.cpp | 40 +- .../{new_opencl => opencl}/nearn/kernel.pocl | Bin benchmarks/{new_opencl => opencl}/results.txt | 0 .../runtime/include/CL/cl.h | 0 .../runtime/include/CL/cl.hpp | 0 .../runtime/include/CL/cl2.hpp | 0 .../runtime/include/CL/cl_d3d10.h | 0 .../runtime/include/CL/cl_d3d11.h | 0 .../runtime/include/CL/cl_dx9_media_sharing.h | 0 .../include/CL/cl_dx9_media_sharing_intel.h | 0 .../runtime/include/CL/cl_egl.h | 0 .../runtime/include/CL/cl_ext.h | 0 .../runtime/include/CL/cl_ext_intel.h | 0 .../runtime/include/CL/cl_gl.h | 0 .../runtime/include/CL/cl_gl_ext.h | 0 .../runtime/include/CL/cl_platform.h | 0 .../CL/cl_va_api_media_sharing_intel.h | 0 .../runtime/include/CL/cl_version.h | 0 
.../runtime/include/CL/opencl.h | 0 .../runtime/lib/libOpenCL.so | 0 .../runtime/lib/libOpenCL.so.2 | 0 .../runtime/lib/libOpenCL.so.2.5.0 | Bin benchmarks/opencl/saxpy/Makefile | 85 +- .../{new_opencl => opencl}/saxpy/kernel.pocl | Bin benchmarks/opencl/saxpy/main.cc | 40 +- benchmarks/opencl/sfilter/Makefile | 85 +- .../sfilter/kernel.pocl | Bin benchmarks/opencl/sfilter/main.cc | 38 +- benchmarks/opencl/sgemm/Makefile | 83 +- .../{new_opencl => opencl}/sgemm/kernel.pocl | Bin benchmarks/opencl/sgemm/main.cc | 40 +- benchmarks/{new_opencl => opencl}/sgemm/sgemm | Bin .../transpose/.gitignore | 0 benchmarks/opencl/transpose/Makefile | 85 +- benchmarks/opencl/transpose/main.cc | 32 +- benchmarks/opencl/transpose/oclUtils.h | 394 +-- benchmarks/opencl/transpose/shrQATest.h | 474 ++-- benchmarks/opencl/transpose/shrUtils.h | 1282 ++++----- .../{new_opencl => opencl}/vecadd/.gitignore | 0 benchmarks/opencl/vecadd/Makefile | 83 +- .../{new_opencl => opencl}/vecadd/kernel.pocl | Bin benchmarks/opencl/vecadd/main.cc | 66 +- .../export_cycle_counts.py | 0 .../perf_2019_11_25}/saxpy.result | 0 .../perf_2019_11_25}/sfilter.result | 0 .../perf_2019_11_25}/sgemm.result | 0 .../perf_2019_11_25}/sgemm_ipc.result | 0 .../perf_2019_11_25}/test_all.sh | 0 .../perf_2019_11_25}/test_bench.sh | 0 run_tests.sh => evaluation/run_tests.sh | 0 {results => evaluation}/synth_data.csv | 0 {rtl => hw}/configs/.gitignore | 0 {rtl => hw}/configs/.gitkeep | 0 {rtl => hw}/gen_config.py | 0 {rtl => hw}/gen_synth_configs.py | 0 .../memory/cln28hpc/rf2_32x128_wm1/Makefile | 0 .../memory/cln28hpc/rf2_32x128_wm1/env_vsim | 0 .../rf2_32x128_wm1/rf2_32x128_wm1.bitmap | 0 .../rf2_32x128_wm1/rf2_32x128_wm1.cpf | 0 .../rf2_32x128_wm1/rf2_32x128_wm1.ctl | 0 .../rf2_32x128_wm1/rf2_32x128_wm1.lef | 0 .../rf2_32x128_wm1/rf2_32x128_wm1.mdt | 0 .../rf2_32x128_wm1/rf2_32x128_wm1.memlib | 0 .../cln28hpc/rf2_32x128_wm1/rf2_32x128_wm1.tv | 0 .../cln28hpc/rf2_32x128_wm1/rf2_32x128_wm1.v | 0 
.../rf2_32x128_wm1/rf2_32x128_wm1_antenna.clf | 0 .../rf2_32x128_wm1_ff_0p99v_0p99v_m40c.avm | 0 .../rf2_32x128_wm1_ff_0p99v_0p99v_m40c.dat | 0 .../rf2_32x128_wm1_ff_0p99v_0p99v_m40c.lib | 0 ...32x128_wm1_ff_0p99v_0p99v_m40c.lib_sh5p1cm | 0 .../rf2_32x128_wm1_ff_0p99v_0p99v_m40c.ps | 0 .../rf2_32x128_wm1/rf2_32x128_wm1_rtl.v | 0 .../rf2_32x128_wm1_ss_0p81v_0p81v_125c.avm | 0 .../rf2_32x128_wm1_ss_0p81v_0p81v_125c.dat | 0 .../rf2_32x128_wm1_ss_0p81v_0p81v_125c.lib | 0 .../rf2_32x128_wm1_ss_0p81v_0p81v_125c.ps | 0 .../rf2_32x128_wm1_tt_0p81v_0p81v_0c.avm | 0 .../rf2_32x128_wm1_tt_0p81v_0p81v_0c.dat | 0 .../rf2_32x128_wm1_tt_0p81v_0p81v_0c.lib | 0 .../rf2_32x128_wm1_tt_0p81v_0p81v_0c.ps | 0 .../cln28hpc/rf2_32x128_wm1/testbench.cpp | 0 .../rf2_32x128_wm1/vsim/rf2_32x128_wm1_tb.v | 0 .../cln28hpc/rf2_32x128_wm1/vsim/rf_tb.cr.mti | 0 .../cln28hpc/rf2_32x128_wm1/vsim/rf_tb.mpf | 0 .../cln28hpc/rf2_32x128_wm1/vsim/transcript | 0 .../cln28hpc/rf2_32x128_wm1/vsim/vsim.wlf | Bin .../rf2_32x128_wm1/vsim/work/@_opt/_lib.qdb | Bin .../vsim/work/@_opt/_lib1_0.qdb | Bin .../vsim/work/@_opt/_lib1_0.qpg | Bin .../vsim/work/@_opt/_lib1_0.qtl | Bin .../vsim/work/@_opt/_lib2_0.qdb | Bin .../vsim/work/@_opt/_lib2_0.qpg | Bin .../vsim/work/@_opt/_lib2_0.qtl | Bin .../vsim/work/@_opt/_lib3_0.qdb | Bin .../vsim/work/@_opt/_lib3_0.qpg | Bin .../vsim/work/@_opt/_lib3_0.qtl | Bin .../vsim/work/@_opt/_lib4_0.qdb | Bin .../vsim/work/@_opt/_lib4_0.qpg | Bin .../vsim/work/@_opt/_lib4_0.qtl | Bin .../vsim/work/@_opt/_lib5_0.qdb | Bin .../vsim/work/@_opt/_lib5_0.qpg | Bin .../vsim/work/@_opt/_lib5_0.qtl | Bin .../cln28hpc/rf2_32x128_wm1/vsim/work/_info | 0 .../rf2_32x128_wm1/vsim/work/_lib.qdb | Bin .../rf2_32x128_wm1/vsim/work/_lib1_0.qdb | Bin .../rf2_32x128_wm1/vsim/work/_lib1_0.qpg | Bin .../rf2_32x128_wm1/vsim/work/_lib1_0.qtl | Bin .../cln28hpc/rf2_32x128_wm1/vsim/work/_vmake | 0 .../rf2_128x128_wm1_ff_0p99v_0p99v_125c.db | Bin .../rf2_128x128_wm1_ss_0p81v_0p81v_m40c.db | Bin 
.../rf2_128x128_wm1_tt_0p90v_0p90v_25c.db | Bin .../rf2_256x128_wm1_ff_0p99v_0p99v_125c.db | Bin .../rf2_256x128_wm1_ss_0p81v_0p81v_m40c.db | Bin .../rf2_256x128_wm1_tt_0p90v_0p90v_25c.db | Bin .../rf2_256x19_wm0_ff_0p99v_0p99v_125c.db | Bin .../rf2_256x19_wm0_ss_0p81v_0p81v_m40c.db | Bin .../rf2_256x19_wm0_tt_0p90v_0p90v_25c.db | Bin .../rf2_32x128_wm1_ff_0p99v_0p99v_125c.db | Bin .../rf2_32x128_wm1_ss_0p81v_0p81v_m40c.db | Bin .../rf2_32x128_wm1_tt_0p90v_0p90v_25c.db | Bin .../memory/cln28hpm/convertToDBAll.csh | 0 .../memory/cln28hpm/convert_lib_to_db.tcl | 0 .../cln28hpm/rf2_128x128_wm1/command.log | 0 .../rf2_128x128_wm1/rf2_128x128_wm1.bitmap | 0 .../rf2_128x128_wm1/rf2_128x128_wm1.cpf | 0 .../rf2_128x128_wm1/rf2_128x128_wm1.ctl | 0 .../rf2_128x128_wm1/rf2_128x128_wm1.lef | 0 .../rf2_128x128_wm1/rf2_128x128_wm1.mdt | 0 .../rf2_128x128_wm1/rf2_128x128_wm1.memlib | 0 .../rf2_128x128_wm1/rf2_128x128_wm1.tv | 0 .../rf2_128x128_wm1/rf2_128x128_wm1.v | 0 .../rf2_128x128_wm1_antenna.clf | 0 .../rf2_128x128_wm1_ff_0p99v_0p99v_125c.avm | 0 .../rf2_128x128_wm1_ff_0p99v_0p99v_125c.dat | 0 .../rf2_128x128_wm1_ff_0p99v_0p99v_125c.db | Bin .../rf2_128x128_wm1_ff_0p99v_0p99v_125c.lib | 0 .../rf2_128x128_wm1_ff_0p99v_0p99v_125c.ps | 0 .../rf2_128x128_wm1/rf2_128x128_wm1_rtl.v | 0 .../rf2_128x128_wm1_ss_0p81v_0p81v_m40c.avm | 0 .../rf2_128x128_wm1_ss_0p81v_0p81v_m40c.dat | 0 .../rf2_128x128_wm1_ss_0p81v_0p81v_m40c.db | Bin .../rf2_128x128_wm1_ss_0p81v_0p81v_m40c.lib | 0 .../rf2_128x128_wm1_ss_0p81v_0p81v_m40c.ps | 0 .../rf2_128x128_wm1_tt_0p90v_0p90v_25c.avm | 0 .../rf2_128x128_wm1_tt_0p90v_0p90v_25c.dat | 0 .../rf2_128x128_wm1_tt_0p90v_0p90v_25c.db | Bin .../rf2_128x128_wm1_tt_0p90v_0p90v_25c.lib | 0 .../rf2_128x128_wm1_tt_0p90v_0p90v_25c.ps | 0 .../cln28hpm/rf2_256x128_wm1/command.log | 0 .../rf2_256x128_wm1/rf2_256x128_wm1.bitmap | 0 .../rf2_256x128_wm1/rf2_256x128_wm1.cpf | 0 .../rf2_256x128_wm1/rf2_256x128_wm1.ctl | 0 .../rf2_256x128_wm1/rf2_256x128_wm1.lef | 0 
.../rf2_256x128_wm1/rf2_256x128_wm1.mdt | 0 .../rf2_256x128_wm1/rf2_256x128_wm1.memlib | 0 .../rf2_256x128_wm1/rf2_256x128_wm1.tv | 0 .../rf2_256x128_wm1/rf2_256x128_wm1.v | 0 .../rf2_256x128_wm1_antenna.clf | 0 .../rf2_256x128_wm1_ff_0p99v_0p99v_125c.avm | 0 .../rf2_256x128_wm1_ff_0p99v_0p99v_125c.dat | 0 .../rf2_256x128_wm1_ff_0p99v_0p99v_125c.db | Bin .../rf2_256x128_wm1_ff_0p99v_0p99v_125c.lib | 0 .../rf2_256x128_wm1_ff_0p99v_0p99v_125c.ps | 0 .../rf2_256x128_wm1/rf2_256x128_wm1_rtl.v | 0 .../rf2_256x128_wm1_ss_0p81v_0p81v_m40c.avm | 0 .../rf2_256x128_wm1_ss_0p81v_0p81v_m40c.dat | 0 .../rf2_256x128_wm1_ss_0p81v_0p81v_m40c.db | Bin .../rf2_256x128_wm1_ss_0p81v_0p81v_m40c.lib | 0 .../rf2_256x128_wm1_ss_0p81v_0p81v_m40c.ps | 0 .../rf2_256x128_wm1_tt_0p90v_0p90v_25c.avm | 0 .../rf2_256x128_wm1_tt_0p90v_0p90v_25c.dat | 0 .../rf2_256x128_wm1_tt_0p90v_0p90v_25c.db | Bin .../rf2_256x128_wm1_tt_0p90v_0p90v_25c.lib | 0 .../rf2_256x128_wm1_tt_0p90v_0p90v_25c.ps | 0 .../cln28hpm/rf2_256x19_wm0/command.log | 0 .../rf2_256x19_wm0/rf2_256x19_wm0.bitmap | 0 .../rf2_256x19_wm0/rf2_256x19_wm0.cpf | 0 .../rf2_256x19_wm0/rf2_256x19_wm0.ctl | 0 .../rf2_256x19_wm0/rf2_256x19_wm0.lef | 0 .../rf2_256x19_wm0/rf2_256x19_wm0.mdt | 0 .../rf2_256x19_wm0/rf2_256x19_wm0.memlib | 0 .../cln28hpm/rf2_256x19_wm0/rf2_256x19_wm0.tv | 0 .../cln28hpm/rf2_256x19_wm0/rf2_256x19_wm0.v | 0 .../rf2_256x19_wm0/rf2_256x19_wm0_antenna.clf | 0 .../rf2_256x19_wm0_ff_0p99v_0p99v_125c.avm | 0 .../rf2_256x19_wm0_ff_0p99v_0p99v_125c.dat | 0 .../rf2_256x19_wm0_ff_0p99v_0p99v_125c.db | Bin .../rf2_256x19_wm0_ff_0p99v_0p99v_125c.lib | 0 .../rf2_256x19_wm0_ff_0p99v_0p99v_125c.ps | 0 .../rf2_256x19_wm0/rf2_256x19_wm0_rtl.v | 0 .../rf2_256x19_wm0_ss_0p81v_0p81v_m40c.avm | 0 .../rf2_256x19_wm0_ss_0p81v_0p81v_m40c.dat | 0 .../rf2_256x19_wm0_ss_0p81v_0p81v_m40c.db | Bin .../rf2_256x19_wm0_ss_0p81v_0p81v_m40c.lib | 0 .../rf2_256x19_wm0_ss_0p81v_0p81v_m40c.ps | 0 .../rf2_256x19_wm0_tt_0p90v_0p90v_25c.avm | 0 
.../rf2_256x19_wm0_tt_0p90v_0p90v_25c.dat | 0 .../rf2_256x19_wm0_tt_0p90v_0p90v_25c.db | Bin .../rf2_256x19_wm0_tt_0p90v_0p90v_25c.lib | 0 .../rf2_256x19_wm0_tt_0p90v_0p90v_25c.ps | 0 .../cln28hpm/rf2_32x128_wm1/command.log | 0 .../rf2_32x128_wm1/rf2_32x128_wm1.bitmap | 0 .../rf2_32x128_wm1/rf2_32x128_wm1.cpf | 0 .../rf2_32x128_wm1/rf2_32x128_wm1.ctl | 0 .../rf2_32x128_wm1/rf2_32x128_wm1.lef | 0 .../rf2_32x128_wm1/rf2_32x128_wm1.mdt | 0 .../rf2_32x128_wm1/rf2_32x128_wm1.memlib | 0 .../cln28hpm/rf2_32x128_wm1/rf2_32x128_wm1.tv | 0 .../cln28hpm/rf2_32x128_wm1/rf2_32x128_wm1.v | 0 .../rf2_32x128_wm1/rf2_32x128_wm1_antenna.clf | 0 .../rf2_32x128_wm1_ff_0p99v_0p99v_125c.avm | 0 .../rf2_32x128_wm1_ff_0p99v_0p99v_125c.dat | 0 .../rf2_32x128_wm1_ff_0p99v_0p99v_125c.db | Bin .../rf2_32x128_wm1_ff_0p99v_0p99v_125c.lib | 0 .../rf2_32x128_wm1_ff_0p99v_0p99v_125c.ps | 0 .../rf2_32x128_wm1/rf2_32x128_wm1_rtl.v | 0 .../rf2_32x128_wm1_ss_0p81v_0p81v_m40c.avm | 0 .../rf2_32x128_wm1_ss_0p81v_0p81v_m40c.dat | 0 .../rf2_32x128_wm1_ss_0p81v_0p81v_m40c.db | Bin .../rf2_32x128_wm1_ss_0p81v_0p81v_m40c.lib | 0 .../rf2_32x128_wm1_ss_0p81v_0p81v_m40c.ps | 0 .../rf2_32x128_wm1_tt_0p90v_0p90v_25c.avm | 0 .../rf2_32x128_wm1_tt_0p90v_0p90v_25c.dat | 0 .../rf2_32x128_wm1_tt_0p90v_0p90v_25c.db | Bin .../rf2_32x128_wm1_tt_0p90v_0p90v_25c.lib | 0 .../rf2_32x128_wm1_tt_0p90v_0p90v_25c.ps | 0 .../cln28hpm/rf2_32x128_wm1/vsim/Makefile | 0 .../rf2_32x128_wm1/vsim/rf2_32x128_wm1_tb.v | 0 .../cln28hpm/rf2_32x128_wm1/vsim/transcript | 0 .../rf2_32x19_wm0/rf2_32x19_wm0.bitmap | 0 .../cln28hpm/rf2_32x19_wm0/rf2_32x19_wm0.cpf | 0 .../cln28hpm/rf2_32x19_wm0/rf2_32x19_wm0.ctl | 0 .../cln28hpm/rf2_32x19_wm0/rf2_32x19_wm0.lef | 0 .../cln28hpm/rf2_32x19_wm0/rf2_32x19_wm0.mdt | 0 .../rf2_32x19_wm0/rf2_32x19_wm0.memlib | 0 .../cln28hpm/rf2_32x19_wm0/rf2_32x19_wm0.tv | 0 .../cln28hpm/rf2_32x19_wm0/rf2_32x19_wm0.v | 0 .../rf2_32x19_wm0/rf2_32x19_wm0_antenna.clf | 0 .../rf2_32x19_wm0_ff_0p99v_0p99v_125c.avm | 0 
.../rf2_32x19_wm0_ff_0p99v_0p99v_125c.dat | 0 .../rf2_32x19_wm0_ff_0p99v_0p99v_125c.lib | 0 .../rf2_32x19_wm0_ff_0p99v_0p99v_125c.ps | 0 .../rf2_32x19_wm0/rf2_32x19_wm0_rtl.v | 0 .../rf2_32x19_wm0_ss_0p81v_0p81v_m40c.avm | 0 .../rf2_32x19_wm0_ss_0p81v_0p81v_m40c.dat | 0 .../rf2_32x19_wm0_ss_0p81v_0p81v_m40c.lib | 0 .../rf2_32x19_wm0_ss_0p81v_0p81v_m40c.ps | 0 .../rf2_32x19_wm0_tt_0p90v_0p90v_25c.avm | 0 .../rf2_32x19_wm0_tt_0p90v_0p90v_25c.dat | 0 .../rf2_32x19_wm0_tt_0p90v_0p90v_25c.lib | 0 .../rf2_32x19_wm0_tt_0p90v_0p90v_25c.ps | 0 {old_rtl => hw}/modelsim/Makefile | 0 {old_rtl => hw}/modelsim/cshrc.modelsim | 0 {old_rtl => hw}/modelsim/modelsim.mpf | 0 {old_rtl => hw}/modelsim/vortex_dpi.cpp | 0 {old_rtl => hw}/modelsim/vortex_dpi.h | 0 {old_rtl => hw}/modelsim/vortex_tb.v | 0 {old_rtl => hw}/modelsim/work/_info | 0 {old_rtl => hw}/modelsim/work/_lib.qdb | Bin {old_rtl => hw}/modelsim/work/_lib1_0.qdb | Bin {old_rtl => hw}/modelsim/work/_lib1_0.qpg | Bin {old_rtl => hw}/modelsim/work/_lib1_0.qtl | Bin {old_rtl => hw}/modelsim/work/_vmake | 0 {old_rtl => hw/old_rtl}/Makefile | 0 {old_rtl => hw/old_rtl}/VX_alu.v | 0 {old_rtl => hw/old_rtl}/VX_back_end.v | 0 {old_rtl => hw/old_rtl}/VX_countones.v | 0 {old_rtl => hw/old_rtl}/VX_csr_data.v | 0 {old_rtl => hw/old_rtl}/VX_csr_handler.v | 0 {old_rtl => hw/old_rtl}/VX_csr_pipe.v | 0 {old_rtl => hw/old_rtl}/VX_csr_wrapper.v | 0 {old_rtl => hw/old_rtl}/VX_decode.v | 0 {old_rtl => hw/old_rtl}/VX_define.v | 0 {old_rtl => hw/old_rtl}/VX_define_synth.v | 0 {old_rtl => hw/old_rtl}/VX_dmem_controller.v | 0 {old_rtl => hw/old_rtl}/VX_execute_unit.v | 0 {old_rtl => hw/old_rtl}/VX_fetch.v | 0 {old_rtl => hw/old_rtl}/VX_front_end.v | 0 .../old_rtl}/VX_generic_priority_encoder.v | 0 {old_rtl => hw/old_rtl}/VX_generic_register.v | 0 {old_rtl => hw/old_rtl}/VX_generic_stack.v | 0 {old_rtl => hw/old_rtl}/VX_gpgpu_inst.v | 0 {old_rtl => hw/old_rtl}/VX_gpr.v | 0 {old_rtl => hw/old_rtl}/VX_gpr_stage.v | 0 {old_rtl => 
hw/old_rtl}/VX_gpr_wrapper.v | 0 {old_rtl => hw/old_rtl}/VX_inst_multiplex.v | 0 {old_rtl => hw/old_rtl}/VX_lsu.v | 0 {old_rtl => hw/old_rtl}/VX_lsu_addr_gen.v | 0 {old_rtl => hw/old_rtl}/VX_priority_encoder.v | 0 .../old_rtl}/VX_priority_encoder_w_mask.v | 0 {old_rtl => hw/old_rtl}/VX_scheduler.v | 0 {old_rtl => hw/old_rtl}/VX_warp.v | 0 {old_rtl => hw/old_rtl}/VX_warp_scheduler.v | 0 {old_rtl => hw/old_rtl}/VX_writeback.v | 0 {old_rtl => hw/old_rtl}/Vortex.v | 0 .../byte_enabled_simple_dual_port_ram.v | 0 {old_rtl => hw/old_rtl}/cache/Makefile | 0 {old_rtl => hw/old_rtl}/cache/Notes | 0 {old_rtl => hw/old_rtl}/cache/VX_Cache_Bank.v | 0 .../old_rtl}/cache/VX_cache_bank_valid.v | 0 {old_rtl => hw/old_rtl}/cache/VX_cache_data.v | 0 .../old_rtl}/cache/VX_cache_data_per_index.v | 0 {old_rtl => hw/old_rtl}/cache/VX_d_cache.v | 0 .../old_rtl}/cache/VX_d_cache_encapsulate.v | 0 {old_rtl => hw/old_rtl}/cache/VX_d_cache_tb.v | 0 {old_rtl => hw/old_rtl}/cache/VX_generic_pe.v | 0 {old_rtl => hw/old_rtl}/cache/cache_set.v | 0 .../old_rtl}/cache/d_cache_test_bench.cpp | 0 .../old_rtl}/cache/d_cache_test_bench.h | 0 .../old_rtl}/cache/d_cache_test_bench_debug.h | 0 .../interfaces/VX_branch_response_inter.v | 0 .../old_rtl}/interfaces/VX_csr_req_inter.v | 0 .../old_rtl}/interfaces/VX_csr_wb_inter.v | 0 .../interfaces/VX_dcache_request_inter.v | 0 .../interfaces/VX_dcache_response_inter.v | 0 .../interfaces/VX_dram_req_rsp_inter.v | 0 .../interfaces/VX_exec_unit_req_inter.v | 0 .../interfaces/VX_frE_to_bckE_req_inter.v | 0 .../old_rtl}/interfaces/VX_gpr_clone_inter.v | 0 .../old_rtl}/interfaces/VX_gpr_data_inter.v | 0 .../old_rtl}/interfaces/VX_gpr_jal_inter.v | 0 .../old_rtl}/interfaces/VX_gpr_read_inter.v | 0 .../old_rtl}/interfaces/VX_gpr_wspawn_inter.v | 0 .../interfaces/VX_gpu_inst_req_inter.v | 0 .../interfaces/VX_icache_request_inter.v | 0 .../interfaces/VX_icache_response_inter.v | 0 .../interfaces/VX_inst_exec_wb_inter.v | 0 .../interfaces/VX_inst_mem_wb_inter.v | 0 
.../old_rtl}/interfaces/VX_inst_meta_inter.v | 0 .../interfaces/VX_jal_response_inter.v | 0 .../old_rtl}/interfaces/VX_join_inter.v | 0 .../old_rtl}/interfaces/VX_lsu_req_inter.v | 0 .../old_rtl}/interfaces/VX_mem_req_inter.v | 0 .../old_rtl}/interfaces/VX_mw_wb_inter.v | 0 .../old_rtl}/interfaces/VX_warp_ctl_inter.v | 0 .../old_rtl}/interfaces/VX_wb_inter.v | 0 .../old_rtl}/interfaces/VX_wstall_inter.v | 0 {rtl => hw/old_rtl}/modelsim/Makefile | 0 {rtl => hw/old_rtl}/modelsim/cshrc.modelsim | 0 {rtl => hw/old_rtl}/modelsim/modelsim.mpf | 0 {rtl => hw/old_rtl}/modelsim/vortex_dpi.cpp | 0 {rtl => hw/old_rtl}/modelsim/vortex_dpi.h | 0 {rtl => hw/old_rtl}/modelsim/vortex_tb.v | 0 {rtl => hw/old_rtl}/modelsim/work/_info | 0 {rtl => hw/old_rtl}/modelsim/work/_lib.qdb | Bin {rtl => hw/old_rtl}/modelsim/work/_lib1_0.qdb | Bin {rtl => hw/old_rtl}/modelsim/work/_lib1_0.qpg | Bin {rtl => hw/old_rtl}/modelsim/work/_lib1_0.qtl | Bin {rtl => hw/old_rtl}/modelsim/work/_vmake | 0 .../old_rtl}/pipe_regs/VX_d_e_reg.v | 0 .../old_rtl}/pipe_regs/VX_f_d_reg.v | 0 {old_rtl => hw/old_rtl}/quartus/Makefile | 0 .../old_rtl}/quartus/VX_gpr_syn.qpf | 0 .../old_rtl}/quartus/VX_gpr_syn.qsf | 0 {old_rtl => hw/old_rtl}/quartus/asm.chg | 0 {old_rtl => hw/old_rtl}/quartus/fit.chg | 0 {old_rtl => hw/old_rtl}/quartus/map.chg | 0 {old_rtl => hw/old_rtl}/quartus/project.tcl | 0 {old_rtl => hw/old_rtl}/quartus/smart.log | 0 {old_rtl => hw/old_rtl}/quartus/sta.chg | 0 {old_rtl => hw/old_rtl}/quartus/syn.chg | 0 {old_rtl => hw/old_rtl}/quartus/vortex.ini | 0 {old_rtl => hw/old_rtl}/quartus/vortex.sdc | 0 {old_rtl => hw/old_rtl}/results.txt | 0 .../old_rtl}/shared_memory/VX_bank_valids.v | 0 .../shared_memory/VX_priority_encoder_sm.v | 0 .../old_rtl}/shared_memory/VX_shared_memory.v | 0 .../shared_memory/VX_shared_memory_block.v | 0 {old_rtl => hw/old_rtl}/simulate/VX_define.h | 0 {old_rtl => hw/old_rtl}/simulate/ram.h | 0 {old_rtl => hw/old_rtl}/simulate/tb_debug.h | 0 
.../old_rtl}/simulate/test_bench.cpp | 0 {old_rtl => hw/old_rtl}/simulate/test_bench.h | 0 {driver/hw => hw/opae}/Makefile | 0 {driver/hw => hw/opae}/README | 0 {driver/hw => hw/opae}/ccip_interface_reg.sv | 0 {driver/hw => hw/opae}/ccip_std_afu.sv | 0 {driver => hw/opae}/opae_setup.sh | 0 {driver => hw/opae}/set_env.sh | 0 {driver/hw => hw/opae}/sources.txt | 0 {driver/hw => hw/opae}/vortex_afu.json | 0 {driver/hw => hw/opae}/vortex_afu.sv | 0 {driver/hw => hw/opae}/wave.do | 0 {rtl => hw/rtl}/.DS_Store | Bin {rtl => hw/rtl}/.gitignore | 0 {rtl => hw/rtl}/Makefile | 0 {rtl => hw/rtl}/VX_alu.v | 0 {rtl => hw/rtl}/VX_back_end.v | 0 {rtl => hw/rtl}/VX_cache/VX_bank.v | 0 {rtl => hw/rtl}/VX_cache/VX_cache.v | 0 {rtl => hw/rtl}/VX_cache/VX_cache_config.v | 0 .../VX_cache/VX_cache_core_req_bank_sel.v | 0 {rtl => hw/rtl}/VX_cache/VX_cache_dfq_queue.v | 0 .../rtl}/VX_cache/VX_cache_dram_req_arb.v | 0 .../rtl}/VX_cache/VX_cache_miss_resrv.v | 0 {rtl => hw/rtl}/VX_cache/VX_cache_req_queue.v | 0 .../rtl}/VX_cache/VX_cache_wb_sel_merge.v | 0 .../VX_cache/VX_dcache_llv_resp_bank_sel.v | 0 .../rtl}/VX_cache/VX_fill_invalidator.v | 0 {rtl => hw/rtl}/VX_cache/VX_mrv_queue.v | 0 {rtl => hw/rtl}/VX_cache/VX_prefetcher.v | 0 {rtl => hw/rtl}/VX_cache/VX_snp_fwd_arb.v | 0 {rtl => hw/rtl}/VX_cache/VX_tag_data_access.v | 0 .../rtl}/VX_cache/VX_tag_data_structure.v | 0 {rtl => hw/rtl}/VX_countones.v | 0 {rtl => hw/rtl}/VX_csr_data.v | 0 {rtl => hw/rtl}/VX_csr_handler.v | 0 {rtl => hw/rtl}/VX_csr_pipe.v | 0 {rtl => hw/rtl}/VX_csr_wrapper.v | 0 {rtl => hw/rtl}/VX_decode.v | 0 {rtl => hw/rtl}/VX_define.v | 0 {rtl => hw/rtl}/VX_dmem_controller.v | 0 {rtl => hw/rtl}/VX_execute_unit.v | 0 {rtl => hw/rtl}/VX_fetch.v | 0 {rtl => hw/rtl}/VX_front_end.v | 0 {rtl => hw/rtl}/VX_generic_priority_encoder.v | 0 {rtl => hw/rtl}/VX_generic_queue.v | 0 {rtl => hw/rtl}/VX_generic_queue_ll.v | 0 {rtl => hw/rtl}/VX_generic_register.v | 0 {rtl => hw/rtl}/VX_generic_stack.v | 0 {rtl => hw/rtl}/VX_gpgpu_inst.v 
| 0 {rtl => hw/rtl}/VX_gpr.v | 0 {rtl => hw/rtl}/VX_gpr_stage.v | 0 {rtl => hw/rtl}/VX_gpr_wrapper.v | 0 {rtl => hw/rtl}/VX_icache_stage.v | 0 {rtl => hw/rtl}/VX_inst_multiplex.v | 0 {rtl => hw/rtl}/VX_lsu.v | 0 {rtl => hw/rtl}/VX_lsu_addr_gen.v | 0 {rtl => hw/rtl}/VX_priority_encoder.v | 0 {rtl => hw/rtl}/VX_priority_encoder_w_mask.v | 0 {rtl => hw/rtl}/VX_scheduler.v | 0 {rtl => hw/rtl}/VX_warp.v | 0 {rtl => hw/rtl}/VX_warp_scheduler.v | 0 {rtl => hw/rtl}/VX_writeback.v | 0 {rtl => hw/rtl}/Vortex.v | 0 {rtl => hw/rtl}/Vortex_Cluster.v | 0 {rtl => hw/rtl}/Vortex_SOC.v | 0 .../rtl}/byte_enabled_simple_dual_port_ram.v | 0 {rtl => hw/rtl}/cache/Makefile | 0 {rtl => hw/rtl}/cache/Notes | 0 {rtl => hw/rtl}/cache/VX_Cache_Bank.v | 0 {rtl => hw/rtl}/cache/VX_cache_bank_valid.v | 0 {rtl => hw/rtl}/cache/VX_cache_data.v | 0 .../rtl}/cache/VX_cache_data_per_index.v | 0 {rtl => hw/rtl}/cache/VX_d_cache.v | 0 .../rtl}/cache/VX_d_cache_encapsulate.v | 0 {rtl => hw/rtl}/cache/VX_d_cache_tb.v | 0 {rtl => hw/rtl}/cache/VX_generic_pe.v | 0 {rtl => hw/rtl}/cache/cache_set.v | 0 {rtl => hw/rtl}/cache/d_cache_test_bench.cpp | 0 {rtl => hw/rtl}/cache/d_cache_test_bench.h | 0 .../rtl}/cache/d_cache_test_bench_debug.h | 0 {rtl => hw/rtl}/compat/VX_divide.v | 0 {rtl => hw/rtl}/compat/VX_mult.v | 0 {rtl => hw/rtl}/compat/VX_tb_divide.sv | 0 .../interfaces/VX_branch_response_inter.v | 0 {rtl => hw/rtl}/interfaces/VX_csr_req_inter.v | 0 {rtl => hw/rtl}/interfaces/VX_csr_wb_inter.v | 0 .../rtl}/interfaces/VX_dcache_request_inter.v | 0 .../interfaces/VX_dcache_response_inter.v | 0 .../rtl}/interfaces/VX_dram_req_rsp_inter.v | 0 .../rtl}/interfaces/VX_exec_unit_req_inter.v | 0 .../interfaces/VX_frE_to_bckE_req_inter.v | 0 .../rtl}/interfaces/VX_gpr_clone_inter.v | 0 .../rtl}/interfaces/VX_gpr_data_inter.v | 0 {rtl => hw/rtl}/interfaces/VX_gpr_jal_inter.v | 0 .../rtl}/interfaces/VX_gpr_read_inter.v | 0 .../rtl}/interfaces/VX_gpr_wspawn_inter.v | 0 .../interfaces/VX_gpu_dcache_dram_req_inter.v | 
0 .../interfaces/VX_gpu_dcache_dram_res_inter.v | 0 .../rtl}/interfaces/VX_gpu_dcache_req_inter.v | 0 .../rtl}/interfaces/VX_gpu_dcache_res_inter.v | 0 .../interfaces/VX_gpu_dcache_snp_req_inter.v | 0 .../rtl}/interfaces/VX_gpu_inst_req_inter.v | 0 .../rtl}/interfaces/VX_gpu_snp_req_rsp.v | 0 .../rtl}/interfaces/VX_icache_request_inter.v | 0 .../interfaces/VX_icache_response_inter.v | 0 .../rtl}/interfaces/VX_inst_exec_wb_inter.v | 0 .../rtl}/interfaces/VX_inst_mem_wb_inter.v | 0 .../rtl}/interfaces/VX_inst_meta_inter.v | 0 .../rtl}/interfaces/VX_jal_response_inter.v | 0 {rtl => hw/rtl}/interfaces/VX_join_inter.v | 0 {rtl => hw/rtl}/interfaces/VX_lsu_req_inter.v | 0 {rtl => hw/rtl}/interfaces/VX_mem_req_inter.v | 0 {rtl => hw/rtl}/interfaces/VX_mw_wb_inter.v | 0 .../rtl}/interfaces/VX_warp_ctl_inter.v | 0 {rtl => hw/rtl}/interfaces/VX_wb_inter.v | 0 {rtl => hw/rtl}/interfaces/VX_wstall_inter.v | 0 {rtl => hw/rtl}/pipe_regs/VX_d_e_reg.v | 0 {rtl => hw/rtl}/pipe_regs/VX_f_d_reg.v | 0 {rtl => hw/rtl}/pipe_regs/VX_i_d_reg.v | 0 .../rtl}/shared_memory/VX_bank_valids.v | 0 .../shared_memory/VX_priority_encoder_sm.v | 0 .../rtl}/shared_memory/VX_shared_memory.v | 0 .../shared_memory/VX_shared_memory_block.v | 0 .../quartus => hw/syn/quartus/cache}/Makefile | 0 .../syn/quartus/cache}/project.sdc | 0 .../syn/quartus/cache}/project.tcl | 0 {rtl/quartus => hw/syn/quartus/top}/Makefile | 0 .../syn/quartus/top}/VX_timing.tcl | 0 .../syn/quartus/top}/project.tcl | 0 .../quartus => hw/syn/quartus/top}/vortex.sdc | 0 .../syn/quartus/vx_cache}/Makefile | 0 .../syn/quartus/vx_cache}/project.sdc | 0 .../syn/quartus/vx_cache}/project.tcl | 0 {syn => hw/syn/synopsys}/191017.log | 0 {syn => hw/syn/synopsys}/Makefile | 0 {syn => hw/syn/synopsys}/NanGate_15nm_OCL.db | Bin {syn => hw/syn/synopsys}/Vortex.ddc | Bin {syn => hw/syn/synopsys}/Vortex.netlist.v | 0 {syn => hw/syn/synopsys}/Vortex.sdc | 0 {syn => hw/syn/synopsys}/cshrc.dc | 0 {syn => hw/syn/synopsys}/dc.log | 0 {syn => 
hw/syn/synopsys}/dc_1GHz.log | 0 {syn => hw/syn/synopsys}/dc_noOpt.log | 0 {syn => hw/syn/synopsys}/esyn.tcl | 0 {syn => hw/syn/synopsys}/fsyn.tcl | 0 {syn => hw/syn/synopsys}/run_mult_synth.sh | 0 {syn => hw/syn/synopsys}/syn.tcl | 0 {syn => hw/syn/synopsys}/vortex_syn.log | 0 {rtl => hw}/unit_tests/generic_queue/Makefile | 0 .../unit_tests/generic_queue/testbench.v | 0 {rtl/simulate => hw/verilator}/ram.h | 0 {rtl/simulate => hw/verilator}/simulator.cpp | 0 {rtl/simulate => hw/verilator}/simulator.h | 0 {rtl/simulate => hw/verilator}/test_bench.cpp | 0 {driver/sw => sw/driver}/Makefile | 0 {driver/sw => sw/driver/common}/vx_utils.cpp | 0 {driver/sw => sw/driver}/include/vortex.h | 0 {driver/sw => sw/driver}/opae/Makefile | 0 {driver/sw => sw/driver}/opae/vortex.cpp | 0 {driver/sw => sw/driver}/rtlsim/.gitignore | 0 {driver/sw => sw/driver}/rtlsim/Makefile | 0 {driver/sw => sw/driver}/rtlsim/vortex.cpp | 0 {driver/sw => sw/driver}/simx/.gitignore | 0 {driver/sw => sw/driver}/simx/Makefile | 0 {driver/sw => sw/driver}/simx/vortex.cpp | 0 {driver/sw => sw/driver}/stub/Makefile | 0 {driver/sw => sw/driver}/stub/vortex.cpp | 0 {driver => sw/driver}/tests/basic/Makefile | 0 {driver => sw/driver}/tests/basic/basic | Bin {driver => sw/driver}/tests/basic/basic.cpp | 0 {driver => sw/driver}/tests/basic/kernel.bin | Bin {driver => sw/driver}/tests/basic/kernel.c | 0 {driver => sw/driver}/tests/demo/Makefile | 0 {driver => sw/driver}/tests/demo/common.h | 0 {driver => sw/driver}/tests/demo/demo | Bin {driver => sw/driver}/tests/demo/demo.cpp | 0 {driver => sw/driver}/tests/demo/kernel.bin | Bin {driver => sw/driver}/tests/demo/kernel.c | 0 {driver => sw/driver}/tests/demo/kernel.elf | Bin {driver => sw/driver}/tests/demo/run.log | 0 .../driver/tests}/dogfood/Memcpy/hw/rtl/_hdr | 0 .../dogfood/Memcpy/hw/rtl/cci_hello.json | 0 .../dogfood/Memcpy/hw/rtl/cci_hello_afu.sv | 0 .../Memcpy/hw/rtl/cci_hello_afu_working.sv | 0 .../tests}/dogfood/Memcpy/hw/rtl/sources.txt | 0 
.../tests}/dogfood/Memcpy/hw/sim/setup_ase | 0 .../driver/tests}/dogfood/Memcpy/sw/Makefile | 0 .../tests}/dogfood/Memcpy/sw/cci_hello.c | 0 .../dogfood/Memcpy/sw/obj/afu_json_info.h | 0 .../tests}/dogfood/Memcpy/sw/obj/cci_hello.o | Bin {runtime => sw/runtime}/.gitignore | 0 {runtime => sw/runtime}/Makefile | 0 {runtime => sw/runtime}/fileio/fileio.h | 0 {runtime => sw/runtime}/fileio/fileio.s | 0 .../runtime}/intrinsics/vx_intrinsics.h | 0 .../runtime}/intrinsics/vx_intrinsics.s | 0 {runtime => sw/runtime}/io/vx_io.c | 0 {runtime => sw/runtime}/io/vx_io.h | 0 {runtime => sw/runtime}/io/vx_io.s | 0 {runtime => sw/runtime}/newlib/newlib.c | 0 {runtime => sw/runtime}/qemu/vx_api.c | 0 {runtime => sw/runtime}/startup/vx_start.S | 0 .../tests => sw/runtime/tests/common}/tests.c | 0 .../tests => sw/runtime/tests/common}/tests.h | 0 .../runtime/tests/common}/vx_tempelate.c | 0 .../mains => sw/runtime/tests}/dev/Makefile | 0 .../runtime/tests}/dev/vx_dev_main.c | 0 .../runtime/tests}/dev/vx_dev_main.dump | 0 .../runtime/tests}/dev/vx_dev_main.elf | Bin .../runtime/tests}/dev/vx_dev_main.hex | 0 .../mains => sw/runtime/tests}/hello/Makefile | 0 .../runtime/tests}/hello/hello.cpp | 0 .../runtime/tests}/hello/hello.dump | 0 .../runtime/tests}/hello/hello.elf | Bin .../runtime/tests}/hello/hello.hex | 0 .../runtime/tests}/nativevecadd/Makefile | 0 .../nativevecadd/include/CL/CMakeLists.txt | 0 .../tests}/nativevecadd/include/CL/cl.h | 0 .../tests}/nativevecadd/include/CL/cl.hpp | 0 .../tests}/nativevecadd/include/CL/cl2.hpp | 0 .../tests}/nativevecadd/include/CL/cl_d3d10.h | 0 .../tests}/nativevecadd/include/CL/cl_d3d11.h | 0 .../include/CL/cl_dx9_media_sharing.h | 0 .../include/CL/cl_dx9_media_sharing_intel.h | 0 .../tests}/nativevecadd/include/CL/cl_egl.h | 0 .../tests}/nativevecadd/include/CL/cl_ext.h | 0 .../nativevecadd/include/CL/cl_ext_intel.h | 0 .../tests}/nativevecadd/include/CL/cl_gl.h | 0 .../nativevecadd/include/CL/cl_gl_ext.h | 0 
.../nativevecadd/include/CL/cl_platform.h | 0 .../CL/cl_va_api_media_sharing_intel.h | 0 .../nativevecadd/include/CL/cl_version.h | 0 .../tests}/nativevecadd/include/CL/opencl.h | 0 .../nativevecadd/include/CMakeLists.txt | 0 .../tests}/nativevecadd/include/OpenCL/cl.h | 0 .../tests}/nativevecadd/include/OpenCL/cl.hpp | 0 .../nativevecadd/include/OpenCL/cl_ext.h | 0 .../nativevecadd/include/OpenCL/cl_gl.h | 0 .../nativevecadd/include/OpenCL/cl_gl_ext.h | 0 .../nativevecadd/include/OpenCL/cl_platform.h | 0 .../nativevecadd/include/OpenCL/opencl.h | 0 .../nativevecadd/include/_builtin_renames.h | 0 .../nativevecadd/include/_clang_opencl.h | 0 .../nativevecadd/include/_enable_all_exts.h | 0 .../tests}/nativevecadd/include/_kernel.h | 0 .../tests}/nativevecadd/include/_kernel_c.h | 0 .../nativevecadd/include/_kernel_constants.h | 0 .../tests}/nativevecadd/include/pocl.h | 0 .../tests}/nativevecadd/include/pocl_cache.h | 0 .../include/pocl_compiler_features.h | 0 .../nativevecadd/include/pocl_context.h | 0 .../tests}/nativevecadd/include/pocl_device.h | 0 .../nativevecadd/include/pocl_file_util.h | 0 .../nativevecadd/include/pocl_image_types.h | 0 .../tests}/nativevecadd/include/pocl_spir.h | 0 .../tests}/nativevecadd/include/pocl_types.h | 0 .../include/pocl_workgroup_func.h | 0 .../tests}/nativevecadd/include/poclu.h | 0 .../tests}/nativevecadd/include/utlist.h | 0 .../tests}/nativevecadd/include/vccompat.hpp | 0 .../tests}/nativevecadd/libs/libOpenCL.a | Bin .../tests}/nativevecadd/libs/libopencl.dump | 0 .../tests}/nativevecadd/libs/libvecadd.a | Bin .../tests}/nativevecadd/libs/libvecadd.dump | 0 .../runtime/tests}/nativevecadd/libs/vecadd | Bin .../tests}/nativevecadd/libs/vecadd.dump | 0 .../tests}/nativevecadd/vx_pocl_main.c | 0 .../tests}/nativevecadd/vx_pocl_main.dump | 0 .../tests}/nativevecadd/vx_pocl_main.elf | Bin .../tests}/nativevecadd/vx_pocl_main.hex | 0 .../runtime/tests}/nlTest/Makefile | 0 .../runtime/tests}/nlTest/vx_nl_main.c | 0 
.../runtime/tests}/nlTest/vx_nl_main.dump | 0 .../runtime/tests}/nlTest/vx_nl_main.elf | Bin .../runtime/tests}/nlTest/vx_nl_main.hex | 0 .../runtime/tests}/simple/Makefile | 0 .../runtime/tests}/simple/vx_simple_main.c | 0 .../runtime/tests}/simple/vx_simple_main.dump | 0 .../runtime/tests}/simple/vx_simple_main.elf | Bin .../runtime/tests}/simple/vx_simple_main.hex | 0 .../runtime/tests/simple}/vx_tempelate.c | 0 .../runtime/tests}/vecadd/Makefile | 0 .../tests}/vecadd/include/CL/CMakeLists.txt | 0 .../runtime/tests}/vecadd/include/CL/cl.h | 0 .../runtime/tests}/vecadd/include/CL/cl.hpp | 0 .../runtime/tests}/vecadd/include/CL/cl2.hpp | 0 .../tests}/vecadd/include/CL/cl_d3d10.h | 0 .../tests}/vecadd/include/CL/cl_d3d11.h | 0 .../vecadd/include/CL/cl_dx9_media_sharing.h | 0 .../include/CL/cl_dx9_media_sharing_intel.h | 0 .../runtime/tests}/vecadd/include/CL/cl_egl.h | 0 .../runtime/tests}/vecadd/include/CL/cl_ext.h | 0 .../tests}/vecadd/include/CL/cl_ext_intel.h | 0 .../runtime/tests}/vecadd/include/CL/cl_gl.h | 0 .../tests}/vecadd/include/CL/cl_gl_ext.h | 0 .../tests}/vecadd/include/CL/cl_platform.h | 0 .../CL/cl_va_api_media_sharing_intel.h | 0 .../tests}/vecadd/include/CL/cl_version.h | 0 .../runtime/tests}/vecadd/include/CL/opencl.h | 0 .../tests}/vecadd/include/CMakeLists.txt | 0 .../runtime/tests}/vecadd/include/OpenCL/cl.h | 0 .../tests}/vecadd/include/OpenCL/cl.hpp | 0 .../tests}/vecadd/include/OpenCL/cl_ext.h | 0 .../tests}/vecadd/include/OpenCL/cl_gl.h | 0 .../tests}/vecadd/include/OpenCL/cl_gl_ext.h | 0 .../vecadd/include/OpenCL/cl_platform.h | 0 .../tests}/vecadd/include/OpenCL/opencl.h | 0 .../tests}/vecadd/include/_builtin_renames.h | 0 .../tests}/vecadd/include/_clang_opencl.h | 0 .../tests}/vecadd/include/_enable_all_exts.h | 0 .../runtime/tests}/vecadd/include/_kernel.h | 0 .../runtime/tests}/vecadd/include/_kernel_c.h | 0 .../tests}/vecadd/include/_kernel_constants.h | 0 .../runtime/tests}/vecadd/include/pocl.h | 0 
.../tests}/vecadd/include/pocl_cache.h | 0 .../vecadd/include/pocl_compiler_features.h | 0 .../tests}/vecadd/include/pocl_context.h | 0 .../tests}/vecadd/include/pocl_device.h | 0 .../tests}/vecadd/include/pocl_file_util.h | 0 .../tests}/vecadd/include/pocl_image_types.h | 0 .../runtime/tests}/vecadd/include/pocl_spir.h | 0 .../tests}/vecadd/include/pocl_types.h | 0 .../vecadd/include/pocl_workgroup_func.h | 0 .../runtime/tests}/vecadd/include/poclu.h | 0 .../runtime/tests}/vecadd/include/utlist.h | 0 .../tests}/vecadd/include/vccompat.hpp | 0 .../runtime/tests}/vecadd/libs/libOpenCL.a | Bin .../runtime/tests}/vecadd/libs/libopencl.dump | 0 .../runtime/tests}/vecadd/libs/libvecadd.a | Bin .../runtime/tests}/vecadd/libs/libvecadd.dump | 0 .../runtime/tests}/vecadd/libs/vecadd | Bin .../runtime/tests}/vecadd/libs/vecadd.dump | 0 .../runtime/tests}/vecadd/vecadd.cl | 0 .../runtime/tests}/vecadd/vx_pocl_main.c | 0 .../runtime/tests}/vecadd/vx_pocl_main.dump | 0 .../runtime/tests}/vecadd/vx_pocl_main.elf | Bin .../runtime/tests}/vecadd/vx_pocl_main.hex | 0 .../runtime/tests}/vector_test/Makefile | 0 .../runtime/tests}/vector_test/vx_vec.h | 0 .../runtime/tests}/vector_test/vx_vec.s | 0 .../tests}/vector_test/vx_vec_original.s | 0 .../tests}/vector_test/vx_vector_main.c | 0 .../tests}/vector_test/vx_vector_main.dump | 0 .../tests}/vector_test/vx_vector_main.elf | Bin .../tests}/vector_test/vx_vector_main.hex | 0 .../mains => sw/runtime/tests}/vortex_link.ld | 0 {runtime => sw/runtime}/vx_api/vx_api.c | 0 {runtime => sw/runtime}/vx_api/vx_api.h | 0 {rvvector => sw/rvvector}/basic/Makefile | 0 {rvvector => sw/rvvector}/basic/_1_vx_vec.s | 0 .../rvvector}/basic/_1_vx_vector_main.c | 0 .../rvvector}/basic/__vx_vector_main.c | 0 {rvvector => sw/rvvector}/basic/vx_vec.h | 0 {rvvector => sw/rvvector}/basic/vx_vec.s | 0 {rvvector => sw/rvvector}/basic/vx_vec_main.c | 0 .../rvvector}/basic/vx_vector_main.c | 0 {rvvector => sw/rvvector}/benchmark_temp/1 | 0 
.../rvvector}/benchmark_temp/Makefile | 0 .../rvvector}/benchmark_temp/TO_DO_LIST | 0 .../benchmark_temp/vx_vec_benchmark.c | 0 .../benchmark_temp/vx_vec_benchmark.dump | 0 .../benchmark_temp/vx_vec_benchmark.elf | Bin .../benchmark_temp/vx_vec_benchmark.h | 0 .../benchmark_temp/vx_vec_benchmark.hex | 0 .../rvvector}/benchmark_temp/vx_vec_memcpy.s | 0 .../rvvector}/benchmark_temp/vx_vec_saxpy.s | 0 .../benchmark_temp/vx_vec_saxpy_float.s | 0 .../rvvector}/benchmark_temp/vx_vec_sgemm.s | 0 .../benchmark_temp/vx_vec_sgemm_float.s | 0 .../rvvector}/benchmark_temp/vx_vec_vsadd.s | 0 .../benchmark_temp/vx_vec_vvaddint32.s | 0 1056 files changed, 8120 insertions(+), 8120 deletions(-) delete mode 100644 benchmarks/new_opencl/bfs/Makefile delete mode 100644 benchmarks/new_opencl/convolution/Makefile delete mode 100644 benchmarks/new_opencl/guassian/Makefile delete mode 100644 benchmarks/new_opencl/kmeans/Makefile delete mode 100644 benchmarks/new_opencl/nearn/Makefile delete mode 100644 benchmarks/new_opencl/saxpy/Makefile delete mode 100644 benchmarks/new_opencl/sfilter/Makefile delete mode 100644 benchmarks/new_opencl/sgemm/Makefile delete mode 100644 benchmarks/new_opencl/transpose/Makefile delete mode 100644 benchmarks/new_opencl/vecadd/Makefile rename benchmarks/{opencl => old_opencl}/BlackScholes/BlackScholes.cl (100%) rename benchmarks/{opencl => old_opencl}/BlackScholes/Makefile (100%) rename benchmarks/{opencl => old_opencl}/BlackScholes/main.cpp (100%) rename benchmarks/{opencl => old_opencl}/BlackScholes/oclBlackScholes.pdf (100%) rename benchmarks/{opencl => old_opencl}/BlackScholes/oclBlackScholes_common.h (100%) rename benchmarks/{opencl => old_opencl}/BlackScholes/oclBlackScholes_gold.cpp (100%) rename benchmarks/{opencl => old_opencl}/BlackScholes/oclBlackScholes_launcher.cpp (100%) rename benchmarks/{opencl => old_opencl}/BlackScholes/oclUtils.h (100%) rename benchmarks/{opencl => old_opencl}/BlackScholes/shrQATest.h (100%) rename benchmarks/{opencl => 
old_opencl}/BlackScholes/shrUtils.h (100%) rename benchmarks/{opencl => old_opencl}/DotProduct/DotProduct.cl (100%) rename benchmarks/{opencl => old_opencl}/DotProduct/Makefile (100%) rename benchmarks/{opencl => old_opencl}/DotProduct/main.cc (100%) rename benchmarks/{opencl => old_opencl}/DotProduct/oclUtils.h (100%) rename benchmarks/{opencl => old_opencl}/DotProduct/shrQATest.h (100%) rename benchmarks/{opencl => old_opencl}/DotProduct/shrUtils.h (100%) rename benchmarks/{opencl => old_opencl}/VectorHypot/Makefile (100%) rename benchmarks/{opencl => old_opencl}/VectorHypot/VectorHypot.cl (100%) rename benchmarks/{opencl => old_opencl}/VectorHypot/main.cc (100%) rename benchmarks/{opencl => old_opencl}/VectorHypot/oclUtils.h (100%) rename benchmarks/{opencl => old_opencl}/VectorHypot/shrQATest.h (100%) rename benchmarks/{opencl => old_opencl}/VectorHypot/shrUtils.h (100%) rename benchmarks/{new_opencl => old_opencl}/bfs/CLHelper.h (93%) create mode 100644 benchmarks/old_opencl/bfs/Makefile rename benchmarks/{new_opencl => old_opencl}/bfs/README (100%) rename benchmarks/{new_opencl => old_opencl}/bfs/graph4096.txt (100%) rename benchmarks/{new_opencl => old_opencl}/bfs/kernel.cl (100%) rename benchmarks/{opencl => old_opencl}/bfs/libbfs.a (100%) rename benchmarks/{new_opencl => old_opencl}/bfs/main.cc (97%) rename benchmarks/{new_opencl => old_opencl}/bfs/run (100%) rename benchmarks/{new_opencl => old_opencl}/bfs/timer.cc (100%) rename benchmarks/{new_opencl => old_opencl}/bfs/timer.h (100%) rename benchmarks/{new_opencl => old_opencl}/bfs/util.h (100%) create mode 100644 benchmarks/old_opencl/convolution/Makefile rename benchmarks/{new_opencl => old_opencl}/convolution/input.bmp (100%) rename benchmarks/{new_opencl => old_opencl}/convolution/kernel.cl (96%) rename benchmarks/{new_opencl => old_opencl}/convolution/main.cpp (96%) rename benchmarks/{new_opencl => old_opencl}/convolution/utils.cpp (100%) rename benchmarks/{new_opencl => 
old_opencl}/convolution/utils.h (100%) rename benchmarks/{opencl => old_opencl}/cutcp/Makefile (100%) rename benchmarks/{opencl => old_opencl}/cutcp/args.c (100%) rename benchmarks/{opencl => old_opencl}/cutcp/atom.h (100%) rename benchmarks/{opencl => old_opencl}/cutcp/cutcpu.c (100%) rename benchmarks/{opencl => old_opencl}/cutcp/cutoff.c (100%) rename benchmarks/{opencl => old_opencl}/cutcp/cutoff.h (100%) rename benchmarks/{opencl => old_opencl}/cutcp/excl.c (100%) rename benchmarks/{opencl => old_opencl}/cutcp/gpu_info.c (100%) rename benchmarks/{opencl => old_opencl}/cutcp/gpu_info.h (100%) rename benchmarks/{opencl => old_opencl}/cutcp/kernel.cl (100%) rename benchmarks/{opencl => old_opencl}/cutcp/libcutcp.a (100%) rename benchmarks/{opencl => old_opencl}/cutcp/macros.h (100%) rename benchmarks/{opencl => old_opencl}/cutcp/main.cc (100%) rename benchmarks/{opencl => old_opencl}/cutcp/ocl.c (100%) rename benchmarks/{opencl => old_opencl}/cutcp/ocl.h (100%) rename benchmarks/{opencl => old_opencl}/cutcp/output.c (100%) rename benchmarks/{opencl => old_opencl}/cutcp/output.h (100%) rename benchmarks/{opencl => old_opencl}/cutcp/parboil.h (100%) rename benchmarks/{opencl => old_opencl}/cutcp/parboil_opencl.c (100%) rename benchmarks/{opencl => old_opencl}/cutcp/readatom.c (100%) rename benchmarks/{opencl => old_opencl}/cutcp/watbox.sl40.pqr (100%) create mode 100644 benchmarks/old_opencl/guassian/Makefile rename benchmarks/{new_opencl => old_opencl}/guassian/OriginalParallel.c (100%) rename benchmarks/{new_opencl => old_opencl}/guassian/README.txt (100%) rename benchmarks/{new_opencl/nearn => old_opencl/guassian}/clutils.cpp (97%) rename benchmarks/{new_opencl => old_opencl}/guassian/clutils.h (100%) rename benchmarks/{new_opencl => old_opencl}/guassian/gaussianElim.h (100%) rename benchmarks/{new_opencl => old_opencl}/guassian/gettimeofday.cpp (100%) rename benchmarks/{new_opencl => old_opencl}/guassian/gettimeofday.h (100%) rename benchmarks/{new_opencl => 
old_opencl}/guassian/kernel.cl (100%) rename benchmarks/{opencl => old_opencl}/guassian/libgaussian.a (100%) rename benchmarks/{new_opencl => old_opencl}/guassian/main.cc (96%) rename benchmarks/{new_opencl => old_opencl}/guassian/matrix4.txt (100%) rename benchmarks/{new_opencl => old_opencl}/guassian/run (100%) rename benchmarks/{new_opencl => old_opencl}/guassian/utils.cpp (100%) rename benchmarks/{new_opencl => old_opencl}/guassian/utils.h (100%) rename benchmarks/{new_opencl => old_opencl}/include/CL/cl.h (100%) rename benchmarks/{new_opencl => old_opencl}/include/CL/cl.hpp (100%) rename benchmarks/{new_opencl => old_opencl}/include/CL/cl2.hpp (100%) rename benchmarks/{new_opencl => old_opencl}/include/CL/cl_d3d10.h (100%) rename benchmarks/{new_opencl => old_opencl}/include/CL/cl_d3d11.h (100%) rename benchmarks/{new_opencl => old_opencl}/include/CL/cl_dx9_media_sharing.h (100%) rename benchmarks/{new_opencl => old_opencl}/include/CL/cl_dx9_media_sharing_intel.h (100%) rename benchmarks/{new_opencl => old_opencl}/include/CL/cl_egl.h (100%) rename benchmarks/{new_opencl => old_opencl}/include/CL/cl_ext.h (100%) rename benchmarks/{new_opencl => old_opencl}/include/CL/cl_ext_intel.h (100%) rename benchmarks/{new_opencl => old_opencl}/include/CL/cl_gl.h (100%) rename benchmarks/{new_opencl => old_opencl}/include/CL/cl_gl_ext.h (100%) rename benchmarks/{new_opencl => old_opencl}/include/CL/cl_platform.h (100%) rename benchmarks/{new_opencl => old_opencl}/include/CL/cl_va_api_media_sharing_intel.h (100%) rename benchmarks/{new_opencl => old_opencl}/include/CL/cl_version.h (100%) rename benchmarks/{new_opencl => old_opencl}/include/CL/opencl.h (100%) create mode 100644 benchmarks/old_opencl/kmeans/Makefile rename benchmarks/{new_opencl => old_opencl}/kmeans/README (100%) rename benchmarks/{new_opencl => old_opencl}/kmeans/cluster.c (100%) rename benchmarks/{new_opencl => old_opencl}/kmeans/getopt.c (97%) rename benchmarks/{new_opencl => old_opencl}/kmeans/getopt.h 
(97%) rename benchmarks/{new_opencl => old_opencl}/kmeans/kernel.cl (95%) rename benchmarks/{opencl => old_opencl}/kmeans/kmeans (100%) rename benchmarks/{new_opencl => old_opencl}/kmeans/kmeans.h (100%) rename benchmarks/{new_opencl => old_opencl}/kmeans/kmeans_clustering.c (97%) rename benchmarks/{opencl => old_opencl}/kmeans/libkmeans.a (100%) rename benchmarks/{new_opencl => old_opencl}/kmeans/main.cc (92%) rename benchmarks/{new_opencl => old_opencl}/kmeans/read_input.c (100%) rename benchmarks/{new_opencl => old_opencl}/kmeans/rmse.c (100%) rename benchmarks/{opencl => old_opencl}/kmeans/run (100%) rename benchmarks/{opencl => old_opencl}/lbm/120_120_150_ldc.of (100%) rename benchmarks/{opencl => old_opencl}/lbm/Makefile (100%) rename benchmarks/{opencl => old_opencl}/lbm/args.c (100%) rename benchmarks/{opencl => old_opencl}/lbm/gpu_info.c (100%) rename benchmarks/{opencl => old_opencl}/lbm/gpu_info.h (100%) rename benchmarks/{opencl => old_opencl}/lbm/kernel.cl (100%) rename benchmarks/{opencl => old_opencl}/lbm/layout_config.h (100%) rename benchmarks/{opencl => old_opencl}/lbm/lbm.c (100%) rename benchmarks/{opencl => old_opencl}/lbm/lbm.h (100%) rename benchmarks/{opencl => old_opencl}/lbm/lbm_macros.h (100%) rename benchmarks/{opencl => old_opencl}/lbm/liblbm.a (100%) rename benchmarks/{opencl => old_opencl}/lbm/main.cc (100%) rename benchmarks/{opencl => old_opencl}/lbm/main.h (100%) rename benchmarks/{opencl => old_opencl}/lbm/ocl.c (100%) rename benchmarks/{opencl => old_opencl}/lbm/ocl.h (100%) rename benchmarks/{opencl => old_opencl}/lbm/parboil.h (100%) rename benchmarks/{opencl => old_opencl}/lbm/parboil_opencl.c (100%) rename benchmarks/{opencl => old_opencl}/lib/libOpenCL.a (100%) rename benchmarks/{opencl => old_opencl}/mri-q/32_32_32_dataset.bin (100%) rename benchmarks/{opencl => old_opencl}/mri-q/Makefile (100%) rename benchmarks/{opencl => old_opencl}/mri-q/args.c (100%) rename benchmarks/{opencl => old_opencl}/mri-q/computeQ.c (100%) 
rename benchmarks/{opencl => old_opencl}/mri-q/computeQ.h (100%) rename benchmarks/{opencl => old_opencl}/mri-q/file.cc (100%) rename benchmarks/{opencl => old_opencl}/mri-q/file.h (100%) rename benchmarks/{opencl => old_opencl}/mri-q/gpu_info.c (100%) rename benchmarks/{opencl => old_opencl}/mri-q/gpu_info.h (100%) rename benchmarks/{opencl => old_opencl}/mri-q/kernel.cl (100%) rename benchmarks/{opencl => old_opencl}/mri-q/libmri-q.a (100%) rename benchmarks/{opencl => old_opencl}/mri-q/libsgemm.a (100%) rename benchmarks/{opencl => old_opencl}/mri-q/macros.h (100%) rename benchmarks/{opencl => old_opencl}/mri-q/main.cc (100%) rename benchmarks/{opencl => old_opencl}/mri-q/ocl copy.c (100%) rename benchmarks/{opencl => old_opencl}/mri-q/ocl copy.h (100%) rename benchmarks/{opencl => old_opencl}/mri-q/ocl.c (100%) rename benchmarks/{opencl => old_opencl}/mri-q/ocl.h (100%) rename benchmarks/{opencl => old_opencl}/mri-q/parboil.h (100%) rename benchmarks/{opencl => old_opencl}/mri-q/parboil_opencl.c (100%) create mode 100644 benchmarks/old_opencl/nearn/Makefile rename benchmarks/{new_opencl => old_opencl}/nearn/README.txt (100%) rename benchmarks/{new_opencl => old_opencl}/nearn/cane4_0.db (100%) rename benchmarks/{new_opencl => old_opencl}/nearn/cane4_1.db (100%) rename benchmarks/{new_opencl => old_opencl}/nearn/cane4_2.db (100%) rename benchmarks/{new_opencl => old_opencl}/nearn/cane4_3.db (100%) rename benchmarks/{new_opencl/guassian => old_opencl/nearn}/clutils.cpp (97%) rename benchmarks/{new_opencl => old_opencl}/nearn/clutils.h (100%) rename benchmarks/{new_opencl => old_opencl}/nearn/filelist.txt (100%) rename benchmarks/{opencl => old_opencl}/nearn/gettimeofday.cpp (100%) rename benchmarks/{opencl => old_opencl}/nearn/gettimeofday.h (100%) rename benchmarks/{new_opencl => old_opencl}/nearn/ipoint.h (100%) rename benchmarks/{new_opencl => old_opencl}/nearn/kernel.cl (100%) rename benchmarks/{opencl => old_opencl}/nearn/libnearn.a (100%) rename 
benchmarks/{new_opencl => old_opencl}/nearn/main.cc (100%) rename benchmarks/{new_opencl => old_opencl}/nearn/nearestNeighbor.h (100%) rename benchmarks/{new_opencl => old_opencl}/nearn/run (100%) rename benchmarks/{new_opencl => old_opencl}/nearn/utils.cpp (100%) rename benchmarks/{new_opencl => old_opencl}/nearn/utils.h (100%) rename benchmarks/{opencl => old_opencl}/reduce0/Makefile (100%) rename benchmarks/{opencl => old_opencl}/reduce0/main.cc (100%) rename benchmarks/{opencl => old_opencl}/reduce0/oclReduction.h (100%) rename benchmarks/{opencl => old_opencl}/reduce0/oclReduction_kernel.cl (100%) rename benchmarks/{opencl => old_opencl}/reduce0/oclUtils.h (100%) rename benchmarks/{opencl => old_opencl}/reduce0/shrQATest.h (100%) rename benchmarks/{opencl => old_opencl}/reduce0/shrUtils.h (100%) rename benchmarks/{opencl => old_opencl}/sad/DESCRIPTION (100%) rename benchmarks/{opencl => old_opencl}/sad/Makefile (100%) rename benchmarks/{opencl => old_opencl}/sad/OpenCL_common.cpp (100%) rename benchmarks/{opencl => old_opencl}/sad/OpenCL_common.h (100%) rename benchmarks/{opencl => old_opencl}/sad/args.c (100%) rename benchmarks/{opencl => old_opencl}/sad/file.c (100%) rename benchmarks/{opencl => old_opencl}/sad/file.h (100%) rename benchmarks/{opencl => old_opencl}/sad/frame.bin (100%) rename benchmarks/{opencl => old_opencl}/sad/gpu_info.c (100%) rename benchmarks/{opencl => old_opencl}/sad/gpu_info.h (100%) rename benchmarks/{opencl => old_opencl}/sad/image.c (100%) rename benchmarks/{opencl => old_opencl}/sad/image.h (100%) rename benchmarks/{opencl => old_opencl}/sad/kernel.cl (100%) rename benchmarks/{opencl => old_opencl}/sad/libsad.a (100%) rename benchmarks/{opencl => old_opencl}/sad/main.cc (100%) rename benchmarks/{opencl => old_opencl}/sad/ocl.c (100%) rename benchmarks/{opencl => old_opencl}/sad/ocl.h (100%) rename benchmarks/{opencl => old_opencl}/sad/parboil.h (100%) rename benchmarks/{opencl => old_opencl}/sad/parboil_opencl.c (100%) rename 
benchmarks/{opencl => old_opencl}/sad/reference.bin (100%) rename benchmarks/{opencl => old_opencl}/sad/sad.h (100%) rename benchmarks/{opencl => old_opencl}/sad/sad_kernel.h (100%) create mode 100644 benchmarks/old_opencl/saxpy/Makefile rename benchmarks/{new_opencl => old_opencl}/saxpy/README (100%) rename benchmarks/{new_opencl => old_opencl}/saxpy/kernel.cl (100%) rename benchmarks/{opencl => old_opencl}/saxpy/libsaxpy.a (100%) rename benchmarks/{new_opencl => old_opencl}/saxpy/main.cc (87%) create mode 100644 benchmarks/old_opencl/sfilter/Makefile rename benchmarks/{new_opencl => old_opencl}/sfilter/README (100%) rename benchmarks/{new_opencl => old_opencl}/sfilter/kernel.cl (100%) rename benchmarks/{opencl => old_opencl}/sfilter/libsfilter.a (100%) rename benchmarks/{new_opencl => old_opencl}/sfilter/main.cc (91%) create mode 100644 benchmarks/old_opencl/sgemm/Makefile rename benchmarks/{new_opencl => old_opencl}/sgemm/README (100%) rename benchmarks/{new_opencl => old_opencl}/sgemm/kernel.cl (100%) rename benchmarks/{opencl => old_opencl}/sgemm/libsgemm.a (100%) rename benchmarks/{new_opencl => old_opencl}/sgemm/main.cc (89%) rename benchmarks/{opencl => old_opencl}/spmv/1138_bus.mtx (100%) rename benchmarks/{opencl => old_opencl}/spmv/DESCRIPTION (100%) rename benchmarks/{opencl => old_opencl}/spmv/Makefile (100%) rename benchmarks/{opencl => old_opencl}/spmv/args.c (100%) rename benchmarks/{opencl => old_opencl}/spmv/convert_dataset.c (100%) rename benchmarks/{opencl => old_opencl}/spmv/convert_dataset.h (100%) rename benchmarks/{opencl => old_opencl}/spmv/file.c (100%) rename benchmarks/{opencl => old_opencl}/spmv/file.h (100%) rename benchmarks/{opencl => old_opencl}/spmv/gpu_info.c (100%) rename benchmarks/{opencl => old_opencl}/spmv/gpu_info.h (100%) rename benchmarks/{opencl => old_opencl}/spmv/input/1138_bus.mtx (100%) rename benchmarks/{opencl => old_opencl}/spmv/input/1138_bus.mtx.bin (100%) rename benchmarks/{opencl => 
old_opencl}/spmv/input/DESCRIPTION (100%) rename benchmarks/{opencl => old_opencl}/spmv/input/vector.bin (100%) rename benchmarks/{opencl => old_opencl}/spmv/kernel.cl (100%) rename benchmarks/{opencl => old_opencl}/spmv/libspmv.a (100%) rename benchmarks/{opencl => old_opencl}/spmv/main.cc (100%) rename benchmarks/{opencl => old_opencl}/spmv/mmio.c (100%) rename benchmarks/{opencl => old_opencl}/spmv/mmio.h (100%) rename benchmarks/{opencl => old_opencl}/spmv/ocl.c (100%) rename benchmarks/{opencl => old_opencl}/spmv/ocl.h (100%) rename benchmarks/{opencl => old_opencl}/spmv/parboil.h (100%) rename benchmarks/{opencl => old_opencl}/spmv/parboil_opencl.c (100%) rename benchmarks/{opencl => old_opencl}/spmv/perf_util.c (100%) rename benchmarks/{opencl => old_opencl}/spmv/perf_util.h (100%) rename benchmarks/{opencl => old_opencl}/spmv/perfmon.c (100%) rename benchmarks/{opencl => old_opencl}/spmv/perfmon.h (100%) rename benchmarks/{opencl => old_opencl}/spmv/stub.cc (100%) rename benchmarks/{opencl => old_opencl}/spmv/vector.bin (100%) rename benchmarks/{opencl => old_opencl}/stencil/128x128x32.bin (100%) rename benchmarks/{opencl => old_opencl}/stencil/Makefile (100%) rename benchmarks/{opencl => old_opencl}/stencil/args.c (100%) rename benchmarks/{opencl => old_opencl}/stencil/file.c (100%) rename benchmarks/{opencl => old_opencl}/stencil/file.h (100%) rename benchmarks/{opencl => old_opencl}/stencil/gpu_info.c (100%) rename benchmarks/{opencl => old_opencl}/stencil/gpu_info.h (100%) rename benchmarks/{opencl => old_opencl}/stencil/kernel.cl (100%) rename benchmarks/{opencl => old_opencl}/stencil/libstencil.a (100%) rename benchmarks/{opencl => old_opencl}/stencil/main.cc (100%) rename benchmarks/{opencl => old_opencl}/stencil/ocl.c (100%) rename benchmarks/{opencl => old_opencl}/stencil/ocl.h (100%) rename benchmarks/{opencl => old_opencl}/stencil/parboil.h (100%) rename benchmarks/{opencl => old_opencl}/stencil/parboil_opencl.c (100%) create mode 100644 
benchmarks/old_opencl/transpose/Makefile rename benchmarks/{new_opencl => old_opencl}/transpose/main.cc (95%) rename benchmarks/{new_opencl => old_opencl}/transpose/oclUtils.h (97%) rename benchmarks/{new_opencl => old_opencl}/transpose/shrQATest.h (96%) rename benchmarks/{new_opencl => old_opencl}/transpose/shrUtils.h (98%) rename benchmarks/{new_opencl => old_opencl}/transpose/transpose.cl (100%) rename benchmarks/{new_opencl => old_opencl}/transpose/transpose_gold.cpp (100%) create mode 100644 benchmarks/old_opencl/vecadd/Makefile rename benchmarks/{new_opencl => old_opencl}/vecadd/README (100%) rename benchmarks/{new_opencl => old_opencl}/vecadd/kernel.cl (100%) rename benchmarks/{opencl => old_opencl}/vecadd/libvecadd.a (100%) rename benchmarks/{new_opencl => old_opencl}/vecadd/main.cc (75%) rename benchmarks/{new_opencl => opencl}/bfs/kernel.pocl (100%) rename benchmarks/{new_opencl => opencl}/compiler/bin/poclcc (100%) rename benchmarks/{new_opencl => opencl}/compiler/lib/libOpenCL.so (100%) rename benchmarks/{new_opencl => opencl}/compiler/lib/libOpenCL.so.2 (100%) rename benchmarks/{new_opencl => opencl}/compiler/lib/libOpenCL.so.2.5.0 (100%) rename benchmarks/{new_opencl => opencl}/compiler/share/pocl/include/_builtin_renames.h (100%) rename benchmarks/{new_opencl => opencl}/compiler/share/pocl/include/_clang_opencl.h (100%) rename benchmarks/{new_opencl => opencl}/compiler/share/pocl/include/_enable_all_exts.h (100%) rename benchmarks/{new_opencl => opencl}/compiler/share/pocl/include/_kernel.h (100%) rename benchmarks/{new_opencl => opencl}/compiler/share/pocl/include/_kernel_c.h (100%) rename benchmarks/{new_opencl => opencl}/compiler/share/pocl/include/_kernel_constants.h (100%) rename benchmarks/{new_opencl => opencl}/compiler/share/pocl/include/opencl-c-base.h (100%) rename benchmarks/{new_opencl => opencl}/compiler/share/pocl/include/opencl-c.h (100%) rename benchmarks/{new_opencl => opencl}/compiler/share/pocl/include/pocl.h (100%) rename 
benchmarks/{new_opencl => opencl}/compiler/share/pocl/include/pocl_device.h (100%) rename benchmarks/{new_opencl => opencl}/compiler/share/pocl/include/pocl_image_types.h (100%) rename benchmarks/{new_opencl => opencl}/compiler/share/pocl/include/pocl_spir.h (100%) rename benchmarks/{new_opencl => opencl}/compiler/share/pocl/include/pocl_types.h (100%) rename benchmarks/{new_opencl => opencl}/compiler/share/pocl/kernel-riscv32-unknown-unknown-elf-skylake.bc (100%) rename benchmarks/{new_opencl => opencl}/convolution/.gitignore (100%) rename benchmarks/{new_opencl => opencl}/guassian/kernel.pocl (100%) rename benchmarks/{new_opencl => opencl}/kmeans/.gitignore (100%) rename benchmarks/{new_opencl => opencl}/kmeans/kernel.pocl (100%) rename benchmarks/{new_opencl => opencl}/lib/libOpenCL.so (100%) rename benchmarks/{new_opencl => opencl}/lib/libOpenCL.so.2 (100%) rename benchmarks/{new_opencl => opencl}/lib/libOpenCL.so.2.5.0 (100%) rename benchmarks/{new_opencl => opencl}/nearn/kernel.pocl (100%) rename benchmarks/{new_opencl => opencl}/results.txt (100%) rename benchmarks/{new_opencl => opencl}/runtime/include/CL/cl.h (100%) rename benchmarks/{new_opencl => opencl}/runtime/include/CL/cl.hpp (100%) rename benchmarks/{new_opencl => opencl}/runtime/include/CL/cl2.hpp (100%) rename benchmarks/{new_opencl => opencl}/runtime/include/CL/cl_d3d10.h (100%) rename benchmarks/{new_opencl => opencl}/runtime/include/CL/cl_d3d11.h (100%) rename benchmarks/{new_opencl => opencl}/runtime/include/CL/cl_dx9_media_sharing.h (100%) rename benchmarks/{new_opencl => opencl}/runtime/include/CL/cl_dx9_media_sharing_intel.h (100%) rename benchmarks/{new_opencl => opencl}/runtime/include/CL/cl_egl.h (100%) rename benchmarks/{new_opencl => opencl}/runtime/include/CL/cl_ext.h (100%) rename benchmarks/{new_opencl => opencl}/runtime/include/CL/cl_ext_intel.h (100%) rename benchmarks/{new_opencl => opencl}/runtime/include/CL/cl_gl.h (100%) rename benchmarks/{new_opencl => 
opencl}/runtime/include/CL/cl_gl_ext.h (100%) rename benchmarks/{new_opencl => opencl}/runtime/include/CL/cl_platform.h (100%) rename benchmarks/{new_opencl => opencl}/runtime/include/CL/cl_va_api_media_sharing_intel.h (100%) rename benchmarks/{new_opencl => opencl}/runtime/include/CL/cl_version.h (100%) rename benchmarks/{new_opencl => opencl}/runtime/include/CL/opencl.h (100%) rename benchmarks/{new_opencl => opencl}/runtime/lib/libOpenCL.so (100%) rename benchmarks/{new_opencl => opencl}/runtime/lib/libOpenCL.so.2 (100%) rename benchmarks/{new_opencl => opencl}/runtime/lib/libOpenCL.so.2.5.0 (100%) rename benchmarks/{new_opencl => opencl}/saxpy/kernel.pocl (100%) rename benchmarks/{new_opencl => opencl}/sfilter/kernel.pocl (100%) rename benchmarks/{new_opencl => opencl}/sgemm/kernel.pocl (100%) rename benchmarks/{new_opencl => opencl}/sgemm/sgemm (100%) rename benchmarks/{new_opencl => opencl}/transpose/.gitignore (100%) rename benchmarks/{new_opencl => opencl}/vecadd/.gitignore (100%) rename benchmarks/{new_opencl => opencl}/vecadd/kernel.pocl (100%) rename {results => evaluation}/export_cycle_counts.py (100%) rename {benchmarks/test_benchmark => evaluation/perf_2019_11_25}/saxpy.result (100%) rename {benchmarks/test_benchmark => evaluation/perf_2019_11_25}/sfilter.result (100%) rename {benchmarks/test_benchmark => evaluation/perf_2019_11_25}/sgemm.result (100%) rename {benchmarks/test_benchmark => evaluation/perf_2019_11_25}/sgemm_ipc.result (100%) rename {benchmarks/test_benchmark => evaluation/perf_2019_11_25}/test_all.sh (100%) rename {benchmarks/test_benchmark => evaluation/perf_2019_11_25}/test_bench.sh (100%) rename run_tests.sh => evaluation/run_tests.sh (100%) rename {results => evaluation}/synth_data.csv (100%) rename {rtl => hw}/configs/.gitignore (100%) rename {rtl => hw}/configs/.gitkeep (100%) rename {rtl => hw}/gen_config.py (100%) rename {rtl => hw}/gen_synth_configs.py (100%) rename {models => hw/models}/memory/cln28hpc/rf2_32x128_wm1/Makefile 
(100%) rename {models => hw/models}/memory/cln28hpc/rf2_32x128_wm1/env_vsim (100%) rename {models => hw/models}/memory/cln28hpc/rf2_32x128_wm1/rf2_32x128_wm1.bitmap (100%) rename {models => hw/models}/memory/cln28hpc/rf2_32x128_wm1/rf2_32x128_wm1.cpf (100%) rename {models => hw/models}/memory/cln28hpc/rf2_32x128_wm1/rf2_32x128_wm1.ctl (100%) rename {models => hw/models}/memory/cln28hpc/rf2_32x128_wm1/rf2_32x128_wm1.lef (100%) rename {models => hw/models}/memory/cln28hpc/rf2_32x128_wm1/rf2_32x128_wm1.mdt (100%) rename {models => hw/models}/memory/cln28hpc/rf2_32x128_wm1/rf2_32x128_wm1.memlib (100%) rename {models => hw/models}/memory/cln28hpc/rf2_32x128_wm1/rf2_32x128_wm1.tv (100%) rename {models => hw/models}/memory/cln28hpc/rf2_32x128_wm1/rf2_32x128_wm1.v (100%) rename {models => hw/models}/memory/cln28hpc/rf2_32x128_wm1/rf2_32x128_wm1_antenna.clf (100%) rename {models => hw/models}/memory/cln28hpc/rf2_32x128_wm1/rf2_32x128_wm1_ff_0p99v_0p99v_m40c.avm (100%) rename {models => hw/models}/memory/cln28hpc/rf2_32x128_wm1/rf2_32x128_wm1_ff_0p99v_0p99v_m40c.dat (100%) rename {models => hw/models}/memory/cln28hpc/rf2_32x128_wm1/rf2_32x128_wm1_ff_0p99v_0p99v_m40c.lib (100%) rename {models => hw/models}/memory/cln28hpc/rf2_32x128_wm1/rf2_32x128_wm1_ff_0p99v_0p99v_m40c.lib_sh5p1cm (100%) rename {models => hw/models}/memory/cln28hpc/rf2_32x128_wm1/rf2_32x128_wm1_ff_0p99v_0p99v_m40c.ps (100%) rename {models => hw/models}/memory/cln28hpc/rf2_32x128_wm1/rf2_32x128_wm1_rtl.v (100%) rename {models => hw/models}/memory/cln28hpc/rf2_32x128_wm1/rf2_32x128_wm1_ss_0p81v_0p81v_125c.avm (100%) rename {models => hw/models}/memory/cln28hpc/rf2_32x128_wm1/rf2_32x128_wm1_ss_0p81v_0p81v_125c.dat (100%) rename {models => hw/models}/memory/cln28hpc/rf2_32x128_wm1/rf2_32x128_wm1_ss_0p81v_0p81v_125c.lib (100%) rename {models => hw/models}/memory/cln28hpc/rf2_32x128_wm1/rf2_32x128_wm1_ss_0p81v_0p81v_125c.ps (100%) rename {models => 
hw/models}/memory/cln28hpc/rf2_32x128_wm1/rf2_32x128_wm1_tt_0p81v_0p81v_0c.avm (100%) rename {models => hw/models}/memory/cln28hpc/rf2_32x128_wm1/rf2_32x128_wm1_tt_0p81v_0p81v_0c.dat (100%) rename {models => hw/models}/memory/cln28hpc/rf2_32x128_wm1/rf2_32x128_wm1_tt_0p81v_0p81v_0c.lib (100%) rename {models => hw/models}/memory/cln28hpc/rf2_32x128_wm1/rf2_32x128_wm1_tt_0p81v_0p81v_0c.ps (100%) rename {models => hw/models}/memory/cln28hpc/rf2_32x128_wm1/testbench.cpp (100%) rename {models => hw/models}/memory/cln28hpc/rf2_32x128_wm1/vsim/rf2_32x128_wm1_tb.v (100%) rename {models => hw/models}/memory/cln28hpc/rf2_32x128_wm1/vsim/rf_tb.cr.mti (100%) rename {models => hw/models}/memory/cln28hpc/rf2_32x128_wm1/vsim/rf_tb.mpf (100%) rename {models => hw/models}/memory/cln28hpc/rf2_32x128_wm1/vsim/transcript (100%) rename {models => hw/models}/memory/cln28hpc/rf2_32x128_wm1/vsim/vsim.wlf (100%) rename {models => hw/models}/memory/cln28hpc/rf2_32x128_wm1/vsim/work/@_opt/_lib.qdb (100%) rename {models => hw/models}/memory/cln28hpc/rf2_32x128_wm1/vsim/work/@_opt/_lib1_0.qdb (100%) rename {models => hw/models}/memory/cln28hpc/rf2_32x128_wm1/vsim/work/@_opt/_lib1_0.qpg (100%) rename {models => hw/models}/memory/cln28hpc/rf2_32x128_wm1/vsim/work/@_opt/_lib1_0.qtl (100%) rename {models => hw/models}/memory/cln28hpc/rf2_32x128_wm1/vsim/work/@_opt/_lib2_0.qdb (100%) rename {models => hw/models}/memory/cln28hpc/rf2_32x128_wm1/vsim/work/@_opt/_lib2_0.qpg (100%) rename {models => hw/models}/memory/cln28hpc/rf2_32x128_wm1/vsim/work/@_opt/_lib2_0.qtl (100%) rename {models => hw/models}/memory/cln28hpc/rf2_32x128_wm1/vsim/work/@_opt/_lib3_0.qdb (100%) rename {models => hw/models}/memory/cln28hpc/rf2_32x128_wm1/vsim/work/@_opt/_lib3_0.qpg (100%) rename {models => hw/models}/memory/cln28hpc/rf2_32x128_wm1/vsim/work/@_opt/_lib3_0.qtl (100%) rename {models => hw/models}/memory/cln28hpc/rf2_32x128_wm1/vsim/work/@_opt/_lib4_0.qdb (100%) rename {models => 
hw/models}/memory/cln28hpc/rf2_32x128_wm1/vsim/work/@_opt/_lib4_0.qpg (100%) rename {models => hw/models}/memory/cln28hpc/rf2_32x128_wm1/vsim/work/@_opt/_lib4_0.qtl (100%) rename {models => hw/models}/memory/cln28hpc/rf2_32x128_wm1/vsim/work/@_opt/_lib5_0.qdb (100%) rename {models => hw/models}/memory/cln28hpc/rf2_32x128_wm1/vsim/work/@_opt/_lib5_0.qpg (100%) rename {models => hw/models}/memory/cln28hpc/rf2_32x128_wm1/vsim/work/@_opt/_lib5_0.qtl (100%) rename {models => hw/models}/memory/cln28hpc/rf2_32x128_wm1/vsim/work/_info (100%) rename {models => hw/models}/memory/cln28hpc/rf2_32x128_wm1/vsim/work/_lib.qdb (100%) rename {models => hw/models}/memory/cln28hpc/rf2_32x128_wm1/vsim/work/_lib1_0.qdb (100%) rename {models => hw/models}/memory/cln28hpc/rf2_32x128_wm1/vsim/work/_lib1_0.qpg (100%) rename {models => hw/models}/memory/cln28hpc/rf2_32x128_wm1/vsim/work/_lib1_0.qtl (100%) rename {models => hw/models}/memory/cln28hpc/rf2_32x128_wm1/vsim/work/_vmake (100%) rename {models => hw/models}/memory/cln28hpm/2d_hardmacro_db/rf2_128x128_wm1_ff_0p99v_0p99v_125c.db (100%) rename {models => hw/models}/memory/cln28hpm/2d_hardmacro_db/rf2_128x128_wm1_ss_0p81v_0p81v_m40c.db (100%) rename {models => hw/models}/memory/cln28hpm/2d_hardmacro_db/rf2_128x128_wm1_tt_0p90v_0p90v_25c.db (100%) rename {models => hw/models}/memory/cln28hpm/2d_hardmacro_db/rf2_256x128_wm1_ff_0p99v_0p99v_125c.db (100%) rename {models => hw/models}/memory/cln28hpm/2d_hardmacro_db/rf2_256x128_wm1_ss_0p81v_0p81v_m40c.db (100%) rename {models => hw/models}/memory/cln28hpm/2d_hardmacro_db/rf2_256x128_wm1_tt_0p90v_0p90v_25c.db (100%) rename {models => hw/models}/memory/cln28hpm/2d_hardmacro_db/rf2_256x19_wm0_ff_0p99v_0p99v_125c.db (100%) rename {models => hw/models}/memory/cln28hpm/2d_hardmacro_db/rf2_256x19_wm0_ss_0p81v_0p81v_m40c.db (100%) rename {models => hw/models}/memory/cln28hpm/2d_hardmacro_db/rf2_256x19_wm0_tt_0p90v_0p90v_25c.db (100%) rename {models => 
hw/models}/memory/cln28hpm/2d_hardmacro_db/rf2_32x128_wm1_ff_0p99v_0p99v_125c.db (100%) rename {models => hw/models}/memory/cln28hpm/2d_hardmacro_db/rf2_32x128_wm1_ss_0p81v_0p81v_m40c.db (100%) rename {models => hw/models}/memory/cln28hpm/2d_hardmacro_db/rf2_32x128_wm1_tt_0p90v_0p90v_25c.db (100%) rename {models => hw/models}/memory/cln28hpm/convertToDBAll.csh (100%) rename {models => hw/models}/memory/cln28hpm/convert_lib_to_db.tcl (100%) rename {models => hw/models}/memory/cln28hpm/rf2_128x128_wm1/command.log (100%) rename {models => hw/models}/memory/cln28hpm/rf2_128x128_wm1/rf2_128x128_wm1.bitmap (100%) rename {models => hw/models}/memory/cln28hpm/rf2_128x128_wm1/rf2_128x128_wm1.cpf (100%) rename {models => hw/models}/memory/cln28hpm/rf2_128x128_wm1/rf2_128x128_wm1.ctl (100%) rename {models => hw/models}/memory/cln28hpm/rf2_128x128_wm1/rf2_128x128_wm1.lef (100%) rename {models => hw/models}/memory/cln28hpm/rf2_128x128_wm1/rf2_128x128_wm1.mdt (100%) rename {models => hw/models}/memory/cln28hpm/rf2_128x128_wm1/rf2_128x128_wm1.memlib (100%) rename {models => hw/models}/memory/cln28hpm/rf2_128x128_wm1/rf2_128x128_wm1.tv (100%) rename {models => hw/models}/memory/cln28hpm/rf2_128x128_wm1/rf2_128x128_wm1.v (100%) rename {models => hw/models}/memory/cln28hpm/rf2_128x128_wm1/rf2_128x128_wm1_antenna.clf (100%) rename {models => hw/models}/memory/cln28hpm/rf2_128x128_wm1/rf2_128x128_wm1_ff_0p99v_0p99v_125c.avm (100%) rename {models => hw/models}/memory/cln28hpm/rf2_128x128_wm1/rf2_128x128_wm1_ff_0p99v_0p99v_125c.dat (100%) rename {models => hw/models}/memory/cln28hpm/rf2_128x128_wm1/rf2_128x128_wm1_ff_0p99v_0p99v_125c.db (100%) rename {models => hw/models}/memory/cln28hpm/rf2_128x128_wm1/rf2_128x128_wm1_ff_0p99v_0p99v_125c.lib (100%) rename {models => hw/models}/memory/cln28hpm/rf2_128x128_wm1/rf2_128x128_wm1_ff_0p99v_0p99v_125c.ps (100%) rename {models => hw/models}/memory/cln28hpm/rf2_128x128_wm1/rf2_128x128_wm1_rtl.v (100%) rename {models => 
hw/models}/memory/cln28hpm/rf2_128x128_wm1/rf2_128x128_wm1_ss_0p81v_0p81v_m40c.avm (100%) rename {models => hw/models}/memory/cln28hpm/rf2_128x128_wm1/rf2_128x128_wm1_ss_0p81v_0p81v_m40c.dat (100%) rename {models => hw/models}/memory/cln28hpm/rf2_128x128_wm1/rf2_128x128_wm1_ss_0p81v_0p81v_m40c.db (100%) rename {models => hw/models}/memory/cln28hpm/rf2_128x128_wm1/rf2_128x128_wm1_ss_0p81v_0p81v_m40c.lib (100%) rename {models => hw/models}/memory/cln28hpm/rf2_128x128_wm1/rf2_128x128_wm1_ss_0p81v_0p81v_m40c.ps (100%) rename {models => hw/models}/memory/cln28hpm/rf2_128x128_wm1/rf2_128x128_wm1_tt_0p90v_0p90v_25c.avm (100%) rename {models => hw/models}/memory/cln28hpm/rf2_128x128_wm1/rf2_128x128_wm1_tt_0p90v_0p90v_25c.dat (100%) rename {models => hw/models}/memory/cln28hpm/rf2_128x128_wm1/rf2_128x128_wm1_tt_0p90v_0p90v_25c.db (100%) rename {models => hw/models}/memory/cln28hpm/rf2_128x128_wm1/rf2_128x128_wm1_tt_0p90v_0p90v_25c.lib (100%) rename {models => hw/models}/memory/cln28hpm/rf2_128x128_wm1/rf2_128x128_wm1_tt_0p90v_0p90v_25c.ps (100%) rename {models => hw/models}/memory/cln28hpm/rf2_256x128_wm1/command.log (100%) rename {models => hw/models}/memory/cln28hpm/rf2_256x128_wm1/rf2_256x128_wm1.bitmap (100%) rename {models => hw/models}/memory/cln28hpm/rf2_256x128_wm1/rf2_256x128_wm1.cpf (100%) rename {models => hw/models}/memory/cln28hpm/rf2_256x128_wm1/rf2_256x128_wm1.ctl (100%) rename {models => hw/models}/memory/cln28hpm/rf2_256x128_wm1/rf2_256x128_wm1.lef (100%) rename {models => hw/models}/memory/cln28hpm/rf2_256x128_wm1/rf2_256x128_wm1.mdt (100%) rename {models => hw/models}/memory/cln28hpm/rf2_256x128_wm1/rf2_256x128_wm1.memlib (100%) rename {models => hw/models}/memory/cln28hpm/rf2_256x128_wm1/rf2_256x128_wm1.tv (100%) rename {models => hw/models}/memory/cln28hpm/rf2_256x128_wm1/rf2_256x128_wm1.v (100%) rename {models => hw/models}/memory/cln28hpm/rf2_256x128_wm1/rf2_256x128_wm1_antenna.clf (100%) rename {models => 
hw/models}/memory/cln28hpm/rf2_256x128_wm1/rf2_256x128_wm1_ff_0p99v_0p99v_125c.avm (100%) rename {models => hw/models}/memory/cln28hpm/rf2_256x128_wm1/rf2_256x128_wm1_ff_0p99v_0p99v_125c.dat (100%) rename {models => hw/models}/memory/cln28hpm/rf2_256x128_wm1/rf2_256x128_wm1_ff_0p99v_0p99v_125c.db (100%) rename {models => hw/models}/memory/cln28hpm/rf2_256x128_wm1/rf2_256x128_wm1_ff_0p99v_0p99v_125c.lib (100%) rename {models => hw/models}/memory/cln28hpm/rf2_256x128_wm1/rf2_256x128_wm1_ff_0p99v_0p99v_125c.ps (100%) rename {models => hw/models}/memory/cln28hpm/rf2_256x128_wm1/rf2_256x128_wm1_rtl.v (100%) rename {models => hw/models}/memory/cln28hpm/rf2_256x128_wm1/rf2_256x128_wm1_ss_0p81v_0p81v_m40c.avm (100%) rename {models => hw/models}/memory/cln28hpm/rf2_256x128_wm1/rf2_256x128_wm1_ss_0p81v_0p81v_m40c.dat (100%) rename {models => hw/models}/memory/cln28hpm/rf2_256x128_wm1/rf2_256x128_wm1_ss_0p81v_0p81v_m40c.db (100%) rename {models => hw/models}/memory/cln28hpm/rf2_256x128_wm1/rf2_256x128_wm1_ss_0p81v_0p81v_m40c.lib (100%) rename {models => hw/models}/memory/cln28hpm/rf2_256x128_wm1/rf2_256x128_wm1_ss_0p81v_0p81v_m40c.ps (100%) rename {models => hw/models}/memory/cln28hpm/rf2_256x128_wm1/rf2_256x128_wm1_tt_0p90v_0p90v_25c.avm (100%) rename {models => hw/models}/memory/cln28hpm/rf2_256x128_wm1/rf2_256x128_wm1_tt_0p90v_0p90v_25c.dat (100%) rename {models => hw/models}/memory/cln28hpm/rf2_256x128_wm1/rf2_256x128_wm1_tt_0p90v_0p90v_25c.db (100%) rename {models => hw/models}/memory/cln28hpm/rf2_256x128_wm1/rf2_256x128_wm1_tt_0p90v_0p90v_25c.lib (100%) rename {models => hw/models}/memory/cln28hpm/rf2_256x128_wm1/rf2_256x128_wm1_tt_0p90v_0p90v_25c.ps (100%) rename {models => hw/models}/memory/cln28hpm/rf2_256x19_wm0/command.log (100%) rename {models => hw/models}/memory/cln28hpm/rf2_256x19_wm0/rf2_256x19_wm0.bitmap (100%) rename {models => hw/models}/memory/cln28hpm/rf2_256x19_wm0/rf2_256x19_wm0.cpf (100%) rename {models => 
hw/models}/memory/cln28hpm/rf2_256x19_wm0/rf2_256x19_wm0.ctl (100%) rename {models => hw/models}/memory/cln28hpm/rf2_256x19_wm0/rf2_256x19_wm0.lef (100%) rename {models => hw/models}/memory/cln28hpm/rf2_256x19_wm0/rf2_256x19_wm0.mdt (100%) rename {models => hw/models}/memory/cln28hpm/rf2_256x19_wm0/rf2_256x19_wm0.memlib (100%) rename {models => hw/models}/memory/cln28hpm/rf2_256x19_wm0/rf2_256x19_wm0.tv (100%) rename {models => hw/models}/memory/cln28hpm/rf2_256x19_wm0/rf2_256x19_wm0.v (100%) rename {models => hw/models}/memory/cln28hpm/rf2_256x19_wm0/rf2_256x19_wm0_antenna.clf (100%) rename {models => hw/models}/memory/cln28hpm/rf2_256x19_wm0/rf2_256x19_wm0_ff_0p99v_0p99v_125c.avm (100%) rename {models => hw/models}/memory/cln28hpm/rf2_256x19_wm0/rf2_256x19_wm0_ff_0p99v_0p99v_125c.dat (100%) rename {models => hw/models}/memory/cln28hpm/rf2_256x19_wm0/rf2_256x19_wm0_ff_0p99v_0p99v_125c.db (100%) rename {models => hw/models}/memory/cln28hpm/rf2_256x19_wm0/rf2_256x19_wm0_ff_0p99v_0p99v_125c.lib (100%) rename {models => hw/models}/memory/cln28hpm/rf2_256x19_wm0/rf2_256x19_wm0_ff_0p99v_0p99v_125c.ps (100%) rename {models => hw/models}/memory/cln28hpm/rf2_256x19_wm0/rf2_256x19_wm0_rtl.v (100%) rename {models => hw/models}/memory/cln28hpm/rf2_256x19_wm0/rf2_256x19_wm0_ss_0p81v_0p81v_m40c.avm (100%) rename {models => hw/models}/memory/cln28hpm/rf2_256x19_wm0/rf2_256x19_wm0_ss_0p81v_0p81v_m40c.dat (100%) rename {models => hw/models}/memory/cln28hpm/rf2_256x19_wm0/rf2_256x19_wm0_ss_0p81v_0p81v_m40c.db (100%) rename {models => hw/models}/memory/cln28hpm/rf2_256x19_wm0/rf2_256x19_wm0_ss_0p81v_0p81v_m40c.lib (100%) rename {models => hw/models}/memory/cln28hpm/rf2_256x19_wm0/rf2_256x19_wm0_ss_0p81v_0p81v_m40c.ps (100%) rename {models => hw/models}/memory/cln28hpm/rf2_256x19_wm0/rf2_256x19_wm0_tt_0p90v_0p90v_25c.avm (100%) rename {models => hw/models}/memory/cln28hpm/rf2_256x19_wm0/rf2_256x19_wm0_tt_0p90v_0p90v_25c.dat (100%) rename {models => 
hw/models}/memory/cln28hpm/rf2_256x19_wm0/rf2_256x19_wm0_tt_0p90v_0p90v_25c.db (100%) rename {models => hw/models}/memory/cln28hpm/rf2_256x19_wm0/rf2_256x19_wm0_tt_0p90v_0p90v_25c.lib (100%) rename {models => hw/models}/memory/cln28hpm/rf2_256x19_wm0/rf2_256x19_wm0_tt_0p90v_0p90v_25c.ps (100%) rename {models => hw/models}/memory/cln28hpm/rf2_32x128_wm1/command.log (100%) rename {models => hw/models}/memory/cln28hpm/rf2_32x128_wm1/rf2_32x128_wm1.bitmap (100%) rename {models => hw/models}/memory/cln28hpm/rf2_32x128_wm1/rf2_32x128_wm1.cpf (100%) rename {models => hw/models}/memory/cln28hpm/rf2_32x128_wm1/rf2_32x128_wm1.ctl (100%) rename {models => hw/models}/memory/cln28hpm/rf2_32x128_wm1/rf2_32x128_wm1.lef (100%) rename {models => hw/models}/memory/cln28hpm/rf2_32x128_wm1/rf2_32x128_wm1.mdt (100%) rename {models => hw/models}/memory/cln28hpm/rf2_32x128_wm1/rf2_32x128_wm1.memlib (100%) rename {models => hw/models}/memory/cln28hpm/rf2_32x128_wm1/rf2_32x128_wm1.tv (100%) rename {models => hw/models}/memory/cln28hpm/rf2_32x128_wm1/rf2_32x128_wm1.v (100%) rename {models => hw/models}/memory/cln28hpm/rf2_32x128_wm1/rf2_32x128_wm1_antenna.clf (100%) rename {models => hw/models}/memory/cln28hpm/rf2_32x128_wm1/rf2_32x128_wm1_ff_0p99v_0p99v_125c.avm (100%) rename {models => hw/models}/memory/cln28hpm/rf2_32x128_wm1/rf2_32x128_wm1_ff_0p99v_0p99v_125c.dat (100%) rename {models => hw/models}/memory/cln28hpm/rf2_32x128_wm1/rf2_32x128_wm1_ff_0p99v_0p99v_125c.db (100%) rename {models => hw/models}/memory/cln28hpm/rf2_32x128_wm1/rf2_32x128_wm1_ff_0p99v_0p99v_125c.lib (100%) rename {models => hw/models}/memory/cln28hpm/rf2_32x128_wm1/rf2_32x128_wm1_ff_0p99v_0p99v_125c.ps (100%) rename {models => hw/models}/memory/cln28hpm/rf2_32x128_wm1/rf2_32x128_wm1_rtl.v (100%) rename {models => hw/models}/memory/cln28hpm/rf2_32x128_wm1/rf2_32x128_wm1_ss_0p81v_0p81v_m40c.avm (100%) rename {models => hw/models}/memory/cln28hpm/rf2_32x128_wm1/rf2_32x128_wm1_ss_0p81v_0p81v_m40c.dat (100%) rename 
{models => hw/models}/memory/cln28hpm/rf2_32x128_wm1/rf2_32x128_wm1_ss_0p81v_0p81v_m40c.db (100%) rename {models => hw/models}/memory/cln28hpm/rf2_32x128_wm1/rf2_32x128_wm1_ss_0p81v_0p81v_m40c.lib (100%) rename {models => hw/models}/memory/cln28hpm/rf2_32x128_wm1/rf2_32x128_wm1_ss_0p81v_0p81v_m40c.ps (100%) rename {models => hw/models}/memory/cln28hpm/rf2_32x128_wm1/rf2_32x128_wm1_tt_0p90v_0p90v_25c.avm (100%) rename {models => hw/models}/memory/cln28hpm/rf2_32x128_wm1/rf2_32x128_wm1_tt_0p90v_0p90v_25c.dat (100%) rename {models => hw/models}/memory/cln28hpm/rf2_32x128_wm1/rf2_32x128_wm1_tt_0p90v_0p90v_25c.db (100%) rename {models => hw/models}/memory/cln28hpm/rf2_32x128_wm1/rf2_32x128_wm1_tt_0p90v_0p90v_25c.lib (100%) rename {models => hw/models}/memory/cln28hpm/rf2_32x128_wm1/rf2_32x128_wm1_tt_0p90v_0p90v_25c.ps (100%) rename {models => hw/models}/memory/cln28hpm/rf2_32x128_wm1/vsim/Makefile (100%) rename {models => hw/models}/memory/cln28hpm/rf2_32x128_wm1/vsim/rf2_32x128_wm1_tb.v (100%) rename {models => hw/models}/memory/cln28hpm/rf2_32x128_wm1/vsim/transcript (100%) rename {models => hw/models}/memory/cln28hpm/rf2_32x19_wm0/rf2_32x19_wm0.bitmap (100%) rename {models => hw/models}/memory/cln28hpm/rf2_32x19_wm0/rf2_32x19_wm0.cpf (100%) rename {models => hw/models}/memory/cln28hpm/rf2_32x19_wm0/rf2_32x19_wm0.ctl (100%) rename {models => hw/models}/memory/cln28hpm/rf2_32x19_wm0/rf2_32x19_wm0.lef (100%) rename {models => hw/models}/memory/cln28hpm/rf2_32x19_wm0/rf2_32x19_wm0.mdt (100%) rename {models => hw/models}/memory/cln28hpm/rf2_32x19_wm0/rf2_32x19_wm0.memlib (100%) rename {models => hw/models}/memory/cln28hpm/rf2_32x19_wm0/rf2_32x19_wm0.tv (100%) rename {models => hw/models}/memory/cln28hpm/rf2_32x19_wm0/rf2_32x19_wm0.v (100%) rename {models => hw/models}/memory/cln28hpm/rf2_32x19_wm0/rf2_32x19_wm0_antenna.clf (100%) rename {models => hw/models}/memory/cln28hpm/rf2_32x19_wm0/rf2_32x19_wm0_ff_0p99v_0p99v_125c.avm (100%) rename {models => 
hw/models}/memory/cln28hpm/rf2_32x19_wm0/rf2_32x19_wm0_ff_0p99v_0p99v_125c.dat (100%) rename {models => hw/models}/memory/cln28hpm/rf2_32x19_wm0/rf2_32x19_wm0_ff_0p99v_0p99v_125c.lib (100%) rename {models => hw/models}/memory/cln28hpm/rf2_32x19_wm0/rf2_32x19_wm0_ff_0p99v_0p99v_125c.ps (100%) rename {models => hw/models}/memory/cln28hpm/rf2_32x19_wm0/rf2_32x19_wm0_rtl.v (100%) rename {models => hw/models}/memory/cln28hpm/rf2_32x19_wm0/rf2_32x19_wm0_ss_0p81v_0p81v_m40c.avm (100%) rename {models => hw/models}/memory/cln28hpm/rf2_32x19_wm0/rf2_32x19_wm0_ss_0p81v_0p81v_m40c.dat (100%) rename {models => hw/models}/memory/cln28hpm/rf2_32x19_wm0/rf2_32x19_wm0_ss_0p81v_0p81v_m40c.lib (100%) rename {models => hw/models}/memory/cln28hpm/rf2_32x19_wm0/rf2_32x19_wm0_ss_0p81v_0p81v_m40c.ps (100%) rename {models => hw/models}/memory/cln28hpm/rf2_32x19_wm0/rf2_32x19_wm0_tt_0p90v_0p90v_25c.avm (100%) rename {models => hw/models}/memory/cln28hpm/rf2_32x19_wm0/rf2_32x19_wm0_tt_0p90v_0p90v_25c.dat (100%) rename {models => hw/models}/memory/cln28hpm/rf2_32x19_wm0/rf2_32x19_wm0_tt_0p90v_0p90v_25c.lib (100%) rename {models => hw/models}/memory/cln28hpm/rf2_32x19_wm0/rf2_32x19_wm0_tt_0p90v_0p90v_25c.ps (100%) rename {old_rtl => hw}/modelsim/Makefile (100%) rename {old_rtl => hw}/modelsim/cshrc.modelsim (100%) rename {old_rtl => hw}/modelsim/modelsim.mpf (100%) rename {old_rtl => hw}/modelsim/vortex_dpi.cpp (100%) rename {old_rtl => hw}/modelsim/vortex_dpi.h (100%) rename {old_rtl => hw}/modelsim/vortex_tb.v (100%) rename {old_rtl => hw}/modelsim/work/_info (100%) rename {old_rtl => hw}/modelsim/work/_lib.qdb (100%) rename {old_rtl => hw}/modelsim/work/_lib1_0.qdb (100%) rename {old_rtl => hw}/modelsim/work/_lib1_0.qpg (100%) rename {old_rtl => hw}/modelsim/work/_lib1_0.qtl (100%) rename {old_rtl => hw}/modelsim/work/_vmake (100%) rename {old_rtl => hw/old_rtl}/Makefile (100%) rename {old_rtl => hw/old_rtl}/VX_alu.v (100%) rename {old_rtl => hw/old_rtl}/VX_back_end.v (100%) rename {old_rtl 
=> hw/old_rtl}/VX_countones.v (100%) rename {old_rtl => hw/old_rtl}/VX_csr_data.v (100%) rename {old_rtl => hw/old_rtl}/VX_csr_handler.v (100%) rename {old_rtl => hw/old_rtl}/VX_csr_pipe.v (100%) rename {old_rtl => hw/old_rtl}/VX_csr_wrapper.v (100%) rename {old_rtl => hw/old_rtl}/VX_decode.v (100%) rename {old_rtl => hw/old_rtl}/VX_define.v (100%) rename {old_rtl => hw/old_rtl}/VX_define_synth.v (100%) rename {old_rtl => hw/old_rtl}/VX_dmem_controller.v (100%) rename {old_rtl => hw/old_rtl}/VX_execute_unit.v (100%) rename {old_rtl => hw/old_rtl}/VX_fetch.v (100%) rename {old_rtl => hw/old_rtl}/VX_front_end.v (100%) rename {old_rtl => hw/old_rtl}/VX_generic_priority_encoder.v (100%) rename {old_rtl => hw/old_rtl}/VX_generic_register.v (100%) rename {old_rtl => hw/old_rtl}/VX_generic_stack.v (100%) rename {old_rtl => hw/old_rtl}/VX_gpgpu_inst.v (100%) rename {old_rtl => hw/old_rtl}/VX_gpr.v (100%) rename {old_rtl => hw/old_rtl}/VX_gpr_stage.v (100%) rename {old_rtl => hw/old_rtl}/VX_gpr_wrapper.v (100%) rename {old_rtl => hw/old_rtl}/VX_inst_multiplex.v (100%) rename {old_rtl => hw/old_rtl}/VX_lsu.v (100%) rename {old_rtl => hw/old_rtl}/VX_lsu_addr_gen.v (100%) rename {old_rtl => hw/old_rtl}/VX_priority_encoder.v (100%) rename {old_rtl => hw/old_rtl}/VX_priority_encoder_w_mask.v (100%) rename {old_rtl => hw/old_rtl}/VX_scheduler.v (100%) rename {old_rtl => hw/old_rtl}/VX_warp.v (100%) rename {old_rtl => hw/old_rtl}/VX_warp_scheduler.v (100%) rename {old_rtl => hw/old_rtl}/VX_writeback.v (100%) rename {old_rtl => hw/old_rtl}/Vortex.v (100%) rename {old_rtl => hw/old_rtl}/byte_enabled_simple_dual_port_ram.v (100%) rename {old_rtl => hw/old_rtl}/cache/Makefile (100%) rename {old_rtl => hw/old_rtl}/cache/Notes (100%) rename {old_rtl => hw/old_rtl}/cache/VX_Cache_Bank.v (100%) rename {old_rtl => hw/old_rtl}/cache/VX_cache_bank_valid.v (100%) rename {old_rtl => hw/old_rtl}/cache/VX_cache_data.v (100%) rename {old_rtl => hw/old_rtl}/cache/VX_cache_data_per_index.v (100%) 
rename {old_rtl => hw/old_rtl}/cache/VX_d_cache.v (100%) rename {old_rtl => hw/old_rtl}/cache/VX_d_cache_encapsulate.v (100%) rename {old_rtl => hw/old_rtl}/cache/VX_d_cache_tb.v (100%) rename {old_rtl => hw/old_rtl}/cache/VX_generic_pe.v (100%) rename {old_rtl => hw/old_rtl}/cache/cache_set.v (100%) rename {old_rtl => hw/old_rtl}/cache/d_cache_test_bench.cpp (100%) rename {old_rtl => hw/old_rtl}/cache/d_cache_test_bench.h (100%) rename {old_rtl => hw/old_rtl}/cache/d_cache_test_bench_debug.h (100%) rename {old_rtl => hw/old_rtl}/interfaces/VX_branch_response_inter.v (100%) rename {old_rtl => hw/old_rtl}/interfaces/VX_csr_req_inter.v (100%) rename {old_rtl => hw/old_rtl}/interfaces/VX_csr_wb_inter.v (100%) rename {old_rtl => hw/old_rtl}/interfaces/VX_dcache_request_inter.v (100%) rename {old_rtl => hw/old_rtl}/interfaces/VX_dcache_response_inter.v (100%) rename {old_rtl => hw/old_rtl}/interfaces/VX_dram_req_rsp_inter.v (100%) rename {old_rtl => hw/old_rtl}/interfaces/VX_exec_unit_req_inter.v (100%) rename {old_rtl => hw/old_rtl}/interfaces/VX_frE_to_bckE_req_inter.v (100%) rename {old_rtl => hw/old_rtl}/interfaces/VX_gpr_clone_inter.v (100%) rename {old_rtl => hw/old_rtl}/interfaces/VX_gpr_data_inter.v (100%) rename {old_rtl => hw/old_rtl}/interfaces/VX_gpr_jal_inter.v (100%) rename {old_rtl => hw/old_rtl}/interfaces/VX_gpr_read_inter.v (100%) rename {old_rtl => hw/old_rtl}/interfaces/VX_gpr_wspawn_inter.v (100%) rename {old_rtl => hw/old_rtl}/interfaces/VX_gpu_inst_req_inter.v (100%) rename {old_rtl => hw/old_rtl}/interfaces/VX_icache_request_inter.v (100%) rename {old_rtl => hw/old_rtl}/interfaces/VX_icache_response_inter.v (100%) rename {old_rtl => hw/old_rtl}/interfaces/VX_inst_exec_wb_inter.v (100%) rename {old_rtl => hw/old_rtl}/interfaces/VX_inst_mem_wb_inter.v (100%) rename {old_rtl => hw/old_rtl}/interfaces/VX_inst_meta_inter.v (100%) rename {old_rtl => hw/old_rtl}/interfaces/VX_jal_response_inter.v (100%) rename {old_rtl => 
hw/old_rtl}/interfaces/VX_join_inter.v (100%) rename {old_rtl => hw/old_rtl}/interfaces/VX_lsu_req_inter.v (100%) rename {old_rtl => hw/old_rtl}/interfaces/VX_mem_req_inter.v (100%) rename {old_rtl => hw/old_rtl}/interfaces/VX_mw_wb_inter.v (100%) rename {old_rtl => hw/old_rtl}/interfaces/VX_warp_ctl_inter.v (100%) rename {old_rtl => hw/old_rtl}/interfaces/VX_wb_inter.v (100%) rename {old_rtl => hw/old_rtl}/interfaces/VX_wstall_inter.v (100%) rename {rtl => hw/old_rtl}/modelsim/Makefile (100%) rename {rtl => hw/old_rtl}/modelsim/cshrc.modelsim (100%) rename {rtl => hw/old_rtl}/modelsim/modelsim.mpf (100%) rename {rtl => hw/old_rtl}/modelsim/vortex_dpi.cpp (100%) rename {rtl => hw/old_rtl}/modelsim/vortex_dpi.h (100%) rename {rtl => hw/old_rtl}/modelsim/vortex_tb.v (100%) rename {rtl => hw/old_rtl}/modelsim/work/_info (100%) rename {rtl => hw/old_rtl}/modelsim/work/_lib.qdb (100%) rename {rtl => hw/old_rtl}/modelsim/work/_lib1_0.qdb (100%) rename {rtl => hw/old_rtl}/modelsim/work/_lib1_0.qpg (100%) rename {rtl => hw/old_rtl}/modelsim/work/_lib1_0.qtl (100%) rename {rtl => hw/old_rtl}/modelsim/work/_vmake (100%) rename {old_rtl => hw/old_rtl}/pipe_regs/VX_d_e_reg.v (100%) rename {old_rtl => hw/old_rtl}/pipe_regs/VX_f_d_reg.v (100%) rename {old_rtl => hw/old_rtl}/quartus/Makefile (100%) rename {old_rtl => hw/old_rtl}/quartus/VX_gpr_syn.qpf (100%) rename {old_rtl => hw/old_rtl}/quartus/VX_gpr_syn.qsf (100%) rename {old_rtl => hw/old_rtl}/quartus/asm.chg (100%) rename {old_rtl => hw/old_rtl}/quartus/fit.chg (100%) rename {old_rtl => hw/old_rtl}/quartus/map.chg (100%) rename {old_rtl => hw/old_rtl}/quartus/project.tcl (100%) rename {old_rtl => hw/old_rtl}/quartus/smart.log (100%) rename {old_rtl => hw/old_rtl}/quartus/sta.chg (100%) rename {old_rtl => hw/old_rtl}/quartus/syn.chg (100%) rename {old_rtl => hw/old_rtl}/quartus/vortex.ini (100%) rename {old_rtl => hw/old_rtl}/quartus/vortex.sdc (100%) rename {old_rtl => hw/old_rtl}/results.txt (100%) rename {old_rtl => 
hw/old_rtl}/shared_memory/VX_bank_valids.v (100%) rename {old_rtl => hw/old_rtl}/shared_memory/VX_priority_encoder_sm.v (100%) rename {old_rtl => hw/old_rtl}/shared_memory/VX_shared_memory.v (100%) rename {old_rtl => hw/old_rtl}/shared_memory/VX_shared_memory_block.v (100%) rename {old_rtl => hw/old_rtl}/simulate/VX_define.h (100%) rename {old_rtl => hw/old_rtl}/simulate/ram.h (100%) rename {old_rtl => hw/old_rtl}/simulate/tb_debug.h (100%) rename {old_rtl => hw/old_rtl}/simulate/test_bench.cpp (100%) rename {old_rtl => hw/old_rtl}/simulate/test_bench.h (100%) rename {driver/hw => hw/opae}/Makefile (100%) rename {driver/hw => hw/opae}/README (100%) rename {driver/hw => hw/opae}/ccip_interface_reg.sv (100%) rename {driver/hw => hw/opae}/ccip_std_afu.sv (100%) rename {driver => hw/opae}/opae_setup.sh (100%) rename {driver => hw/opae}/set_env.sh (100%) rename {driver/hw => hw/opae}/sources.txt (100%) rename {driver/hw => hw/opae}/vortex_afu.json (100%) rename {driver/hw => hw/opae}/vortex_afu.sv (100%) rename {driver/hw => hw/opae}/wave.do (100%) rename {rtl => hw/rtl}/.DS_Store (100%) rename {rtl => hw/rtl}/.gitignore (100%) rename {rtl => hw/rtl}/Makefile (100%) rename {rtl => hw/rtl}/VX_alu.v (100%) rename {rtl => hw/rtl}/VX_back_end.v (100%) rename {rtl => hw/rtl}/VX_cache/VX_bank.v (100%) rename {rtl => hw/rtl}/VX_cache/VX_cache.v (100%) rename {rtl => hw/rtl}/VX_cache/VX_cache_config.v (100%) rename {rtl => hw/rtl}/VX_cache/VX_cache_core_req_bank_sel.v (100%) rename {rtl => hw/rtl}/VX_cache/VX_cache_dfq_queue.v (100%) rename {rtl => hw/rtl}/VX_cache/VX_cache_dram_req_arb.v (100%) rename {rtl => hw/rtl}/VX_cache/VX_cache_miss_resrv.v (100%) rename {rtl => hw/rtl}/VX_cache/VX_cache_req_queue.v (100%) rename {rtl => hw/rtl}/VX_cache/VX_cache_wb_sel_merge.v (100%) rename {rtl => hw/rtl}/VX_cache/VX_dcache_llv_resp_bank_sel.v (100%) rename {rtl => hw/rtl}/VX_cache/VX_fill_invalidator.v (100%) rename {rtl => hw/rtl}/VX_cache/VX_mrv_queue.v (100%) rename {rtl => 
hw/rtl}/VX_cache/VX_prefetcher.v (100%) rename {rtl => hw/rtl}/VX_cache/VX_snp_fwd_arb.v (100%) rename {rtl => hw/rtl}/VX_cache/VX_tag_data_access.v (100%) rename {rtl => hw/rtl}/VX_cache/VX_tag_data_structure.v (100%) rename {rtl => hw/rtl}/VX_countones.v (100%) rename {rtl => hw/rtl}/VX_csr_data.v (100%) rename {rtl => hw/rtl}/VX_csr_handler.v (100%) rename {rtl => hw/rtl}/VX_csr_pipe.v (100%) rename {rtl => hw/rtl}/VX_csr_wrapper.v (100%) rename {rtl => hw/rtl}/VX_decode.v (100%) rename {rtl => hw/rtl}/VX_define.v (100%) rename {rtl => hw/rtl}/VX_dmem_controller.v (100%) rename {rtl => hw/rtl}/VX_execute_unit.v (100%) rename {rtl => hw/rtl}/VX_fetch.v (100%) rename {rtl => hw/rtl}/VX_front_end.v (100%) rename {rtl => hw/rtl}/VX_generic_priority_encoder.v (100%) rename {rtl => hw/rtl}/VX_generic_queue.v (100%) rename {rtl => hw/rtl}/VX_generic_queue_ll.v (100%) rename {rtl => hw/rtl}/VX_generic_register.v (100%) rename {rtl => hw/rtl}/VX_generic_stack.v (100%) rename {rtl => hw/rtl}/VX_gpgpu_inst.v (100%) rename {rtl => hw/rtl}/VX_gpr.v (100%) rename {rtl => hw/rtl}/VX_gpr_stage.v (100%) rename {rtl => hw/rtl}/VX_gpr_wrapper.v (100%) rename {rtl => hw/rtl}/VX_icache_stage.v (100%) rename {rtl => hw/rtl}/VX_inst_multiplex.v (100%) rename {rtl => hw/rtl}/VX_lsu.v (100%) rename {rtl => hw/rtl}/VX_lsu_addr_gen.v (100%) rename {rtl => hw/rtl}/VX_priority_encoder.v (100%) rename {rtl => hw/rtl}/VX_priority_encoder_w_mask.v (100%) rename {rtl => hw/rtl}/VX_scheduler.v (100%) rename {rtl => hw/rtl}/VX_warp.v (100%) rename {rtl => hw/rtl}/VX_warp_scheduler.v (100%) rename {rtl => hw/rtl}/VX_writeback.v (100%) rename {rtl => hw/rtl}/Vortex.v (100%) rename {rtl => hw/rtl}/Vortex_Cluster.v (100%) rename {rtl => hw/rtl}/Vortex_SOC.v (100%) rename {rtl => hw/rtl}/byte_enabled_simple_dual_port_ram.v (100%) rename {rtl => hw/rtl}/cache/Makefile (100%) rename {rtl => hw/rtl}/cache/Notes (100%) rename {rtl => hw/rtl}/cache/VX_Cache_Bank.v (100%) rename {rtl => 
hw/rtl}/cache/VX_cache_bank_valid.v (100%) rename {rtl => hw/rtl}/cache/VX_cache_data.v (100%) rename {rtl => hw/rtl}/cache/VX_cache_data_per_index.v (100%) rename {rtl => hw/rtl}/cache/VX_d_cache.v (100%) rename {rtl => hw/rtl}/cache/VX_d_cache_encapsulate.v (100%) rename {rtl => hw/rtl}/cache/VX_d_cache_tb.v (100%) rename {rtl => hw/rtl}/cache/VX_generic_pe.v (100%) rename {rtl => hw/rtl}/cache/cache_set.v (100%) rename {rtl => hw/rtl}/cache/d_cache_test_bench.cpp (100%) rename {rtl => hw/rtl}/cache/d_cache_test_bench.h (100%) rename {rtl => hw/rtl}/cache/d_cache_test_bench_debug.h (100%) rename {rtl => hw/rtl}/compat/VX_divide.v (100%) rename {rtl => hw/rtl}/compat/VX_mult.v (100%) rename {rtl => hw/rtl}/compat/VX_tb_divide.sv (100%) rename {rtl => hw/rtl}/interfaces/VX_branch_response_inter.v (100%) rename {rtl => hw/rtl}/interfaces/VX_csr_req_inter.v (100%) rename {rtl => hw/rtl}/interfaces/VX_csr_wb_inter.v (100%) rename {rtl => hw/rtl}/interfaces/VX_dcache_request_inter.v (100%) rename {rtl => hw/rtl}/interfaces/VX_dcache_response_inter.v (100%) rename {rtl => hw/rtl}/interfaces/VX_dram_req_rsp_inter.v (100%) rename {rtl => hw/rtl}/interfaces/VX_exec_unit_req_inter.v (100%) rename {rtl => hw/rtl}/interfaces/VX_frE_to_bckE_req_inter.v (100%) rename {rtl => hw/rtl}/interfaces/VX_gpr_clone_inter.v (100%) rename {rtl => hw/rtl}/interfaces/VX_gpr_data_inter.v (100%) rename {rtl => hw/rtl}/interfaces/VX_gpr_jal_inter.v (100%) rename {rtl => hw/rtl}/interfaces/VX_gpr_read_inter.v (100%) rename {rtl => hw/rtl}/interfaces/VX_gpr_wspawn_inter.v (100%) rename {rtl => hw/rtl}/interfaces/VX_gpu_dcache_dram_req_inter.v (100%) rename {rtl => hw/rtl}/interfaces/VX_gpu_dcache_dram_res_inter.v (100%) rename {rtl => hw/rtl}/interfaces/VX_gpu_dcache_req_inter.v (100%) rename {rtl => hw/rtl}/interfaces/VX_gpu_dcache_res_inter.v (100%) rename {rtl => hw/rtl}/interfaces/VX_gpu_dcache_snp_req_inter.v (100%) rename {rtl => hw/rtl}/interfaces/VX_gpu_inst_req_inter.v (100%) rename 
{rtl => hw/rtl}/interfaces/VX_gpu_snp_req_rsp.v (100%) rename {rtl => hw/rtl}/interfaces/VX_icache_request_inter.v (100%) rename {rtl => hw/rtl}/interfaces/VX_icache_response_inter.v (100%) rename {rtl => hw/rtl}/interfaces/VX_inst_exec_wb_inter.v (100%) rename {rtl => hw/rtl}/interfaces/VX_inst_mem_wb_inter.v (100%) rename {rtl => hw/rtl}/interfaces/VX_inst_meta_inter.v (100%) rename {rtl => hw/rtl}/interfaces/VX_jal_response_inter.v (100%) rename {rtl => hw/rtl}/interfaces/VX_join_inter.v (100%) rename {rtl => hw/rtl}/interfaces/VX_lsu_req_inter.v (100%) rename {rtl => hw/rtl}/interfaces/VX_mem_req_inter.v (100%) rename {rtl => hw/rtl}/interfaces/VX_mw_wb_inter.v (100%) rename {rtl => hw/rtl}/interfaces/VX_warp_ctl_inter.v (100%) rename {rtl => hw/rtl}/interfaces/VX_wb_inter.v (100%) rename {rtl => hw/rtl}/interfaces/VX_wstall_inter.v (100%) rename {rtl => hw/rtl}/pipe_regs/VX_d_e_reg.v (100%) rename {rtl => hw/rtl}/pipe_regs/VX_f_d_reg.v (100%) rename {rtl => hw/rtl}/pipe_regs/VX_i_d_reg.v (100%) rename {rtl => hw/rtl}/shared_memory/VX_bank_valids.v (100%) rename {rtl => hw/rtl}/shared_memory/VX_priority_encoder_sm.v (100%) rename {rtl => hw/rtl}/shared_memory/VX_shared_memory.v (100%) rename {rtl => hw/rtl}/shared_memory/VX_shared_memory_block.v (100%) rename {rtl/cache/quartus => hw/syn/quartus/cache}/Makefile (100%) rename {rtl/VX_cache/quartus => hw/syn/quartus/cache}/project.sdc (100%) rename {rtl/VX_cache/quartus => hw/syn/quartus/cache}/project.tcl (100%) rename {rtl/quartus => hw/syn/quartus/top}/Makefile (100%) rename {rtl/quartus => hw/syn/quartus/top}/VX_timing.tcl (100%) rename {rtl/cache/quartus => hw/syn/quartus/top}/project.tcl (100%) rename {rtl/quartus => hw/syn/quartus/top}/vortex.sdc (100%) rename {rtl/VX_cache/quartus => hw/syn/quartus/vx_cache}/Makefile (100%) rename {rtl/cache/quartus => hw/syn/quartus/vx_cache}/project.sdc (100%) rename {rtl/quartus => hw/syn/quartus/vx_cache}/project.tcl (100%) rename {syn => hw/syn/synopsys}/191017.log 
(100%) rename {syn => hw/syn/synopsys}/Makefile (100%) rename {syn => hw/syn/synopsys}/NanGate_15nm_OCL.db (100%) rename {syn => hw/syn/synopsys}/Vortex.ddc (100%) rename {syn => hw/syn/synopsys}/Vortex.netlist.v (100%) rename {syn => hw/syn/synopsys}/Vortex.sdc (100%) rename {syn => hw/syn/synopsys}/cshrc.dc (100%) rename {syn => hw/syn/synopsys}/dc.log (100%) rename {syn => hw/syn/synopsys}/dc_1GHz.log (100%) rename {syn => hw/syn/synopsys}/dc_noOpt.log (100%) rename {syn => hw/syn/synopsys}/esyn.tcl (100%) rename {syn => hw/syn/synopsys}/fsyn.tcl (100%) rename {syn => hw/syn/synopsys}/run_mult_synth.sh (100%) rename {syn => hw/syn/synopsys}/syn.tcl (100%) rename {syn => hw/syn/synopsys}/vortex_syn.log (100%) rename {rtl => hw}/unit_tests/generic_queue/Makefile (100%) rename {rtl => hw}/unit_tests/generic_queue/testbench.v (100%) rename {rtl/simulate => hw/verilator}/ram.h (100%) rename {rtl/simulate => hw/verilator}/simulator.cpp (100%) rename {rtl/simulate => hw/verilator}/simulator.h (100%) rename {rtl/simulate => hw/verilator}/test_bench.cpp (100%) rename {driver/sw => sw/driver}/Makefile (100%) rename {driver/sw => sw/driver/common}/vx_utils.cpp (100%) rename {driver/sw => sw/driver}/include/vortex.h (100%) rename {driver/sw => sw/driver}/opae/Makefile (100%) rename {driver/sw => sw/driver}/opae/vortex.cpp (100%) rename {driver/sw => sw/driver}/rtlsim/.gitignore (100%) rename {driver/sw => sw/driver}/rtlsim/Makefile (100%) rename {driver/sw => sw/driver}/rtlsim/vortex.cpp (100%) rename {driver/sw => sw/driver}/simx/.gitignore (100%) rename {driver/sw => sw/driver}/simx/Makefile (100%) rename {driver/sw => sw/driver}/simx/vortex.cpp (100%) rename {driver/sw => sw/driver}/stub/Makefile (100%) rename {driver/sw => sw/driver}/stub/vortex.cpp (100%) rename {driver => sw/driver}/tests/basic/Makefile (100%) rename {driver => sw/driver}/tests/basic/basic (100%) rename {driver => sw/driver}/tests/basic/basic.cpp (100%) rename {driver => 
sw/driver}/tests/basic/kernel.bin (100%) rename {driver => sw/driver}/tests/basic/kernel.c (100%) rename {driver => sw/driver}/tests/demo/Makefile (100%) rename {driver => sw/driver}/tests/demo/common.h (100%) rename {driver => sw/driver}/tests/demo/demo (100%) rename {driver => sw/driver}/tests/demo/demo.cpp (100%) rename {driver => sw/driver}/tests/demo/kernel.bin (100%) rename {driver => sw/driver}/tests/demo/kernel.c (100%) rename {driver => sw/driver}/tests/demo/kernel.elf (100%) rename {driver => sw/driver}/tests/demo/run.log (100%) rename {driver => sw/driver/tests}/dogfood/Memcpy/hw/rtl/_hdr (100%) rename {driver => sw/driver/tests}/dogfood/Memcpy/hw/rtl/cci_hello.json (100%) rename {driver => sw/driver/tests}/dogfood/Memcpy/hw/rtl/cci_hello_afu.sv (100%) rename {driver => sw/driver/tests}/dogfood/Memcpy/hw/rtl/cci_hello_afu_working.sv (100%) rename {driver => sw/driver/tests}/dogfood/Memcpy/hw/rtl/sources.txt (100%) rename {driver => sw/driver/tests}/dogfood/Memcpy/hw/sim/setup_ase (100%) rename {driver => sw/driver/tests}/dogfood/Memcpy/sw/Makefile (100%) rename {driver => sw/driver/tests}/dogfood/Memcpy/sw/cci_hello.c (100%) rename {driver => sw/driver/tests}/dogfood/Memcpy/sw/obj/afu_json_info.h (100%) rename {driver => sw/driver/tests}/dogfood/Memcpy/sw/obj/cci_hello.o (100%) rename {runtime => sw/runtime}/.gitignore (100%) rename {runtime => sw/runtime}/Makefile (100%) rename {runtime => sw/runtime}/fileio/fileio.h (100%) rename {runtime => sw/runtime}/fileio/fileio.s (100%) rename {runtime => sw/runtime}/intrinsics/vx_intrinsics.h (100%) rename {runtime => sw/runtime}/intrinsics/vx_intrinsics.s (100%) rename {runtime => sw/runtime}/io/vx_io.c (100%) rename {runtime => sw/runtime}/io/vx_io.h (100%) rename {runtime => sw/runtime}/io/vx_io.s (100%) rename {runtime => sw/runtime}/newlib/newlib.c (100%) rename {runtime => sw/runtime}/qemu/vx_api.c (100%) rename {runtime => sw/runtime}/startup/vx_start.S (100%) rename {runtime/tests => 
sw/runtime/tests/common}/tests.c (100%) rename {runtime/tests => sw/runtime/tests/common}/tests.h (100%) rename {runtime/mains/simple => sw/runtime/tests/common}/vx_tempelate.c (100%) rename {runtime/mains => sw/runtime/tests}/dev/Makefile (100%) rename {runtime/mains => sw/runtime/tests}/dev/vx_dev_main.c (100%) rename {runtime/mains => sw/runtime/tests}/dev/vx_dev_main.dump (100%) rename {runtime/mains => sw/runtime/tests}/dev/vx_dev_main.elf (100%) rename {runtime/mains => sw/runtime/tests}/dev/vx_dev_main.hex (100%) rename {runtime/mains => sw/runtime/tests}/hello/Makefile (100%) rename {runtime/mains => sw/runtime/tests}/hello/hello.cpp (100%) rename {runtime/mains => sw/runtime/tests}/hello/hello.dump (100%) rename {runtime/mains => sw/runtime/tests}/hello/hello.elf (100%) rename {runtime/mains => sw/runtime/tests}/hello/hello.hex (100%) rename {runtime/mains => sw/runtime/tests}/nativevecadd/Makefile (100%) rename {runtime/mains => sw/runtime/tests}/nativevecadd/include/CL/CMakeLists.txt (100%) rename {runtime/mains => sw/runtime/tests}/nativevecadd/include/CL/cl.h (100%) rename {runtime/mains => sw/runtime/tests}/nativevecadd/include/CL/cl.hpp (100%) rename {runtime/mains => sw/runtime/tests}/nativevecadd/include/CL/cl2.hpp (100%) rename {runtime/mains => sw/runtime/tests}/nativevecadd/include/CL/cl_d3d10.h (100%) rename {runtime/mains => sw/runtime/tests}/nativevecadd/include/CL/cl_d3d11.h (100%) rename {runtime/mains => sw/runtime/tests}/nativevecadd/include/CL/cl_dx9_media_sharing.h (100%) rename {runtime/mains => sw/runtime/tests}/nativevecadd/include/CL/cl_dx9_media_sharing_intel.h (100%) rename {runtime/mains => sw/runtime/tests}/nativevecadd/include/CL/cl_egl.h (100%) rename {runtime/mains => sw/runtime/tests}/nativevecadd/include/CL/cl_ext.h (100%) rename {runtime/mains => sw/runtime/tests}/nativevecadd/include/CL/cl_ext_intel.h (100%) rename {runtime/mains => sw/runtime/tests}/nativevecadd/include/CL/cl_gl.h (100%) rename {runtime/mains => 
sw/runtime/tests}/nativevecadd/include/CL/cl_gl_ext.h (100%) rename {runtime/mains => sw/runtime/tests}/nativevecadd/include/CL/cl_platform.h (100%) rename {runtime/mains => sw/runtime/tests}/nativevecadd/include/CL/cl_va_api_media_sharing_intel.h (100%) rename {runtime/mains => sw/runtime/tests}/nativevecadd/include/CL/cl_version.h (100%) rename {runtime/mains => sw/runtime/tests}/nativevecadd/include/CL/opencl.h (100%) rename {runtime/mains => sw/runtime/tests}/nativevecadd/include/CMakeLists.txt (100%) rename {runtime/mains => sw/runtime/tests}/nativevecadd/include/OpenCL/cl.h (100%) rename {runtime/mains => sw/runtime/tests}/nativevecadd/include/OpenCL/cl.hpp (100%) rename {runtime/mains => sw/runtime/tests}/nativevecadd/include/OpenCL/cl_ext.h (100%) rename {runtime/mains => sw/runtime/tests}/nativevecadd/include/OpenCL/cl_gl.h (100%) rename {runtime/mains => sw/runtime/tests}/nativevecadd/include/OpenCL/cl_gl_ext.h (100%) rename {runtime/mains => sw/runtime/tests}/nativevecadd/include/OpenCL/cl_platform.h (100%) rename {runtime/mains => sw/runtime/tests}/nativevecadd/include/OpenCL/opencl.h (100%) rename {runtime/mains => sw/runtime/tests}/nativevecadd/include/_builtin_renames.h (100%) rename {runtime/mains => sw/runtime/tests}/nativevecadd/include/_clang_opencl.h (100%) rename {runtime/mains => sw/runtime/tests}/nativevecadd/include/_enable_all_exts.h (100%) rename {runtime/mains => sw/runtime/tests}/nativevecadd/include/_kernel.h (100%) rename {runtime/mains => sw/runtime/tests}/nativevecadd/include/_kernel_c.h (100%) rename {runtime/mains => sw/runtime/tests}/nativevecadd/include/_kernel_constants.h (100%) rename {runtime/mains => sw/runtime/tests}/nativevecadd/include/pocl.h (100%) rename {runtime/mains => sw/runtime/tests}/nativevecadd/include/pocl_cache.h (100%) rename {runtime/mains => sw/runtime/tests}/nativevecadd/include/pocl_compiler_features.h (100%) rename {runtime/mains => sw/runtime/tests}/nativevecadd/include/pocl_context.h (100%) rename 
{runtime/mains => sw/runtime/tests}/nativevecadd/include/pocl_device.h (100%) rename {runtime/mains => sw/runtime/tests}/nativevecadd/include/pocl_file_util.h (100%) rename {runtime/mains => sw/runtime/tests}/nativevecadd/include/pocl_image_types.h (100%) rename {runtime/mains => sw/runtime/tests}/nativevecadd/include/pocl_spir.h (100%) rename {runtime/mains => sw/runtime/tests}/nativevecadd/include/pocl_types.h (100%) rename {runtime/mains => sw/runtime/tests}/nativevecadd/include/pocl_workgroup_func.h (100%) rename {runtime/mains => sw/runtime/tests}/nativevecadd/include/poclu.h (100%) rename {runtime/mains => sw/runtime/tests}/nativevecadd/include/utlist.h (100%) rename {runtime/mains => sw/runtime/tests}/nativevecadd/include/vccompat.hpp (100%) rename {runtime/mains => sw/runtime/tests}/nativevecadd/libs/libOpenCL.a (100%) rename {runtime/mains => sw/runtime/tests}/nativevecadd/libs/libopencl.dump (100%) rename {runtime/mains => sw/runtime/tests}/nativevecadd/libs/libvecadd.a (100%) rename {runtime/mains => sw/runtime/tests}/nativevecadd/libs/libvecadd.dump (100%) rename {runtime/mains => sw/runtime/tests}/nativevecadd/libs/vecadd (100%) rename {runtime/mains => sw/runtime/tests}/nativevecadd/libs/vecadd.dump (100%) rename {runtime/mains => sw/runtime/tests}/nativevecadd/vx_pocl_main.c (100%) rename {runtime/mains => sw/runtime/tests}/nativevecadd/vx_pocl_main.dump (100%) rename {runtime/mains => sw/runtime/tests}/nativevecadd/vx_pocl_main.elf (100%) rename {runtime/mains => sw/runtime/tests}/nativevecadd/vx_pocl_main.hex (100%) rename {runtime/mains => sw/runtime/tests}/nlTest/Makefile (100%) rename {runtime/mains => sw/runtime/tests}/nlTest/vx_nl_main.c (100%) rename {runtime/mains => sw/runtime/tests}/nlTest/vx_nl_main.dump (100%) rename {runtime/mains => sw/runtime/tests}/nlTest/vx_nl_main.elf (100%) rename {runtime/mains => sw/runtime/tests}/nlTest/vx_nl_main.hex (100%) rename {runtime/mains => sw/runtime/tests}/simple/Makefile (100%) rename {runtime/mains 
=> sw/runtime/tests}/simple/vx_simple_main.c (100%) rename {runtime/mains => sw/runtime/tests}/simple/vx_simple_main.dump (100%) rename {runtime/mains => sw/runtime/tests}/simple/vx_simple_main.elf (100%) rename {runtime/mains => sw/runtime/tests}/simple/vx_simple_main.hex (100%) rename {runtime => sw/runtime/tests/simple}/vx_tempelate.c (100%) rename {runtime/mains => sw/runtime/tests}/vecadd/Makefile (100%) rename {runtime/mains => sw/runtime/tests}/vecadd/include/CL/CMakeLists.txt (100%) rename {runtime/mains => sw/runtime/tests}/vecadd/include/CL/cl.h (100%) rename {runtime/mains => sw/runtime/tests}/vecadd/include/CL/cl.hpp (100%) rename {runtime/mains => sw/runtime/tests}/vecadd/include/CL/cl2.hpp (100%) rename {runtime/mains => sw/runtime/tests}/vecadd/include/CL/cl_d3d10.h (100%) rename {runtime/mains => sw/runtime/tests}/vecadd/include/CL/cl_d3d11.h (100%) rename {runtime/mains => sw/runtime/tests}/vecadd/include/CL/cl_dx9_media_sharing.h (100%) rename {runtime/mains => sw/runtime/tests}/vecadd/include/CL/cl_dx9_media_sharing_intel.h (100%) rename {runtime/mains => sw/runtime/tests}/vecadd/include/CL/cl_egl.h (100%) rename {runtime/mains => sw/runtime/tests}/vecadd/include/CL/cl_ext.h (100%) rename {runtime/mains => sw/runtime/tests}/vecadd/include/CL/cl_ext_intel.h (100%) rename {runtime/mains => sw/runtime/tests}/vecadd/include/CL/cl_gl.h (100%) rename {runtime/mains => sw/runtime/tests}/vecadd/include/CL/cl_gl_ext.h (100%) rename {runtime/mains => sw/runtime/tests}/vecadd/include/CL/cl_platform.h (100%) rename {runtime/mains => sw/runtime/tests}/vecadd/include/CL/cl_va_api_media_sharing_intel.h (100%) rename {runtime/mains => sw/runtime/tests}/vecadd/include/CL/cl_version.h (100%) rename {runtime/mains => sw/runtime/tests}/vecadd/include/CL/opencl.h (100%) rename {runtime/mains => sw/runtime/tests}/vecadd/include/CMakeLists.txt (100%) rename {runtime/mains => sw/runtime/tests}/vecadd/include/OpenCL/cl.h (100%) rename {runtime/mains => 
sw/runtime/tests}/vecadd/include/OpenCL/cl.hpp (100%) rename {runtime/mains => sw/runtime/tests}/vecadd/include/OpenCL/cl_ext.h (100%) rename {runtime/mains => sw/runtime/tests}/vecadd/include/OpenCL/cl_gl.h (100%) rename {runtime/mains => sw/runtime/tests}/vecadd/include/OpenCL/cl_gl_ext.h (100%) rename {runtime/mains => sw/runtime/tests}/vecadd/include/OpenCL/cl_platform.h (100%) rename {runtime/mains => sw/runtime/tests}/vecadd/include/OpenCL/opencl.h (100%) rename {runtime/mains => sw/runtime/tests}/vecadd/include/_builtin_renames.h (100%) rename {runtime/mains => sw/runtime/tests}/vecadd/include/_clang_opencl.h (100%) rename {runtime/mains => sw/runtime/tests}/vecadd/include/_enable_all_exts.h (100%) rename {runtime/mains => sw/runtime/tests}/vecadd/include/_kernel.h (100%) rename {runtime/mains => sw/runtime/tests}/vecadd/include/_kernel_c.h (100%) rename {runtime/mains => sw/runtime/tests}/vecadd/include/_kernel_constants.h (100%) rename {runtime/mains => sw/runtime/tests}/vecadd/include/pocl.h (100%) rename {runtime/mains => sw/runtime/tests}/vecadd/include/pocl_cache.h (100%) rename {runtime/mains => sw/runtime/tests}/vecadd/include/pocl_compiler_features.h (100%) rename {runtime/mains => sw/runtime/tests}/vecadd/include/pocl_context.h (100%) rename {runtime/mains => sw/runtime/tests}/vecadd/include/pocl_device.h (100%) rename {runtime/mains => sw/runtime/tests}/vecadd/include/pocl_file_util.h (100%) rename {runtime/mains => sw/runtime/tests}/vecadd/include/pocl_image_types.h (100%) rename {runtime/mains => sw/runtime/tests}/vecadd/include/pocl_spir.h (100%) rename {runtime/mains => sw/runtime/tests}/vecadd/include/pocl_types.h (100%) rename {runtime/mains => sw/runtime/tests}/vecadd/include/pocl_workgroup_func.h (100%) rename {runtime/mains => sw/runtime/tests}/vecadd/include/poclu.h (100%) rename {runtime/mains => sw/runtime/tests}/vecadd/include/utlist.h (100%) rename {runtime/mains => sw/runtime/tests}/vecadd/include/vccompat.hpp (100%) rename 
{runtime/mains => sw/runtime/tests}/vecadd/libs/libOpenCL.a (100%) rename {runtime/mains => sw/runtime/tests}/vecadd/libs/libopencl.dump (100%) rename {runtime/mains => sw/runtime/tests}/vecadd/libs/libvecadd.a (100%) rename {runtime/mains => sw/runtime/tests}/vecadd/libs/libvecadd.dump (100%) rename {runtime/mains => sw/runtime/tests}/vecadd/libs/vecadd (100%) rename {runtime/mains => sw/runtime/tests}/vecadd/libs/vecadd.dump (100%) rename {runtime/mains => sw/runtime/tests}/vecadd/vecadd.cl (100%) rename {runtime/mains => sw/runtime/tests}/vecadd/vx_pocl_main.c (100%) rename {runtime/mains => sw/runtime/tests}/vecadd/vx_pocl_main.dump (100%) rename {runtime/mains => sw/runtime/tests}/vecadd/vx_pocl_main.elf (100%) rename {runtime/mains => sw/runtime/tests}/vecadd/vx_pocl_main.hex (100%) rename {runtime/mains => sw/runtime/tests}/vector_test/Makefile (100%) rename {runtime/mains => sw/runtime/tests}/vector_test/vx_vec.h (100%) rename {runtime/mains => sw/runtime/tests}/vector_test/vx_vec.s (100%) rename {runtime/mains => sw/runtime/tests}/vector_test/vx_vec_original.s (100%) rename {runtime/mains => sw/runtime/tests}/vector_test/vx_vector_main.c (100%) rename {runtime/mains => sw/runtime/tests}/vector_test/vx_vector_main.dump (100%) rename {runtime/mains => sw/runtime/tests}/vector_test/vx_vector_main.elf (100%) rename {runtime/mains => sw/runtime/tests}/vector_test/vx_vector_main.hex (100%) rename {runtime/mains => sw/runtime/tests}/vortex_link.ld (100%) rename {runtime => sw/runtime}/vx_api/vx_api.c (100%) rename {runtime => sw/runtime}/vx_api/vx_api.h (100%) rename {rvvector => sw/rvvector}/basic/Makefile (100%) rename {rvvector => sw/rvvector}/basic/_1_vx_vec.s (100%) rename {rvvector => sw/rvvector}/basic/_1_vx_vector_main.c (100%) rename {rvvector => sw/rvvector}/basic/__vx_vector_main.c (100%) rename {rvvector => sw/rvvector}/basic/vx_vec.h (100%) rename {rvvector => sw/rvvector}/basic/vx_vec.s (100%) rename {rvvector => sw/rvvector}/basic/vx_vec_main.c 
(100%) rename {rvvector => sw/rvvector}/basic/vx_vector_main.c (100%) rename {rvvector => sw/rvvector}/benchmark_temp/1 (100%) rename {rvvector => sw/rvvector}/benchmark_temp/Makefile (100%) rename {rvvector => sw/rvvector}/benchmark_temp/TO_DO_LIST (100%) rename {rvvector => sw/rvvector}/benchmark_temp/vx_vec_benchmark.c (100%) rename {rvvector => sw/rvvector}/benchmark_temp/vx_vec_benchmark.dump (100%) rename {rvvector => sw/rvvector}/benchmark_temp/vx_vec_benchmark.elf (100%) rename {rvvector => sw/rvvector}/benchmark_temp/vx_vec_benchmark.h (100%) rename {rvvector => sw/rvvector}/benchmark_temp/vx_vec_benchmark.hex (100%) rename {rvvector => sw/rvvector}/benchmark_temp/vx_vec_memcpy.s (100%) rename {rvvector => sw/rvvector}/benchmark_temp/vx_vec_saxpy.s (100%) rename {rvvector => sw/rvvector}/benchmark_temp/vx_vec_saxpy_float.s (100%) rename {rvvector => sw/rvvector}/benchmark_temp/vx_vec_sgemm.s (100%) rename {rvvector => sw/rvvector}/benchmark_temp/vx_vec_sgemm_float.s (100%) rename {rvvector => sw/rvvector}/benchmark_temp/vx_vec_vsadd.s (100%) rename {rvvector => sw/rvvector}/benchmark_temp/vx_vec_vvaddint32.s (100%) diff --git a/benchmarks/new_opencl/bfs/Makefile b/benchmarks/new_opencl/bfs/Makefile deleted file mode 100644 index 54bd3805..00000000 --- a/benchmarks/new_opencl/bfs/Makefile +++ /dev/null @@ -1,47 +0,0 @@ -LLVM_HOME ?= ~/dev/llvm-project/drops -TOOLCHAIN_PATH ?= ~/dev/riscv-gnu-toolchain/drops -SYSROOT ?= $(TOOLCHAIN_PATH)/riscv32-unknown-elf -POCL_CC_PATH ?= $(realpath ../compiler) -POCL_RT_PATH ?= $(realpath ../runtime) -VORTEX_DRV_PATH ?= $(realpath ../../../driver/sw) -VORTEX_RT_PATH ?= $(realpath ../../../runtime) - -CXXFLAGS += -std=c++11 -O0 -g -fpermissive -Wall -Wextra -pedantic -Wfatal-errors - -CXXFLAGS += -I$(POCL_RT_PATH)/include - -LDFLAGS += -L$(POCL_RT_PATH)/lib -L$(VORTEX_DRV_PATH)/stub -lOpenCL -lvortex - -PROJECT = bfs - -SRCS = main.cc - -all: $(PROJECT) - -kernel.pocl: kernel.cl - TOOLCHAIN_PATH=$(TOOLCHAIN_PATH) 
SYSROOT=$(SYSROOT) LLVM_HOME=$(LLVM_HOME) VORTEX_RUNTIME_PATH=$(VORTEX_RT_PATH) POCL_DEBUG=all LD_LIBRARY_PATH=$(LLVM_HOME)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o kernel.pocl kernel.cl - -$(PROJECT): $(SRCS) - $(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@ - -run-fpga: $(PROJECT) kernel.pocl - LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) - -run-ase: $(PROJECT) kernel.pocl - ASE_LOG=0 LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) - -run-simx: $(PROJECT) kernel.pocl - LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) - -run-rtlsim: $(PROJECT) kernel.pocl - LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) - -.depend: $(SRCS) - $(CXX) $(CXXFLAGS) -MM $^ > .depend; - -clean: - rm -rf $(PROJECT) *.o *.dump .depend - -ifneq ($(MAKECMDGOALS),clean) - -include .depend -endif \ No newline at end of file diff --git a/benchmarks/new_opencl/convolution/Makefile b/benchmarks/new_opencl/convolution/Makefile deleted file mode 100644 index d0c878f7..00000000 --- a/benchmarks/new_opencl/convolution/Makefile +++ /dev/null @@ -1,47 +0,0 @@ -LLVM_HOME ?= ~/dev/llvm-project/drops -TOOLCHAIN_PATH ?= ~/dev/riscv-gnu-toolchain/drops -SYSROOT ?= $(TOOLCHAIN_PATH)/riscv32-unknown-elf -POCL_CC_PATH ?= $(realpath ../compiler) -POCL_RT_PATH ?= $(realpath ../runtime) -VORTEX_DRV_PATH ?= $(realpath ../../../driver/sw) -VORTEX_RT_PATH ?= $(realpath ../../../runtime) - -CXXFLAGS += -std=c++11 -O0 -g -fpermissive -Wall -Wextra -pedantic -Wfatal-errors - -CXXFLAGS += -I$(POCLRT_PATH)/include - -LDFLAGS += -L$(POCLRT_PATH)/lib -L$(DRIVER_PATH)/simx -lOpenCL -lvortex - -PROJECT = convolution - -SRCS = main.cpp utils.cpp - -all: $(PROJECT) - -kernel.pocl: kernel.cl - TOOLCHAIN_PATH=$(TOOLCHAIN_PATH) SYSROOT=$(SYSROOT) LLVM_HOME=$(LLVM_HOME) VORTEX_RUNTIME_PATH=$(VORTEX_RT_PATH) POCL_DEBUG=all 
LD_LIBRARY_PATH=$(LLVM_HOME)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o kernel.pocl kernel.cl - -$(PROJECT): $(SRCS) - $(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@ - -run-fpga: $(PROJECT) kernel.pocl - LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) - -run-ase: $(PROJECT) kernel.pocl - LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) - -run-simx: $(PROJECT) kernel.pocl - LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) - -run-rtlsim: $(PROJECT) kernel.pocl - LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) - -.depend: $(SRCS) - $(CXX) $(CXXFLAGS) -MM $^ > .depend; - -clean: - rm -rf $(PROJECT) *.o *.dump .depend - -ifneq ($(MAKECMDGOALS),clean) - -include .depend -endif diff --git a/benchmarks/new_opencl/guassian/Makefile b/benchmarks/new_opencl/guassian/Makefile deleted file mode 100644 index 2584104d..00000000 --- a/benchmarks/new_opencl/guassian/Makefile +++ /dev/null @@ -1,47 +0,0 @@ -LLVM_HOME ?= ~/dev/llvm-project/drops -TOOLCHAIN_PATH ?= ~/dev/riscv-gnu-toolchain/drops -SYSROOT ?= $(TOOLCHAIN_PATH)/riscv32-unknown-elf -POCL_CC_PATH ?= $(realpath ../compiler) -POCL_RT_PATH ?= $(realpath ../runtime) -VORTEX_DRV_PATH ?= $(realpath ../../../driver/sw) -VORTEX_RT_PATH ?= $(realpath ../../../runtime) - -CXXFLAGS += -std=c++11 -O0 -g -fpermissive -Wall -Wextra -pedantic -Wfatal-errors - -CXXFLAGS += -I$(POCL_RT_PATH)/include - -LDFLAGS += -L$(POCL_RT_PATH)/lib -L$(VORTEX_DRV_PATH)/stub -lOpenCL -lvortex - -PROJECT = guassian - -SRCS = main.cc clutils.cpp utils.cpp - -all: $(PROJECT) - -kernel.pocl: kernel.cl - TOOLCHAIN_PATH=$(TOOLCHAIN_PATH) SYSROOT=$(SYSROOT) LLVM_HOME=$(LLVM_HOME) VORTEX_RUNTIME_PATH=$(VORTEX_RT_PATH) POCL_DEBUG=all LD_LIBRARY_PATH=$(LLVM_HOME)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o kernel.pocl kernel.cl - -$(PROJECT): $(SRCS) - $(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@ - 
-run-fpga: $(PROJECT) kernel.pocl - LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) - -run-ase: $(PROJECT) kernel.pocl - ASE_LOG=0 LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) - -run-simx: $(PROJECT) kernel.pocl - LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) - -run-rtlsim: $(PROJECT) kernel.pocl - LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) - -.depend: $(SRCS) - $(CXX) $(CXXFLAGS) -MM $^ > .depend; - -clean: - rm -rf $(PROJECT) *.o *.dump .depend - -ifneq ($(MAKECMDGOALS),clean) - -include .depend -endif \ No newline at end of file diff --git a/benchmarks/new_opencl/kmeans/Makefile b/benchmarks/new_opencl/kmeans/Makefile deleted file mode 100644 index d08ff166..00000000 --- a/benchmarks/new_opencl/kmeans/Makefile +++ /dev/null @@ -1,47 +0,0 @@ -LLVM_HOME ?= ~/dev/llvm-project/drops -TOOLCHAIN_PATH ?= ~/dev/riscv-gnu-toolchain/drops -SYSROOT ?= $(TOOLCHAIN_PATH)/riscv32-unknown-elf -POCL_CC_PATH ?= $(realpath ../compiler) -POCL_RT_PATH ?= $(realpath ../runtime) -VORTEX_DRV_PATH ?= $(realpath ../../../driver/sw) -VORTEX_RT_PATH ?= $(realpath ../../../runtime) - -CXXFLAGS += -std=c++11 -O0 -g -fpermissive -Wall -Wextra -pedantic -Wfatal-errors - -CXXFLAGS += -I$(POCL_RT_PATH)/include - -LDFLAGS += -L$(POCL_RT_PATH)/lib -L$(VORTEX_DRV_PATH)/stub -lOpenCL -lvortex - -PROJECT = kmeans - -SRCS = main.cc read_input.c rmse.c kmeans_clustering.c cluster.c getopt.c - -all: $(PROJECT) - -kernel.pocl: kernel.cl - TOOLCHAIN_PATH=$(TOOLCHAIN_PATH) SYSROOT=$(SYSROOT) LLVM_HOME=$(LLVM_HOME) VORTEX_RUNTIME_PATH=$(VORTEX_RT_PATH) POCL_DEBUG=all LD_LIBRARY_PATH=$(LLVM_HOME)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o kernel.pocl kernel.cl - -$(PROJECT): $(SRCS) - $(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@ - -run-fpga: $(PROJECT) kernel.pocl - 
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) - -run-ase: $(PROJECT) kernel.pocl - ASE_LOG=0 LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) - -run-simx: $(PROJECT) kernel.pocl - LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) - -run-rtlsim: $(PROJECT) kernel.pocl - LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) - -.depend: $(SRCS) - $(CXX) $(CXXFLAGS) -MM $^ > .depend; - -clean: - rm -rf $(PROJECT) *.o *.dump .depend - -ifneq ($(MAKECMDGOALS),clean) - -include .depend -endif diff --git a/benchmarks/new_opencl/nearn/Makefile b/benchmarks/new_opencl/nearn/Makefile deleted file mode 100644 index 3fbd20e0..00000000 --- a/benchmarks/new_opencl/nearn/Makefile +++ /dev/null @@ -1,47 +0,0 @@ -LLVM_HOME ?= ~/dev/llvm-project/drops -TOOLCHAIN_PATH ?= ~/dev/riscv-gnu-toolchain/drops -SYSROOT ?= $(TOOLCHAIN_PATH)/riscv32-unknown-elf -POCL_CC_PATH ?= $(realpath ../compiler) -POCL_RT_PATH ?= $(realpath ../runtime) -VORTEX_DRV_PATH ?= $(realpath ../../../driver/sw) -VORTEX_RT_PATH ?= $(realpath ../../../runtime) - -CXXFLAGS += -std=c++11 -O0 -g -fpermissive -Wall -Wextra -pedantic -Wfatal-errors - -CXXFLAGS += -I$(POCL_RT_PATH)/include - -LDFLAGS += -L$(POCL_RT_PATH)/lib -L$(VORTEX_DRV_PATH)/stub -lOpenCL -lvortex - -PROJECT = nearn - -SRCS = main.cc clutils.cpp utils.cpp - -all: $(PROJECT) - -kernel.pocl: kernel.cl - TOOLCHAIN_PATH=$(TOOLCHAIN_PATH) SYSROOT=$(SYSROOT) LLVM_HOME=$(LLVM_HOME) VORTEX_RUNTIME_PATH=$(VORTEX_RT_PATH) POCL_DEBUG=all LD_LIBRARY_PATH=$(LLVM_HOME)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o kernel.pocl kernel.cl - -$(PROJECT): $(SRCS) - $(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@ - -run-fpga: $(PROJECT) kernel.pocl - LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) - -run-ase: $(PROJECT) kernel.pocl - ASE_LOG=0 
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) - -run-simx: $(PROJECT) kernel.pocl - LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) - -run-rtlsim: $(PROJECT) kernel.pocl - LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) - -.depend: $(SRCS) - $(CXX) $(CXXFLAGS) -MM $^ > .depend; - -clean: - rm -rf $(PROJECT) *.o *.dump .depend - -ifneq ($(MAKECMDGOALS),clean) - -include .depend -endif \ No newline at end of file diff --git a/benchmarks/new_opencl/saxpy/Makefile b/benchmarks/new_opencl/saxpy/Makefile deleted file mode 100644 index 55773533..00000000 --- a/benchmarks/new_opencl/saxpy/Makefile +++ /dev/null @@ -1,47 +0,0 @@ -LLVM_HOME ?= ~/dev/llvm-project/drops -TOOLCHAIN_PATH ?= ~/dev/riscv-gnu-toolchain/drops -SYSROOT ?= $(TOOLCHAIN_PATH)/riscv32-unknown-elf -POCL_CC_PATH ?= $(realpath ../compiler) -POCL_RT_PATH ?= $(realpath ../runtime) -VORTEX_DRV_PATH ?= $(realpath ../../../driver/sw) -VORTEX_RT_PATH ?= $(realpath ../../../runtime) - -CXXFLAGS += -std=c++11 -O0 -g -fpermissive -Wall -Wextra -pedantic -Wfatal-errors - -CXXFLAGS += -I$(POCL_RT_PATH)/include - -LDFLAGS += -L$(POCL_RT_PATH)/lib -L$(VORTEX_DRV_PATH)/stub -lOpenCL -lvortex - -PROJECT = saxpy - -SRCS = main.cc - -all: $(PROJECT) - -kernel.pocl: kernel.cl - TOOLCHAIN_PATH=$(TOOLCHAIN_PATH) SYSROOT=$(SYSROOT) LLVM_HOME=$(LLVM_HOME) VORTEX_RUNTIME_PATH=$(VORTEX_RT_PATH) POCL_DEBUG=all LD_LIBRARY_PATH=$(LLVM_HOME)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o kernel.pocl kernel.cl - -$(PROJECT): $(SRCS) - $(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@ - -run-fpga: $(PROJECT) kernel.pocl - LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) - -run-ase: $(PROJECT) kernel.pocl - ASE_LOG=0 LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) - -run-simx: $(PROJECT) kernel.pocl - 
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) - -run-rtlsim: $(PROJECT) kernel.pocl - LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) - -.depend: $(SRCS) - $(CXX) $(CXXFLAGS) -MM $^ > .depend; - -clean: - rm -rf $(PROJECT) *.o *.dump .depend - -ifneq ($(MAKECMDGOALS),clean) - -include .depend -endif \ No newline at end of file diff --git a/benchmarks/new_opencl/sfilter/Makefile b/benchmarks/new_opencl/sfilter/Makefile deleted file mode 100644 index d1d0a41d..00000000 --- a/benchmarks/new_opencl/sfilter/Makefile +++ /dev/null @@ -1,47 +0,0 @@ -LLVM_HOME ?= ~/dev/llvm-project/drops -TOOLCHAIN_PATH ?= ~/dev/riscv-gnu-toolchain/drops -SYSROOT ?= $(TOOLCHAIN_PATH)/riscv32-unknown-elf -POCL_CC_PATH ?= $(realpath ../compiler) -POCL_RT_PATH ?= $(realpath ../runtime) -VORTEX_DRV_PATH ?= $(realpath ../../../driver/sw) -VORTEX_RT_PATH ?= $(realpath ../../../runtime) - -CXXFLAGS += -std=c++11 -O0 -g -fpermissive -Wall -Wextra -pedantic -Wfatal-errors - -CXXFLAGS += -I$(POCL_RT_PATH)/include - -LDFLAGS += -L$(POCL_RT_PATH)/lib -L$(VORTEX_DRV_PATH)/stub -lOpenCL -lvortex - -PROJECT = sfilter - -SRCS = main.cc - -all: $(PROJECT) - -kernel.pocl: kernel.cl - TOOLCHAIN_PATH=$(TOOLCHAIN_PATH) SYSROOT=$(SYSROOT) LLVM_HOME=$(LLVM_HOME) VORTEX_RUNTIME_PATH=$(VORTEX_RT_PATH) POCL_DEBUG=all LD_LIBRARY_PATH=$(LLVM_HOME)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o kernel.pocl kernel.cl - -$(PROJECT): $(SRCS) - $(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@ - -run-fpga: $(PROJECT) kernel.pocl - LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) - -run-ase: $(PROJECT) kernel.pocl - ASE_LOG=0 LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) - -run-simx: $(PROJECT) kernel.pocl - LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) - -run-rtlsim: $(PROJECT) kernel.pocl - 
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) - -.depend: $(SRCS) - $(CXX) $(CXXFLAGS) -MM $^ > .depend; - -clean: - rm -rf $(PROJECT) *.o *.dump .depend - -ifneq ($(MAKECMDGOALS),clean) - -include .depend -endif \ No newline at end of file diff --git a/benchmarks/new_opencl/sgemm/Makefile b/benchmarks/new_opencl/sgemm/Makefile deleted file mode 100644 index b2817ae4..00000000 --- a/benchmarks/new_opencl/sgemm/Makefile +++ /dev/null @@ -1,47 +0,0 @@ -LLVM_HOME ?= ~/dev/llvm-project/drops -TOOLCHAIN_PATH ?= ~/dev/riscv-gnu-toolchain/drops -SYSROOT ?= $(TOOLCHAIN_PATH)/riscv32-unknown-elf -POCL_CC_PATH ?= $(realpath ../compiler) -POCL_RT_PATH ?= $(realpath ../runtime) -VORTEX_DRV_PATH ?= $(realpath ../../../driver/sw) -VORTEX_RT_PATH ?= $(realpath ../../../runtime) - -CXXFLAGS += -std=c++11 -O0 -g -fpermissive -Wall -Wextra -pedantic -Wfatal-errors - -CXXFLAGS += -I$(POCL_RT_PATH)/include - -LDFLAGS += -L$(POCL_RT_PATH)/lib -L$(VORTEX_DRV_PATH)/stub -lOpenCL -lvortex - -PROJECT = sgemm - -SRCS = main.cc - -all: $(PROJECT) - -kernel.pocl: kernel.cl - TOOLCHAIN_PATH=$(TOOLCHAIN_PATH) SYSROOT=$(SYSROOT) LLVM_HOME=$(LLVM_HOME) VORTEX_RUNTIME_PATH=$(VORTEX_RT_PATH) POCL_DEBUG=all LD_LIBRARY_PATH=$(LLVM_HOME)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o kernel.pocl kernel.cl - -$(PROJECT): $(SRCS) - $(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@ - -run-fpga: $(PROJECT) kernel.pocl - LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) - -run-ase: $(PROJECT) kernel.pocl - ASE_LOG=0 LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) - -run-simx: $(PROJECT) kernel.pocl - LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) - -run-rtlsim: $(PROJECT) kernel.pocl - LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) - -.depend: $(SRCS) - $(CXX) $(CXXFLAGS) -MM $^ > 
.depend; - -clean: - rm -rf $(PROJECT) *.o *.dump .depend - -ifneq ($(MAKECMDGOALS),clean) - -include .depend -endif \ No newline at end of file diff --git a/benchmarks/new_opencl/transpose/Makefile b/benchmarks/new_opencl/transpose/Makefile deleted file mode 100644 index c122c176..00000000 --- a/benchmarks/new_opencl/transpose/Makefile +++ /dev/null @@ -1,47 +0,0 @@ -LLVM_HOME ?= ~/dev/llvm-project/drops -TOOLCHAIN_PATH ?= ~/dev/riscv-gnu-toolchain/drops -SYSROOT ?= $(TOOLCHAIN_PATH)/riscv32-unknown-elf -POCL_CC_PATH ?= $(realpath ../compiler) -POCL_RT_PATH ?= $(realpath ../runtime) -VORTEX_DRV_PATH ?= $(realpath ../../../driver/sw) -VORTEX_RT_PATH ?= $(realpath ../../../runtime) - -CXXFLAGS += -std=c++11 -O0 -g -fpermissive -Wall -Wextra -pedantic -Wfatal-errors - -CXXFLAGS += -I$(POCLRT_PATH)/include - -LDFLAGS += -L$(POCLRT_PATH)/lib -L$(DRIVER_PATH)/simx -lOpenCL -lvortex - -PROJECT = transpose - -SRCS = main.cc transpose_gold.cpp - -all: $(PROJECT) - -kernel.pocl: kernel.cl - TOOLCHAIN_PATH=$(TOOLCHAIN_PATH) SYSROOT=$(SYSROOT) LLVM_HOME=$(LLVM_HOME) VORTEX_RUNTIME_PATH=$(VORTEX_RT_PATH) POCL_DEBUG=all LD_LIBRARY_PATH=$(LLVM_HOME)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o kernel.pocl kernel.cl - -$(PROJECT): $(SRCS) - $(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@ - -run-fpga: $(PROJECT) kernel.pocl - LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) - -run-ase: $(PROJECT) kernel.pocl - LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) - -run-simx: $(PROJECT) kernel.pocl - LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) - -run-rtlsim: $(PROJECT) kernel.pocl - LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) - -.depend: $(SRCS) - $(CXX) $(CXXFLAGS) -MM $^ > .depend; - -clean: - rm -rf $(PROJECT) *.o *.dump .depend - -ifneq ($(MAKECMDGOALS),clean) - -include .depend -endif diff --git 
a/benchmarks/new_opencl/vecadd/Makefile b/benchmarks/new_opencl/vecadd/Makefile deleted file mode 100644 index 145f51ea..00000000 --- a/benchmarks/new_opencl/vecadd/Makefile +++ /dev/null @@ -1,47 +0,0 @@ -LLVM_HOME ?= ~/dev/llvm-project/drops -TOOLCHAIN_PATH ?= ~/dev/riscv-gnu-toolchain/drops -SYSROOT ?= $(TOOLCHAIN_PATH)/riscv32-unknown-elf -POCL_CC_PATH ?= $(realpath ../compiler) -POCL_RT_PATH ?= $(realpath ../runtime) -VORTEX_DRV_PATH ?= $(realpath ../../../driver/sw) -VORTEX_RT_PATH ?= $(realpath ../../../runtime) - -CXXFLAGS += -std=c++11 -O0 -g -fpermissive -Wall -Wextra -pedantic -Wfatal-errors - -CXXFLAGS += -I$(POCL_RT_PATH)/include - -LDFLAGS += -L$(POCL_RT_PATH)/lib -L$(VORTEX_DRV_PATH)/stub -lOpenCL -lvortex - -PROJECT = vecadd - -SRCS = main.cc - -all: $(PROJECT) - -kernel.pocl: kernel.cl - TOOLCHAIN_PATH=$(TOOLCHAIN_PATH) SYSROOT=$(SYSROOT) LLVM_HOME=$(LLVM_HOME) VORTEX_RUNTIME_PATH=$(VORTEX_RT_PATH) POCL_DEBUG=all LD_LIBRARY_PATH=$(LLVM_HOME)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o kernel.pocl kernel.cl - -$(PROJECT): $(SRCS) - $(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@ - -run-fpga: $(PROJECT) kernel.pocl - LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) - -run-ase: $(PROJECT) kernel.pocl - ASE_LOG=0 LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) - -run-simx: $(PROJECT) kernel.pocl - LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) - -run-rtlsim: $(PROJECT) kernel.pocl - LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) - -.depend: $(SRCS) - $(CXX) $(CXXFLAGS) -MM $^ > .depend; - -clean: - rm -rf $(PROJECT) *.o *.dump .depend - -ifneq ($(MAKECMDGOALS),clean) - -include .depend -endif \ No newline at end of file diff --git a/benchmarks/opencl/BlackScholes/BlackScholes.cl b/benchmarks/old_opencl/BlackScholes/BlackScholes.cl similarity index 100% rename from 
benchmarks/opencl/BlackScholes/BlackScholes.cl rename to benchmarks/old_opencl/BlackScholes/BlackScholes.cl diff --git a/benchmarks/opencl/BlackScholes/Makefile b/benchmarks/old_opencl/BlackScholes/Makefile similarity index 100% rename from benchmarks/opencl/BlackScholes/Makefile rename to benchmarks/old_opencl/BlackScholes/Makefile diff --git a/benchmarks/opencl/BlackScholes/main.cpp b/benchmarks/old_opencl/BlackScholes/main.cpp similarity index 100% rename from benchmarks/opencl/BlackScholes/main.cpp rename to benchmarks/old_opencl/BlackScholes/main.cpp diff --git a/benchmarks/opencl/BlackScholes/oclBlackScholes.pdf b/benchmarks/old_opencl/BlackScholes/oclBlackScholes.pdf similarity index 100% rename from benchmarks/opencl/BlackScholes/oclBlackScholes.pdf rename to benchmarks/old_opencl/BlackScholes/oclBlackScholes.pdf diff --git a/benchmarks/opencl/BlackScholes/oclBlackScholes_common.h b/benchmarks/old_opencl/BlackScholes/oclBlackScholes_common.h similarity index 100% rename from benchmarks/opencl/BlackScholes/oclBlackScholes_common.h rename to benchmarks/old_opencl/BlackScholes/oclBlackScholes_common.h diff --git a/benchmarks/opencl/BlackScholes/oclBlackScholes_gold.cpp b/benchmarks/old_opencl/BlackScholes/oclBlackScholes_gold.cpp similarity index 100% rename from benchmarks/opencl/BlackScholes/oclBlackScholes_gold.cpp rename to benchmarks/old_opencl/BlackScholes/oclBlackScholes_gold.cpp diff --git a/benchmarks/opencl/BlackScholes/oclBlackScholes_launcher.cpp b/benchmarks/old_opencl/BlackScholes/oclBlackScholes_launcher.cpp similarity index 100% rename from benchmarks/opencl/BlackScholes/oclBlackScholes_launcher.cpp rename to benchmarks/old_opencl/BlackScholes/oclBlackScholes_launcher.cpp diff --git a/benchmarks/opencl/BlackScholes/oclUtils.h b/benchmarks/old_opencl/BlackScholes/oclUtils.h similarity index 100% rename from benchmarks/opencl/BlackScholes/oclUtils.h rename to benchmarks/old_opencl/BlackScholes/oclUtils.h diff --git 
a/benchmarks/opencl/BlackScholes/shrQATest.h b/benchmarks/old_opencl/BlackScholes/shrQATest.h similarity index 100% rename from benchmarks/opencl/BlackScholes/shrQATest.h rename to benchmarks/old_opencl/BlackScholes/shrQATest.h diff --git a/benchmarks/opencl/BlackScholes/shrUtils.h b/benchmarks/old_opencl/BlackScholes/shrUtils.h similarity index 100% rename from benchmarks/opencl/BlackScholes/shrUtils.h rename to benchmarks/old_opencl/BlackScholes/shrUtils.h diff --git a/benchmarks/opencl/DotProduct/DotProduct.cl b/benchmarks/old_opencl/DotProduct/DotProduct.cl similarity index 100% rename from benchmarks/opencl/DotProduct/DotProduct.cl rename to benchmarks/old_opencl/DotProduct/DotProduct.cl diff --git a/benchmarks/opencl/DotProduct/Makefile b/benchmarks/old_opencl/DotProduct/Makefile similarity index 100% rename from benchmarks/opencl/DotProduct/Makefile rename to benchmarks/old_opencl/DotProduct/Makefile diff --git a/benchmarks/opencl/DotProduct/main.cc b/benchmarks/old_opencl/DotProduct/main.cc similarity index 100% rename from benchmarks/opencl/DotProduct/main.cc rename to benchmarks/old_opencl/DotProduct/main.cc diff --git a/benchmarks/opencl/DotProduct/oclUtils.h b/benchmarks/old_opencl/DotProduct/oclUtils.h similarity index 100% rename from benchmarks/opencl/DotProduct/oclUtils.h rename to benchmarks/old_opencl/DotProduct/oclUtils.h diff --git a/benchmarks/opencl/DotProduct/shrQATest.h b/benchmarks/old_opencl/DotProduct/shrQATest.h similarity index 100% rename from benchmarks/opencl/DotProduct/shrQATest.h rename to benchmarks/old_opencl/DotProduct/shrQATest.h diff --git a/benchmarks/opencl/DotProduct/shrUtils.h b/benchmarks/old_opencl/DotProduct/shrUtils.h similarity index 100% rename from benchmarks/opencl/DotProduct/shrUtils.h rename to benchmarks/old_opencl/DotProduct/shrUtils.h diff --git a/benchmarks/opencl/VectorHypot/Makefile b/benchmarks/old_opencl/VectorHypot/Makefile similarity index 100% rename from benchmarks/opencl/VectorHypot/Makefile rename 
to benchmarks/old_opencl/VectorHypot/Makefile diff --git a/benchmarks/opencl/VectorHypot/VectorHypot.cl b/benchmarks/old_opencl/VectorHypot/VectorHypot.cl similarity index 100% rename from benchmarks/opencl/VectorHypot/VectorHypot.cl rename to benchmarks/old_opencl/VectorHypot/VectorHypot.cl diff --git a/benchmarks/opencl/VectorHypot/main.cc b/benchmarks/old_opencl/VectorHypot/main.cc similarity index 100% rename from benchmarks/opencl/VectorHypot/main.cc rename to benchmarks/old_opencl/VectorHypot/main.cc diff --git a/benchmarks/opencl/VectorHypot/oclUtils.h b/benchmarks/old_opencl/VectorHypot/oclUtils.h similarity index 100% rename from benchmarks/opencl/VectorHypot/oclUtils.h rename to benchmarks/old_opencl/VectorHypot/oclUtils.h diff --git a/benchmarks/opencl/VectorHypot/shrQATest.h b/benchmarks/old_opencl/VectorHypot/shrQATest.h similarity index 100% rename from benchmarks/opencl/VectorHypot/shrQATest.h rename to benchmarks/old_opencl/VectorHypot/shrQATest.h diff --git a/benchmarks/opencl/VectorHypot/shrUtils.h b/benchmarks/old_opencl/VectorHypot/shrUtils.h similarity index 100% rename from benchmarks/opencl/VectorHypot/shrUtils.h rename to benchmarks/old_opencl/VectorHypot/shrUtils.h diff --git a/benchmarks/new_opencl/bfs/CLHelper.h b/benchmarks/old_opencl/bfs/CLHelper.h similarity index 93% rename from benchmarks/new_opencl/bfs/CLHelper.h rename to benchmarks/old_opencl/bfs/CLHelper.h index 4ea9b747..3fc1e23e 100755 --- a/benchmarks/new_opencl/bfs/CLHelper.h +++ b/benchmarks/old_opencl/bfs/CLHelper.h @@ -39,27 +39,6 @@ string kernel_names[2] = {"BFS_1", "BFS_2"}; int work_group_size = 512; int device_id_inused = 0; // deviced id used (default : 0) -int read_kernel_file(const char* filename, uint8_t** data, size_t* size) { - if (nullptr == filename || nullptr == data || 0 == size) - return -1; - - FILE* fp = fopen(filename, "r"); - if (NULL == fp) { - fprintf(stderr, "Failed to load kernel."); - return -1; - } - fseek(fp , 0 , SEEK_END); - long fsize = 
ftell(fp); - rewind(fp); - - *data = (uint8_t*)malloc(fsize); - *size = fread(*data, 1, fsize, fp); - - fclose(fp); - - return 0; -} - /* * Converts the contents of a file into a string */ @@ -243,25 +222,14 @@ free(allPlatforms);*/ const char * source = source_str.c_str(); size_t sourceSize[] = { source_str.length() };*/ - //oclHandles.program = clCreateProgramWithBuiltInKernels( - // oclHandles.context, 1, &oclHandles.devices[DEVICE_ID_INUSED], - // "BFS_1;BFS_2", &resultCL); + oclHandles.program = clCreateProgramWithBuiltInKernels( + oclHandles.context, 1, &oclHandles.devices[DEVICE_ID_INUSED], + "BFS_1;BFS_2", &resultCL); /*oclHandles.program = clCreateProgramWithSource(oclHandles.context, 1, &source, sourceSize, &resultCL);*/ - // read kernel binary from file - uint8_t *kernel_bin = NULL; - size_t kernel_size; - cl_int binary_status = 0; - if (0 != read_kernel_file("kernel.pocl", &kernel_bin, &kernel_size)) - std::abort(); - - oclHandles.program = clCreateProgramWithBinary( - oclHandles.context, 1, &oclHandles.devices[DEVICE_ID_INUSED], &kernel_size, &kernel_bin, &binary_status, &resultCL); - free(kernel_bin); - if ((resultCL != CL_SUCCESS) || (oclHandles.program == NULL)) throw(string("InitCL()::Error: Loading Binary into cl_program. 
" "(clCreateProgramWithBinary)")); diff --git a/benchmarks/old_opencl/bfs/Makefile b/benchmarks/old_opencl/bfs/Makefile new file mode 100644 index 00000000..0619b907 --- /dev/null +++ b/benchmarks/old_opencl/bfs/Makefile @@ -0,0 +1,68 @@ +RISCV_TOOL_PATH ?= $(wildcard ../../../../riscv-gnu-toolchain/drops) +POCL_CC_PATH ?= $(wildcard ../../../../pocl/drops_riscv_cc) +POCL_INC_PATH ?= $(wildcard ../include) +POCL_LIB_PATH ?= $(wildcard ../lib) +VX_RT_PATH ?= $(wildcard ../../../runtime) +VX_SIMX_PATH ?= $(wildcard ../../../simX/obj_dir) + +CC = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc +CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++ +DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump +HEX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy +GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb + +VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c +VX_SRCS += $(VX_RT_PATH)/startup/vx_start.S +VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s +VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c +VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s +VX_SRCS += $(VX_RT_PATH)/tests/tests.c +VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c +VX_SRCS += $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) + +VX_CFLAGS = -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld + +CXXFLAGS = -g -O0 -march=rv32im -mabi=ilp32 +CXXFLAGS += -ffreestanding # program may not begin at main() +CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections +CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions +CXXFLAGS += -I$(POCL_INC_PATH) + +VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a +QEMU_LIBS = $(VX_RT_PATH)/qemu/vx_api.c -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a + +PROJECT = bfs + +SRCS = main.cc + +all: $(PROJECT).dump $(PROJECT).hex + +lib$(PROJECT).a: kernel.cl + POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 
LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl + +$(PROJECT).elf: $(SRCS) lib$(PROJECT).a + $(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) $(SRCS) $(VX_LIBS) -o $(PROJECT).elf + +$(PROJECT).qemu: $(SRCS) lib$(PROJECT).a + $(CXX) $(CXXFLAGS) $(SRCS) $(QEMU_LIBS) -o $(PROJECT).qemu + +$(PROJECT).hex: $(PROJECT).elf + $(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex + +$(PROJECT).dump: $(PROJECT).elf + $(DMP) -D $(PROJECT).elf > $(PROJECT).dump + +run: $(PROJECT).hex + POCL_DEBUG=all $(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug + +qemu: $(PROJECT).qemu + POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -strace -d in_asm -D debug.log $(PROJECT).qemu + +gdb-s: $(PROJECT).qemu + POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -g 1234 -d in_asm -D debug.log $(PROJECT).qemu + +gdb-c: $(PROJECT).qemu + $(GDB) $(PROJECT).qemu + +clean: + rm -rf *.o *.elf *.dump *.hex *.qemu *.log *.debug \ No newline at end of file diff --git a/benchmarks/new_opencl/bfs/README b/benchmarks/old_opencl/bfs/README similarity index 100% rename from benchmarks/new_opencl/bfs/README rename to benchmarks/old_opencl/bfs/README diff --git a/benchmarks/new_opencl/bfs/graph4096.txt b/benchmarks/old_opencl/bfs/graph4096.txt similarity index 100% rename from benchmarks/new_opencl/bfs/graph4096.txt rename to benchmarks/old_opencl/bfs/graph4096.txt diff --git a/benchmarks/new_opencl/bfs/kernel.cl b/benchmarks/old_opencl/bfs/kernel.cl similarity index 100% rename from benchmarks/new_opencl/bfs/kernel.cl rename to benchmarks/old_opencl/bfs/kernel.cl diff --git a/benchmarks/opencl/bfs/libbfs.a b/benchmarks/old_opencl/bfs/libbfs.a similarity index 100% rename from benchmarks/opencl/bfs/libbfs.a rename to benchmarks/old_opencl/bfs/libbfs.a diff --git a/benchmarks/new_opencl/bfs/main.cc b/benchmarks/old_opencl/bfs/main.cc similarity index 97% rename from benchmarks/new_opencl/bfs/main.cc rename to 
benchmarks/old_opencl/bfs/main.cc index 138ec864..10a40c4c 100755 --- a/benchmarks/new_opencl/bfs/main.cc +++ b/benchmarks/old_opencl/bfs/main.cc @@ -187,7 +187,7 @@ int main(int argc, char *argv[]) { FILE *fp; Node *h_graph_nodes; char *h_graph_mask, *h_updating_graph_mask, *h_graph_visited; - + try { char *input_f = "graph4096.txt"; printf("Reading File\n"); diff --git a/benchmarks/new_opencl/bfs/run b/benchmarks/old_opencl/bfs/run similarity index 100% rename from benchmarks/new_opencl/bfs/run rename to benchmarks/old_opencl/bfs/run diff --git a/benchmarks/new_opencl/bfs/timer.cc b/benchmarks/old_opencl/bfs/timer.cc similarity index 100% rename from benchmarks/new_opencl/bfs/timer.cc rename to benchmarks/old_opencl/bfs/timer.cc diff --git a/benchmarks/new_opencl/bfs/timer.h b/benchmarks/old_opencl/bfs/timer.h similarity index 100% rename from benchmarks/new_opencl/bfs/timer.h rename to benchmarks/old_opencl/bfs/timer.h diff --git a/benchmarks/new_opencl/bfs/util.h b/benchmarks/old_opencl/bfs/util.h similarity index 100% rename from benchmarks/new_opencl/bfs/util.h rename to benchmarks/old_opencl/bfs/util.h diff --git a/benchmarks/old_opencl/convolution/Makefile b/benchmarks/old_opencl/convolution/Makefile new file mode 100644 index 00000000..42c8605a --- /dev/null +++ b/benchmarks/old_opencl/convolution/Makefile @@ -0,0 +1,68 @@ +RISCV_TOOL_PATH ?= $(wildcard ../../../../riscv-gnu-toolchain/drops) +POCL_CC_PATH ?= $(wildcard ../../../../pocl/drops_riscv_cc) +POCL_INC_PATH ?= $(wildcard ../include) +POCL_LIB_PATH ?= $(wildcard ../lib) +VX_RT_PATH ?= $(wildcard ../../../runtime) +VX_SIMX_PATH ?= $(wildcard ../../../simX/obj_dir) + +CC = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc +CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++ +DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump +HEX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy +GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb + +VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c +VX_SRCS += 
$(VX_RT_PATH)/startup/vx_start.S +VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s +VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c +VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s +VX_SRCS += $(VX_RT_PATH)/tests/tests.c +VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c +VX_SRCS += $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) + +VX_CFLAGS = -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld + +CXXFLAGS = -g -O0 -march=rv32im -mabi=ilp32 +CXXFLAGS += -ffreestanding # program may not begin at main() +CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections +CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions +CXXFLAGS += -I$(POCL_INC_PATH) + +VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a +QEMU_LIBS = $(VX_RT_PATH)/qemu/vx_api.c -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a + +PROJECT = convolution + +SRCS = main.cpp utils.cpp + +all: $(PROJECT).dump $(PROJECT).hex + +lib$(PROJECT).a: kernel.cl + POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl + +$(PROJECT).elf: $(SRCS) lib$(PROJECT).a + $(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) $(SRCS) $(VX_LIBS) -o $(PROJECT).elf + +$(PROJECT).qemu: $(SRCS) lib$(PROJECT).a + $(CXX) $(CXXFLAGS) $(SRCS) $(QEMU_LIBS) -o $(PROJECT).qemu + +$(PROJECT).hex: $(PROJECT).elf + $(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex + +$(PROJECT).dump: $(PROJECT).elf + $(DMP) -D $(PROJECT).elf > $(PROJECT).dump + +run: $(PROJECT).hex + POCL_DEBUG=all $(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug + +qemu: $(PROJECT).qemu + POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -d in_asm -D debug.log $(PROJECT).qemu + +gdb-s: $(PROJECT).qemu + POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -g 1234 -d in_asm -D debug.log 
$(PROJECT).qemu + +gdb-c: $(PROJECT).qemu + $(GDB) $(PROJECT).qemu + +clean: + rm -rf *.o *.elf *.dump *.hex *.qemu *.log *.debug diff --git a/benchmarks/new_opencl/convolution/input.bmp b/benchmarks/old_opencl/convolution/input.bmp similarity index 100% rename from benchmarks/new_opencl/convolution/input.bmp rename to benchmarks/old_opencl/convolution/input.bmp diff --git a/benchmarks/new_opencl/convolution/kernel.cl b/benchmarks/old_opencl/convolution/kernel.cl similarity index 96% rename from benchmarks/new_opencl/convolution/kernel.cl rename to benchmarks/old_opencl/convolution/kernel.cl index ff56dc6f..e0e4da3a 100755 --- a/benchmarks/new_opencl/convolution/kernel.cl +++ b/benchmarks/old_opencl/convolution/kernel.cl @@ -1,54 +1,54 @@ -__kernel -void convolution( - __read_only image2d_t sourceImage, - __write_only image2d_t outputImage, - int rows, - int cols, - __constant float* filter, - int filterWidth, - sampler_t sampler) -{ - // Store each work-item’s unique row and column - int column = get_global_id(0); - int row = get_global_id(1); - - // Half the width of the filter is needed for indexing - // memory later - int halfWidth = (int)(filterWidth/2); - - // All accesses to images return data as four-element vector - // (i.e., float4), although only the 'x' component will contain - // meaningful data in this code - float4 sum = {0.0f, 0.0f, 0.0f, 0.0f}; - - // Iterator for the filter - int filterIdx = 0; - - // Each work-item iterates around its local area based on the - // size of the filter - int2 coords; // Coordinates for accessing the image - // Iterate the filter rows - for(int i = -halfWidth; i <= halfWidth; i++) { - coords.y = row + i; - - // Iterate over the filter columns - for(int j = -halfWidth; j <= halfWidth; j++) { - coords.x = column + j; - - float4 pixel; - // Read a pixel from the image. A single channel image - // stores the pixel in the 'x' coordinate of the returned - // vector. 
- pixel = read_imagef(sourceImage, sampler, coords); - sum.x += pixel.x * filter[filterIdx++]; - } - } - - // Copy the data to the output image if the - // work-item is in bounds - if(row < rows && column < cols) { - coords.x = column; - coords.y = row; - write_imagef(outputImage, coords, sum); - } +__kernel +void convolution( + __read_only image2d_t sourceImage, + __write_only image2d_t outputImage, + int rows, + int cols, + __constant float* filter, + int filterWidth, + sampler_t sampler) +{ + // Store each work-item’s unique row and column + int column = get_global_id(0); + int row = get_global_id(1); + + // Half the width of the filter is needed for indexing + // memory later + int halfWidth = (int)(filterWidth/2); + + // All accesses to images return data as four-element vector + // (i.e., float4), although only the 'x' component will contain + // meaningful data in this code + float4 sum = {0.0f, 0.0f, 0.0f, 0.0f}; + + // Iterator for the filter + int filterIdx = 0; + + // Each work-item iterates around its local area based on the + // size of the filter + int2 coords; // Coordinates for accessing the image + // Iterate the filter rows + for(int i = -halfWidth; i <= halfWidth; i++) { + coords.y = row + i; + + // Iterate over the filter columns + for(int j = -halfWidth; j <= halfWidth; j++) { + coords.x = column + j; + + float4 pixel; + // Read a pixel from the image. A single channel image + // stores the pixel in the 'x' coordinate of the returned + // vector. 
+ pixel = read_imagef(sourceImage, sampler, coords); + sum.x += pixel.x * filter[filterIdx++]; + } + } + + // Copy the data to the output image if the + // work-item is in bounds + if(row < rows && column < cols) { + coords.x = column; + coords.y = row; + write_imagef(outputImage, coords, sum); + } } \ No newline at end of file diff --git a/benchmarks/new_opencl/convolution/main.cpp b/benchmarks/old_opencl/convolution/main.cpp similarity index 96% rename from benchmarks/new_opencl/convolution/main.cpp rename to benchmarks/old_opencl/convolution/main.cpp index f5bf1584..5db2ae57 100755 --- a/benchmarks/new_opencl/convolution/main.cpp +++ b/benchmarks/old_opencl/convolution/main.cpp @@ -1,261 +1,261 @@ -#include -#include -#include - -#include "utils.h" - -// This function takes a positive integer and rounds it up to -// the nearest multiple of another provided integer -unsigned int roundUp(unsigned int value, unsigned int multiple) { - - // Determine how far past the nearest multiple the value is - unsigned int remainder = value % multiple; - - // Add the difference to make the value a multiple - if(remainder != 0) { - value += (multiple-remainder); - } - - return value; -} - -// This function reads in a text file and stores it as a char pointer -char* readSource(char* kernelPath) { - - cl_int status; - FILE *fp; - char *source; - long int size; - - printf("Program file is: %s\n", kernelPath); - - fp = fopen(kernelPath, "rb"); - if(!fp) { - printf("Could not open kernel file\n"); - exit(-1); - } - status = fseek(fp, 0, SEEK_END); - if(status != 0) { - printf("Error seeking to end of file\n"); - exit(-1); - } - size = ftell(fp); - if(size < 0) { - printf("Error getting file position\n"); - exit(-1); - } - - rewind(fp); - - source = (char *)malloc(size + 1); - - int i; - for (i = 0; i < size+1; i++) { - source[i]='\0'; - } - - if(source == NULL) { - printf("Error allocating space for the kernel source\n"); - exit(-1); - } - - fread(source, 1, size, fp); - source[size] 
= '\0'; - - return source; -} - -void chk(cl_int status, const char* cmd) { - - if(status != CL_SUCCESS) { - printf("%s failed (%d)\n", cmd, status); - exit(-1); - } -} - -int main() { - - int i, j, k, l; - - // Rows and columns in the input image - int imageHeight; - int imageWidth; - - const char* inputFile = "input.bmp"; - const char* outputFile = "output.bmp"; - - // Homegrown function to read a BMP from file - float* inputImage = readImage(inputFile, &imageWidth, - &imageHeight); - - // Size of the input and output images on the host - int dataSize = imageHeight*imageWidth*sizeof(float); - - // Output image on the host - float* outputImage = NULL; - outputImage = (float*)malloc(dataSize); - float* refImage = NULL; - refImage = (float*)malloc(dataSize); - - // 45 degree motion blur - float filter[49] = - {0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - 0, 0, -1, 0, 1, 0, 0, - 0, 0, -2, 0, 2, 0, 0, - 0, 0, -1, 0, 1, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0}; - - // The convolution filter is 7x7 - int filterWidth = 7; - int filterSize = filterWidth*filterWidth; // Assume a square kernel - - // Set up the OpenCL environment - cl_int status; - - // Discovery platform - cl_platform_id platform; - status = clGetPlatformIDs(1, &platform, NULL); - chk(status, "clGetPlatformIDs"); - - // Discover device - cl_device_id device; - clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, NULL); - chk(status, "clGetDeviceIDs"); - - // Create context - cl_context_properties props[3] = {CL_CONTEXT_PLATFORM, - (cl_context_properties)(platform), 0}; - cl_context context; - context = clCreateContext(props, 1, &device, NULL, NULL, &status); - chk(status, "clCreateContext"); - - // Create command queue - cl_command_queue queue; - queue = clCreateCommandQueue(context, device, 0, &status); - chk(status, "clCreateCommandQueue"); - - // The image format describes how the data will be stored in memory - cl_image_format format; - format.image_channel_order = CL_R; // single 
channel - format.image_channel_data_type = CL_FLOAT; // float data type - - // Create space for the source image on the device - cl_mem d_inputImage = clCreateImage2D(context, 0, &format, imageWidth, - imageHeight, 0, NULL, &status); - chk(status, "clCreateImage2D"); - - // Create space for the output image on the device - cl_mem d_outputImage = clCreateImage2D(context, 0, &format, imageWidth, - imageHeight, 0, NULL, &status); - chk(status, "clCreateImage2D"); - - // Create space for the 7x7 filter on the device - cl_mem d_filter = clCreateBuffer(context, 0, filterSize*sizeof(float), - NULL, &status); - chk(status, "clCreateBuffer"); - - // Copy the source image to the device - size_t origin[3] = {0, 0, 0}; // Offset within the image to copy from - size_t region[3] = {imageWidth, imageHeight, 1}; // Elements to per dimension - status = clEnqueueWriteImage(queue, d_inputImage, CL_FALSE, origin, region, - 0, 0, inputImage, 0, NULL, NULL); - chk(status, "clEnqueueWriteImage"); - - // Copy the 7x7 filter to the device - status = clEnqueueWriteBuffer(queue, d_filter, CL_FALSE, 0, - filterSize*sizeof(float), filter, 0, NULL, NULL); - chk(status, "clEnqueueWriteBuffer"); - - // Create the image sampler - cl_sampler sampler = clCreateSampler(context, CL_FALSE, - CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &status); - chk(status, "clCreateSampler"); - - const char* source = readSource("kernel.cl"); - - // Create a program object with source and build it - cl_program program; - program = clCreateProgramWithSource(context, 1, &source, NULL, NULL); - chk(status, "clCreateProgramWithSource"); - status = clBuildProgram(program, 1, &device, NULL, NULL, NULL); - chk(status, "clBuildProgram"); - - // Create the kernel object - cl_kernel kernel; - kernel = clCreateKernel(program, "convolution", &status); - chk(status, "clCreateKernel"); - - // Set the kernel arguments - status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_inputImage); - status |= clSetKernelArg(kernel, 1, 
sizeof(cl_mem), &d_outputImage); - status |= clSetKernelArg(kernel, 2, sizeof(int), &imageHeight); - status |= clSetKernelArg(kernel, 3, sizeof(int), &imageWidth); - status |= clSetKernelArg(kernel, 4, sizeof(cl_mem), &d_filter); - status |= clSetKernelArg(kernel, 5, sizeof(int), &filterWidth); - status |= clSetKernelArg(kernel, 6, sizeof(cl_sampler), &sampler); - chk(status, "clSetKernelArg"); - - // Set the work item dimensions - size_t globalSize[2] = {imageWidth, imageHeight}; - status = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, globalSize, NULL, 0, - NULL, NULL); - chk(status, "clEnqueueNDRange"); - - // Read the image back to the host - status = clEnqueueReadImage(queue, d_outputImage, CL_TRUE, origin, - region, 0, 0, outputImage, 0, NULL, NULL); - chk(status, "clEnqueueReadImage"); - - // Write the output image to file - storeImage(outputImage, outputFile, imageHeight, imageWidth, inputFile); - - // Compute the reference image - for(i = 0; i < imageHeight; i++) { - for(j = 0; j < imageWidth; j++) { - refImage[i*imageWidth+j] = 0; - } - } - - // Iterate over the rows of the source image - int halfFilterWidth = filterWidth/2; - float sum; - for(i = 0; i < imageHeight; i++) { - // Iterate over the columns of the source image - for(j = 0; j < imageWidth; j++) { - sum = 0; // Reset sum for new source pixel - // Apply the filter to the neighborhood - for(k = - halfFilterWidth; k <= halfFilterWidth; k++) { - for(l = - halfFilterWidth; l <= halfFilterWidth; l++) { - if(i+k >= 0 && i+k < imageHeight && - j+l >= 0 && j+l < imageWidth) { - sum += inputImage[(i+k)*imageWidth + j+l] * - filter[(k+halfFilterWidth)*filterWidth + - l+halfFilterWidth]; - } - } - } - refImage[i*imageWidth+j] = sum; - } - } - - int failed = 0; - for(i = 0; i < imageHeight; i++) { - for(j = 0; j < imageWidth; j++) { - if(abs(outputImage[i*imageWidth+j]-refImage[i*imageWidth+j]) > 0.01) { - printf("Results are INCORRECT\n"); - printf("Pixel mismatch at <%d,%d> (%f vs. 
%f)\n", i, j, - outputImage[i*imageWidth+j], refImage[i*imageWidth+j]); - failed = 1; - } - if(failed) break; - } - if(failed) break; - } - if(!failed) { - printf("Results are correct\n"); - } - - return 0; +#include +#include +#include + +#include "utils.h" + +// This function takes a positive integer and rounds it up to +// the nearest multiple of another provided integer +unsigned int roundUp(unsigned int value, unsigned int multiple) { + + // Determine how far past the nearest multiple the value is + unsigned int remainder = value % multiple; + + // Add the difference to make the value a multiple + if(remainder != 0) { + value += (multiple-remainder); + } + + return value; +} + +// This function reads in a text file and stores it as a char pointer +char* readSource(char* kernelPath) { + + cl_int status; + FILE *fp; + char *source; + long int size; + + printf("Program file is: %s\n", kernelPath); + + fp = fopen(kernelPath, "rb"); + if(!fp) { + printf("Could not open kernel file\n"); + exit(-1); + } + status = fseek(fp, 0, SEEK_END); + if(status != 0) { + printf("Error seeking to end of file\n"); + exit(-1); + } + size = ftell(fp); + if(size < 0) { + printf("Error getting file position\n"); + exit(-1); + } + + rewind(fp); + + source = (char *)malloc(size + 1); + + int i; + for (i = 0; i < size+1; i++) { + source[i]='\0'; + } + + if(source == NULL) { + printf("Error allocating space for the kernel source\n"); + exit(-1); + } + + fread(source, 1, size, fp); + source[size] = '\0'; + + return source; +} + +void chk(cl_int status, const char* cmd) { + + if(status != CL_SUCCESS) { + printf("%s failed (%d)\n", cmd, status); + exit(-1); + } +} + +int main() { + + int i, j, k, l; + + // Rows and columns in the input image + int imageHeight; + int imageWidth; + + const char* inputFile = "input.bmp"; + const char* outputFile = "output.bmp"; + + // Homegrown function to read a BMP from file + float* inputImage = readImage(inputFile, &imageWidth, + &imageHeight); + + // Size 
of the input and output images on the host + int dataSize = imageHeight*imageWidth*sizeof(float); + + // Output image on the host + float* outputImage = NULL; + outputImage = (float*)malloc(dataSize); + float* refImage = NULL; + refImage = (float*)malloc(dataSize); + + // 45 degree motion blur + float filter[49] = + {0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + 0, 0, -1, 0, 1, 0, 0, + 0, 0, -2, 0, 2, 0, 0, + 0, 0, -1, 0, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0}; + + // The convolution filter is 7x7 + int filterWidth = 7; + int filterSize = filterWidth*filterWidth; // Assume a square kernel + + // Set up the OpenCL environment + cl_int status; + + // Discovery platform + cl_platform_id platform; + status = clGetPlatformIDs(1, &platform, NULL); + chk(status, "clGetPlatformIDs"); + + // Discover device + cl_device_id device; + clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, NULL); + chk(status, "clGetDeviceIDs"); + + // Create context + cl_context_properties props[3] = {CL_CONTEXT_PLATFORM, + (cl_context_properties)(platform), 0}; + cl_context context; + context = clCreateContext(props, 1, &device, NULL, NULL, &status); + chk(status, "clCreateContext"); + + // Create command queue + cl_command_queue queue; + queue = clCreateCommandQueue(context, device, 0, &status); + chk(status, "clCreateCommandQueue"); + + // The image format describes how the data will be stored in memory + cl_image_format format; + format.image_channel_order = CL_R; // single channel + format.image_channel_data_type = CL_FLOAT; // float data type + + // Create space for the source image on the device + cl_mem d_inputImage = clCreateImage2D(context, 0, &format, imageWidth, + imageHeight, 0, NULL, &status); + chk(status, "clCreateImage2D"); + + // Create space for the output image on the device + cl_mem d_outputImage = clCreateImage2D(context, 0, &format, imageWidth, + imageHeight, 0, NULL, &status); + chk(status, "clCreateImage2D"); + + // Create space for the 7x7 filter on 
the device + cl_mem d_filter = clCreateBuffer(context, 0, filterSize*sizeof(float), + NULL, &status); + chk(status, "clCreateBuffer"); + + // Copy the source image to the device + size_t origin[3] = {0, 0, 0}; // Offset within the image to copy from + size_t region[3] = {imageWidth, imageHeight, 1}; // Elements to per dimension + status = clEnqueueWriteImage(queue, d_inputImage, CL_FALSE, origin, region, + 0, 0, inputImage, 0, NULL, NULL); + chk(status, "clEnqueueWriteImage"); + + // Copy the 7x7 filter to the device + status = clEnqueueWriteBuffer(queue, d_filter, CL_FALSE, 0, + filterSize*sizeof(float), filter, 0, NULL, NULL); + chk(status, "clEnqueueWriteBuffer"); + + // Create the image sampler + cl_sampler sampler = clCreateSampler(context, CL_FALSE, + CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &status); + chk(status, "clCreateSampler"); + + const char* source = readSource("kernel.cl"); + + // Create a program object with source and build it + cl_program program; + program = clCreateProgramWithSource(context, 1, &source, NULL, NULL); + chk(status, "clCreateProgramWithSource"); + status = clBuildProgram(program, 1, &device, NULL, NULL, NULL); + chk(status, "clBuildProgram"); + + // Create the kernel object + cl_kernel kernel; + kernel = clCreateKernel(program, "convolution", &status); + chk(status, "clCreateKernel"); + + // Set the kernel arguments + status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_inputImage); + status |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_outputImage); + status |= clSetKernelArg(kernel, 2, sizeof(int), &imageHeight); + status |= clSetKernelArg(kernel, 3, sizeof(int), &imageWidth); + status |= clSetKernelArg(kernel, 4, sizeof(cl_mem), &d_filter); + status |= clSetKernelArg(kernel, 5, sizeof(int), &filterWidth); + status |= clSetKernelArg(kernel, 6, sizeof(cl_sampler), &sampler); + chk(status, "clSetKernelArg"); + + // Set the work item dimensions + size_t globalSize[2] = {imageWidth, imageHeight}; + status = 
clEnqueueNDRangeKernel(queue, kernel, 2, NULL, globalSize, NULL, 0, + NULL, NULL); + chk(status, "clEnqueueNDRange"); + + // Read the image back to the host + status = clEnqueueReadImage(queue, d_outputImage, CL_TRUE, origin, + region, 0, 0, outputImage, 0, NULL, NULL); + chk(status, "clEnqueueReadImage"); + + // Write the output image to file + storeImage(outputImage, outputFile, imageHeight, imageWidth, inputFile); + + // Compute the reference image + for(i = 0; i < imageHeight; i++) { + for(j = 0; j < imageWidth; j++) { + refImage[i*imageWidth+j] = 0; + } + } + + // Iterate over the rows of the source image + int halfFilterWidth = filterWidth/2; + float sum; + for(i = 0; i < imageHeight; i++) { + // Iterate over the columns of the source image + for(j = 0; j < imageWidth; j++) { + sum = 0; // Reset sum for new source pixel + // Apply the filter to the neighborhood + for(k = - halfFilterWidth; k <= halfFilterWidth; k++) { + for(l = - halfFilterWidth; l <= halfFilterWidth; l++) { + if(i+k >= 0 && i+k < imageHeight && + j+l >= 0 && j+l < imageWidth) { + sum += inputImage[(i+k)*imageWidth + j+l] * + filter[(k+halfFilterWidth)*filterWidth + + l+halfFilterWidth]; + } + } + } + refImage[i*imageWidth+j] = sum; + } + } + + int failed = 0; + for(i = 0; i < imageHeight; i++) { + for(j = 0; j < imageWidth; j++) { + if(abs(outputImage[i*imageWidth+j]-refImage[i*imageWidth+j]) > 0.01) { + printf("Results are INCORRECT\n"); + printf("Pixel mismatch at <%d,%d> (%f vs. 
%f)\n", i, j, + outputImage[i*imageWidth+j], refImage[i*imageWidth+j]); + failed = 1; + } + if(failed) break; + } + if(failed) break; + } + if(!failed) { + printf("Results are correct\n"); + } + + return 0; } \ No newline at end of file diff --git a/benchmarks/new_opencl/convolution/utils.cpp b/benchmarks/old_opencl/convolution/utils.cpp similarity index 100% rename from benchmarks/new_opencl/convolution/utils.cpp rename to benchmarks/old_opencl/convolution/utils.cpp diff --git a/benchmarks/new_opencl/convolution/utils.h b/benchmarks/old_opencl/convolution/utils.h similarity index 100% rename from benchmarks/new_opencl/convolution/utils.h rename to benchmarks/old_opencl/convolution/utils.h diff --git a/benchmarks/opencl/cutcp/Makefile b/benchmarks/old_opencl/cutcp/Makefile similarity index 100% rename from benchmarks/opencl/cutcp/Makefile rename to benchmarks/old_opencl/cutcp/Makefile diff --git a/benchmarks/opencl/cutcp/args.c b/benchmarks/old_opencl/cutcp/args.c similarity index 100% rename from benchmarks/opencl/cutcp/args.c rename to benchmarks/old_opencl/cutcp/args.c diff --git a/benchmarks/opencl/cutcp/atom.h b/benchmarks/old_opencl/cutcp/atom.h similarity index 100% rename from benchmarks/opencl/cutcp/atom.h rename to benchmarks/old_opencl/cutcp/atom.h diff --git a/benchmarks/opencl/cutcp/cutcpu.c b/benchmarks/old_opencl/cutcp/cutcpu.c similarity index 100% rename from benchmarks/opencl/cutcp/cutcpu.c rename to benchmarks/old_opencl/cutcp/cutcpu.c diff --git a/benchmarks/opencl/cutcp/cutoff.c b/benchmarks/old_opencl/cutcp/cutoff.c similarity index 100% rename from benchmarks/opencl/cutcp/cutoff.c rename to benchmarks/old_opencl/cutcp/cutoff.c diff --git a/benchmarks/opencl/cutcp/cutoff.h b/benchmarks/old_opencl/cutcp/cutoff.h similarity index 100% rename from benchmarks/opencl/cutcp/cutoff.h rename to benchmarks/old_opencl/cutcp/cutoff.h diff --git a/benchmarks/opencl/cutcp/excl.c b/benchmarks/old_opencl/cutcp/excl.c similarity index 100% rename from 
benchmarks/opencl/cutcp/excl.c rename to benchmarks/old_opencl/cutcp/excl.c diff --git a/benchmarks/opencl/cutcp/gpu_info.c b/benchmarks/old_opencl/cutcp/gpu_info.c similarity index 100% rename from benchmarks/opencl/cutcp/gpu_info.c rename to benchmarks/old_opencl/cutcp/gpu_info.c diff --git a/benchmarks/opencl/cutcp/gpu_info.h b/benchmarks/old_opencl/cutcp/gpu_info.h similarity index 100% rename from benchmarks/opencl/cutcp/gpu_info.h rename to benchmarks/old_opencl/cutcp/gpu_info.h diff --git a/benchmarks/opencl/cutcp/kernel.cl b/benchmarks/old_opencl/cutcp/kernel.cl similarity index 100% rename from benchmarks/opencl/cutcp/kernel.cl rename to benchmarks/old_opencl/cutcp/kernel.cl diff --git a/benchmarks/opencl/cutcp/libcutcp.a b/benchmarks/old_opencl/cutcp/libcutcp.a similarity index 100% rename from benchmarks/opencl/cutcp/libcutcp.a rename to benchmarks/old_opencl/cutcp/libcutcp.a diff --git a/benchmarks/opencl/cutcp/macros.h b/benchmarks/old_opencl/cutcp/macros.h similarity index 100% rename from benchmarks/opencl/cutcp/macros.h rename to benchmarks/old_opencl/cutcp/macros.h diff --git a/benchmarks/opencl/cutcp/main.cc b/benchmarks/old_opencl/cutcp/main.cc similarity index 100% rename from benchmarks/opencl/cutcp/main.cc rename to benchmarks/old_opencl/cutcp/main.cc diff --git a/benchmarks/opencl/cutcp/ocl.c b/benchmarks/old_opencl/cutcp/ocl.c similarity index 100% rename from benchmarks/opencl/cutcp/ocl.c rename to benchmarks/old_opencl/cutcp/ocl.c diff --git a/benchmarks/opencl/cutcp/ocl.h b/benchmarks/old_opencl/cutcp/ocl.h similarity index 100% rename from benchmarks/opencl/cutcp/ocl.h rename to benchmarks/old_opencl/cutcp/ocl.h diff --git a/benchmarks/opencl/cutcp/output.c b/benchmarks/old_opencl/cutcp/output.c similarity index 100% rename from benchmarks/opencl/cutcp/output.c rename to benchmarks/old_opencl/cutcp/output.c diff --git a/benchmarks/opencl/cutcp/output.h b/benchmarks/old_opencl/cutcp/output.h similarity index 100% rename from 
benchmarks/opencl/cutcp/output.h rename to benchmarks/old_opencl/cutcp/output.h diff --git a/benchmarks/opencl/cutcp/parboil.h b/benchmarks/old_opencl/cutcp/parboil.h similarity index 100% rename from benchmarks/opencl/cutcp/parboil.h rename to benchmarks/old_opencl/cutcp/parboil.h diff --git a/benchmarks/opencl/cutcp/parboil_opencl.c b/benchmarks/old_opencl/cutcp/parboil_opencl.c similarity index 100% rename from benchmarks/opencl/cutcp/parboil_opencl.c rename to benchmarks/old_opencl/cutcp/parboil_opencl.c diff --git a/benchmarks/opencl/cutcp/readatom.c b/benchmarks/old_opencl/cutcp/readatom.c similarity index 100% rename from benchmarks/opencl/cutcp/readatom.c rename to benchmarks/old_opencl/cutcp/readatom.c diff --git a/benchmarks/opencl/cutcp/watbox.sl40.pqr b/benchmarks/old_opencl/cutcp/watbox.sl40.pqr similarity index 100% rename from benchmarks/opencl/cutcp/watbox.sl40.pqr rename to benchmarks/old_opencl/cutcp/watbox.sl40.pqr diff --git a/benchmarks/old_opencl/guassian/Makefile b/benchmarks/old_opencl/guassian/Makefile new file mode 100644 index 00000000..55f7620c --- /dev/null +++ b/benchmarks/old_opencl/guassian/Makefile @@ -0,0 +1,68 @@ +RISCV_TOOL_PATH ?= $(wildcard ../../../../riscv-gnu-toolchain/drops) +POCL_CC_PATH ?= $(wildcard ../../../../pocl/drops_riscv_cc) +POCL_INC_PATH ?= $(wildcard ../include) +POCL_LIB_PATH ?= $(wildcard ../lib) +VX_RT_PATH ?= $(wildcard ../../../runtime) +VX_SIMX_PATH ?= $(wildcard ../../../simX/obj_dir) + +CC = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc +CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++ +DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump +HEX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy +GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb + +VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c +VX_SRCS += $(VX_RT_PATH)/startup/vx_start.S +VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s +VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c +VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s 
+VX_SRCS += $(VX_RT_PATH)/tests/tests.c +VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c +VX_SRCS += $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) + +VX_CFLAGS = -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld + +CXXFLAGS = -g -O0 -march=rv32im -mabi=ilp32 +CXXFLAGS += -ffreestanding # program may not begin at main() +CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections +CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions +CXXFLAGS += -I$(POCL_INC_PATH) + +VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a +QEMU_LIBS = $(VX_RT_PATH)/qemu/vx_api.c -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a + +PROJECT = gaussian + +SRCS = main.cc clutils.cpp utils.cpp + +all: $(PROJECT).dump $(PROJECT).hex + +lib$(PROJECT).a: kernel.cl + POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl + +$(PROJECT).elf: $(SRCS) lib$(PROJECT).a + $(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) $(SRCS) $(VX_LIBS) -o $(PROJECT).elf + +$(PROJECT).qemu: $(SRCS) lib$(PROJECT).a + $(CXX) $(CXXFLAGS) $(SRCS) $(QEMU_LIBS) -o $(PROJECT).qemu + +$(PROJECT).hex: $(PROJECT).elf + $(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex + +$(PROJECT).dump: $(PROJECT).elf + $(DMP) -D $(PROJECT).elf > $(PROJECT).dump + +run: $(PROJECT).hex + POCL_DEBUG=all $(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug + +qemu: $(PROJECT).qemu + POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -d in_asm -D debug.log $(PROJECT).qemu + +gdb-s: $(PROJECT).qemu + POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -g 1234 -d in_asm -D debug.log $(PROJECT).qemu + +gdb-c: $(PROJECT).qemu + $(GDB) $(PROJECT).qemu + +clean: + rm -rf *.o *.elf *.dump *.hex *.qemu *.log *.debug diff --git 
a/benchmarks/new_opencl/guassian/OriginalParallel.c b/benchmarks/old_opencl/guassian/OriginalParallel.c similarity index 100% rename from benchmarks/new_opencl/guassian/OriginalParallel.c rename to benchmarks/old_opencl/guassian/OriginalParallel.c diff --git a/benchmarks/new_opencl/guassian/README.txt b/benchmarks/old_opencl/guassian/README.txt similarity index 100% rename from benchmarks/new_opencl/guassian/README.txt rename to benchmarks/old_opencl/guassian/README.txt diff --git a/benchmarks/new_opencl/nearn/clutils.cpp b/benchmarks/old_opencl/guassian/clutils.cpp similarity index 97% rename from benchmarks/new_opencl/nearn/clutils.cpp rename to benchmarks/old_opencl/guassian/clutils.cpp index 6bc42304..518a4a3e 100755 --- a/benchmarks/new_opencl/nearn/clutils.cpp +++ b/benchmarks/old_opencl/guassian/clutils.cpp @@ -88,6 +88,7 @@ static cl_command_queue commandQueueNoProf = NULL; //! Global status of events static bool eventsEnabled = false; + //------------------------------------------------------- // Initialization and Cleanup //------------------------------------------------------- @@ -238,28 +239,6 @@ static bool eventsEnabled = false; return context; }*/ -static int read_kernel_file(const char* filename, uint8_t** data, size_t* size) { - if (nullptr == filename || nullptr == data || 0 == size) - return -1; - - FILE* fp = fopen(filename, "r"); - if (NULL == fp) { - fprintf(stderr, "Failed to load kernel."); - return -1; - } - fseek(fp , 0 , SEEK_END); - long fsize = ftell(fp); - rewind(fp); - - *data = (uint8_t*)malloc(fsize); - *size = fread(*data, 1, fsize, fp); - - fclose(fp); - - return 0; -} - - cl_context cl_init_context(int platform, int dev,int quiet) { int printInfo=1; if (platform >= 0 && dev >= 0) printInfo = 0; @@ -858,22 +837,13 @@ cl_program cl_compileProgram(char* kernelPath, char* compileoptions, bool verbos fread(source, 1, size, fp); source[size] = '\0';*/ - // read kernel binary from file - uint8_t *kernel_bin = NULL; - size_t 
kernel_size; - cl_int binary_status = 0; - int err = read_kernel_file("kernel.pocl", &kernel_bin, &kernel_size); - cl_errChk(err, "read_kernel_file", true); - // Create the program object - //cl_program clProgramReturn = clCreateProgramWithSource(context, 1, (const char **)&source, NULL, &status); - cl_program clProgramReturn = clCreateProgramWithBinary( - context, 1, devices, &kernel_size, &kernel_bin, &binary_status, &status); - free(kernel_bin); + //cl_program clProgramReturn = clCreateProgramWithSource(context, 1, (const char **)&source, NULL, &status); + cl_program clProgramReturn = clCreateProgramWithBuiltInKernels(context, 1, &device, "Fan1;Fan2", &status); cl_errChk(status, "Creating program", true); - //free(source); - //fclose(fp); + free(source); + fclose(fp); // Try to compile the program status = clBuildProgram(clProgramReturn, 0, NULL, compileoptions, NULL, NULL); @@ -1440,4 +1410,4 @@ char* itoa_portable(int value, char* result, int base) { } return result; -} \ No newline at end of file +} diff --git a/benchmarks/new_opencl/guassian/clutils.h b/benchmarks/old_opencl/guassian/clutils.h similarity index 100% rename from benchmarks/new_opencl/guassian/clutils.h rename to benchmarks/old_opencl/guassian/clutils.h diff --git a/benchmarks/new_opencl/guassian/gaussianElim.h b/benchmarks/old_opencl/guassian/gaussianElim.h similarity index 100% rename from benchmarks/new_opencl/guassian/gaussianElim.h rename to benchmarks/old_opencl/guassian/gaussianElim.h diff --git a/benchmarks/new_opencl/guassian/gettimeofday.cpp b/benchmarks/old_opencl/guassian/gettimeofday.cpp similarity index 100% rename from benchmarks/new_opencl/guassian/gettimeofday.cpp rename to benchmarks/old_opencl/guassian/gettimeofday.cpp diff --git a/benchmarks/new_opencl/guassian/gettimeofday.h b/benchmarks/old_opencl/guassian/gettimeofday.h similarity index 100% rename from benchmarks/new_opencl/guassian/gettimeofday.h rename to benchmarks/old_opencl/guassian/gettimeofday.h diff --git 
a/benchmarks/new_opencl/guassian/kernel.cl b/benchmarks/old_opencl/guassian/kernel.cl similarity index 100% rename from benchmarks/new_opencl/guassian/kernel.cl rename to benchmarks/old_opencl/guassian/kernel.cl diff --git a/benchmarks/opencl/guassian/libgaussian.a b/benchmarks/old_opencl/guassian/libgaussian.a similarity index 100% rename from benchmarks/opencl/guassian/libgaussian.a rename to benchmarks/old_opencl/guassian/libgaussian.a diff --git a/benchmarks/new_opencl/guassian/main.cc b/benchmarks/old_opencl/guassian/main.cc similarity index 96% rename from benchmarks/new_opencl/guassian/main.cc rename to benchmarks/old_opencl/guassian/main.cc index 45261cc6..1b852908 100755 --- a/benchmarks/new_opencl/guassian/main.cc +++ b/benchmarks/old_opencl/guassian/main.cc @@ -94,9 +94,10 @@ void ForwardSub(cl_context context, float *a, float *b, float *m, int size, cl_event writeEvent, kernelEvent, readEvent; float writeTime = 0, readTime = 0, kernelTime = 0; float writeMB = 0, readMB = 0; - - gaussianElim_program = cl_compileProgram((char *)"gaussianElim_kernels.cl", NULL); - + + gaussianElim_program = + cl_compileProgram((char *)"gaussianElim_kernels.cl", NULL); + fan1_kernel = clCreateKernel(gaussianElim_program, "Fan1", &status); status = cl_errChk(status, (char *)"Error Creating Fan1 kernel", true); if (status) diff --git a/benchmarks/new_opencl/guassian/matrix4.txt b/benchmarks/old_opencl/guassian/matrix4.txt similarity index 100% rename from benchmarks/new_opencl/guassian/matrix4.txt rename to benchmarks/old_opencl/guassian/matrix4.txt diff --git a/benchmarks/new_opencl/guassian/run b/benchmarks/old_opencl/guassian/run similarity index 100% rename from benchmarks/new_opencl/guassian/run rename to benchmarks/old_opencl/guassian/run diff --git a/benchmarks/new_opencl/guassian/utils.cpp b/benchmarks/old_opencl/guassian/utils.cpp similarity index 100% rename from benchmarks/new_opencl/guassian/utils.cpp rename to benchmarks/old_opencl/guassian/utils.cpp diff --git 
a/benchmarks/new_opencl/guassian/utils.h b/benchmarks/old_opencl/guassian/utils.h similarity index 100% rename from benchmarks/new_opencl/guassian/utils.h rename to benchmarks/old_opencl/guassian/utils.h diff --git a/benchmarks/new_opencl/include/CL/cl.h b/benchmarks/old_opencl/include/CL/cl.h similarity index 100% rename from benchmarks/new_opencl/include/CL/cl.h rename to benchmarks/old_opencl/include/CL/cl.h diff --git a/benchmarks/new_opencl/include/CL/cl.hpp b/benchmarks/old_opencl/include/CL/cl.hpp similarity index 100% rename from benchmarks/new_opencl/include/CL/cl.hpp rename to benchmarks/old_opencl/include/CL/cl.hpp diff --git a/benchmarks/new_opencl/include/CL/cl2.hpp b/benchmarks/old_opencl/include/CL/cl2.hpp similarity index 100% rename from benchmarks/new_opencl/include/CL/cl2.hpp rename to benchmarks/old_opencl/include/CL/cl2.hpp diff --git a/benchmarks/new_opencl/include/CL/cl_d3d10.h b/benchmarks/old_opencl/include/CL/cl_d3d10.h similarity index 100% rename from benchmarks/new_opencl/include/CL/cl_d3d10.h rename to benchmarks/old_opencl/include/CL/cl_d3d10.h diff --git a/benchmarks/new_opencl/include/CL/cl_d3d11.h b/benchmarks/old_opencl/include/CL/cl_d3d11.h similarity index 100% rename from benchmarks/new_opencl/include/CL/cl_d3d11.h rename to benchmarks/old_opencl/include/CL/cl_d3d11.h diff --git a/benchmarks/new_opencl/include/CL/cl_dx9_media_sharing.h b/benchmarks/old_opencl/include/CL/cl_dx9_media_sharing.h similarity index 100% rename from benchmarks/new_opencl/include/CL/cl_dx9_media_sharing.h rename to benchmarks/old_opencl/include/CL/cl_dx9_media_sharing.h diff --git a/benchmarks/new_opencl/include/CL/cl_dx9_media_sharing_intel.h b/benchmarks/old_opencl/include/CL/cl_dx9_media_sharing_intel.h similarity index 100% rename from benchmarks/new_opencl/include/CL/cl_dx9_media_sharing_intel.h rename to benchmarks/old_opencl/include/CL/cl_dx9_media_sharing_intel.h diff --git a/benchmarks/new_opencl/include/CL/cl_egl.h 
b/benchmarks/old_opencl/include/CL/cl_egl.h similarity index 100% rename from benchmarks/new_opencl/include/CL/cl_egl.h rename to benchmarks/old_opencl/include/CL/cl_egl.h diff --git a/benchmarks/new_opencl/include/CL/cl_ext.h b/benchmarks/old_opencl/include/CL/cl_ext.h similarity index 100% rename from benchmarks/new_opencl/include/CL/cl_ext.h rename to benchmarks/old_opencl/include/CL/cl_ext.h diff --git a/benchmarks/new_opencl/include/CL/cl_ext_intel.h b/benchmarks/old_opencl/include/CL/cl_ext_intel.h similarity index 100% rename from benchmarks/new_opencl/include/CL/cl_ext_intel.h rename to benchmarks/old_opencl/include/CL/cl_ext_intel.h diff --git a/benchmarks/new_opencl/include/CL/cl_gl.h b/benchmarks/old_opencl/include/CL/cl_gl.h similarity index 100% rename from benchmarks/new_opencl/include/CL/cl_gl.h rename to benchmarks/old_opencl/include/CL/cl_gl.h diff --git a/benchmarks/new_opencl/include/CL/cl_gl_ext.h b/benchmarks/old_opencl/include/CL/cl_gl_ext.h similarity index 100% rename from benchmarks/new_opencl/include/CL/cl_gl_ext.h rename to benchmarks/old_opencl/include/CL/cl_gl_ext.h diff --git a/benchmarks/new_opencl/include/CL/cl_platform.h b/benchmarks/old_opencl/include/CL/cl_platform.h similarity index 100% rename from benchmarks/new_opencl/include/CL/cl_platform.h rename to benchmarks/old_opencl/include/CL/cl_platform.h diff --git a/benchmarks/new_opencl/include/CL/cl_va_api_media_sharing_intel.h b/benchmarks/old_opencl/include/CL/cl_va_api_media_sharing_intel.h similarity index 100% rename from benchmarks/new_opencl/include/CL/cl_va_api_media_sharing_intel.h rename to benchmarks/old_opencl/include/CL/cl_va_api_media_sharing_intel.h diff --git a/benchmarks/new_opencl/include/CL/cl_version.h b/benchmarks/old_opencl/include/CL/cl_version.h similarity index 100% rename from benchmarks/new_opencl/include/CL/cl_version.h rename to benchmarks/old_opencl/include/CL/cl_version.h diff --git a/benchmarks/new_opencl/include/CL/opencl.h 
b/benchmarks/old_opencl/include/CL/opencl.h similarity index 100% rename from benchmarks/new_opencl/include/CL/opencl.h rename to benchmarks/old_opencl/include/CL/opencl.h diff --git a/benchmarks/old_opencl/kmeans/Makefile b/benchmarks/old_opencl/kmeans/Makefile new file mode 100644 index 00000000..d00909ed --- /dev/null +++ b/benchmarks/old_opencl/kmeans/Makefile @@ -0,0 +1,79 @@ +RISCV_TOOL_PATH ?= $(wildcard ../../../../riscv-gnu-toolchain/drops) +POCL_CC_PATH ?= $(wildcard ../../../../pocl/drops_riscv_cc) +POCL_INC_PATH ?= $(wildcard ../include) +POCL_LIB_PATH ?= $(wildcard ../lib) +VX_RT_PATH ?= $(wildcard ../../../runtime) +VX_SIMX_PATH ?= $(wildcard ../../../simX/obj_dir) + +CC = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc +CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++ +DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump +HEX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy +GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb + +VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c +VX_SRCS += $(VX_RT_PATH)/startup/vx_start.S +VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s +VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c +VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s +VX_SRCS += $(VX_RT_PATH)/tests/tests.c +VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c +VX_SRCS += $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) + +VX_CFLAGS = -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld + +CXXFLAGS = -g -O0 -march=rv32im -mabi=ilp32 +CXXFLAGS += -ffreestanding # program may not begin at main() +CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections +CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions +CXXFLAGS += -I$(POCL_INC_PATH) + +VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a +QEMU_LIBS = $(VX_RT_PATH)/qemu/vx_api.c -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a + 
+PROJECT = kmeans +SRCS = main.cc read_input.c rmse.c cluster.c kmeans_clustering.c + +all: $(PROJECT).dump $(PROJECT).hex + +lib$(PROJECT).a: kernel.cl + POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl + +kmeans_clustering.o: kmeans_clustering.c + $(CC) $(CXXFLAGS) -c kmeans_clustering.c + +cluster.o: cluster.c + $(CC) $(CXXFLAGS) -c cluster.c + +read_input.o: read_input.c + $(CC) $(CXXFLAGS) -c read_input.c + +rmse.o: rmse.c + $(CC) $(CXXFLAGS) -c rmse.c + +$(PROJECT).elf: $(SRCS) lib$(PROJECT).a + $(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) $(SRCS) $(VX_LIBS) -o $(PROJECT).elf + +$(PROJECT).qemu: $(SRCS) lib$(PROJECT).a + $(CXX) $(CXXFLAGS) $(SRCS) $(QEMU_LIBS) -o $(PROJECT).qemu + +$(PROJECT).hex: $(PROJECT).elf + $(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex + +$(PROJECT).dump: $(PROJECT).elf + $(DMP) -D $(PROJECT).elf > $(PROJECT).dump + +run: $(PROJECT).hex + POCL_DEBUG=all $(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug + +qemu: $(PROJECT).qemu + POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -strace -d in_asm -D debug.log $(PROJECT).qemu + +gdb-s: $(PROJECT).qemu + POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -g 1234 -d in_asm -D debug.log $(PROJECT).qemu + +gdb-c: $(PROJECT).qemu + $(GDB) $(PROJECT).qemu + +clean: + rm -rf *.o *.elf *.dump *.hex *.qemu *.log *.debug \ No newline at end of file diff --git a/benchmarks/new_opencl/kmeans/README b/benchmarks/old_opencl/kmeans/README similarity index 100% rename from benchmarks/new_opencl/kmeans/README rename to benchmarks/old_opencl/kmeans/README diff --git a/benchmarks/new_opencl/kmeans/cluster.c b/benchmarks/old_opencl/kmeans/cluster.c similarity index 100% rename from benchmarks/new_opencl/kmeans/cluster.c rename to benchmarks/old_opencl/kmeans/cluster.c diff --git a/benchmarks/new_opencl/kmeans/getopt.c b/benchmarks/old_opencl/kmeans/getopt.c similarity 
index 97% rename from benchmarks/new_opencl/kmeans/getopt.c rename to benchmarks/old_opencl/kmeans/getopt.c index de98d917..fa2f3137 100755 --- a/benchmarks/new_opencl/kmeans/getopt.c +++ b/benchmarks/old_opencl/kmeans/getopt.c @@ -1,1184 +1,1184 @@ -/* Getopt for GNU. - NOTE: getopt is now part of the C library, so if you don't know what - "Keep this file name-space clean" means, talk to drepper@gnu.org - before changing it! - Copyright (C) 1987,88,89,90,91,92,93,94,95,96,98,99,2000,2001 - Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ - -/* This tells Alpha OSF/1 not to define a getopt prototype in . - Ditto for AIX 3.2 and . */ -#ifndef _NO_PROTO -# define _NO_PROTO -#endif - -#ifdef HAVE_CONFIG_H -# include -#endif - -#if !defined __STDC__ || !__STDC__ -/* This is a separate conditional since some stdc systems - reject `defined (const)'. */ -# ifndef const -# define const -# endif -#endif - -#include - -/* Comment out all this code if we are using the GNU C Library, and are not - actually compiling the library itself. This code is part of the GNU C - Library, but also included in many other GNU distributions. 
Compiling - and linking in this code is a waste when using the GNU C library - (especially if it is a shared library). Rather than having every GNU - program understand `configure --with-gnu-libc' and omit the object files, - it is simpler to just do this in the source for each such file. */ - -#define GETOPT_INTERFACE_VERSION 2 -#if !defined _LIBC && defined __GLIBC__ && __GLIBC__ >= 2 -# include -# if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION -# define ELIDE_CODE -# endif -#endif - -#ifndef ELIDE_CODE - - -/* This needs to come after some library #include - to get __GNU_LIBRARY__ defined. */ -#ifdef __GNU_LIBRARY__ -/* Don't include stdlib.h for non-GNU C libraries because some of them - contain conflicting prototypes for getopt. */ -# include -# include -#endif /* GNU C library. */ - -#ifdef VMS -# include -# if HAVE_STRING_H - 0 -# include -# endif -#endif - -#ifndef _ -/* This is for other GNU distributions with internationalized messages. */ -# if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC -# include -# ifndef _ -# define _(msgid) gettext (msgid) -# endif -# else -# define _(msgid) (msgid) -# endif -# if defined _LIBC && defined USE_IN_LIBIO -# include -# endif -#endif - -/* This version of `getopt' appears to the caller like standard Unix `getopt' - but it behaves differently for the user, since it allows the user - to intersperse the options with the other arguments. - - As `getopt' works, it permutes the elements of ARGV so that, - when it is done, all the options precede everything else. Thus - all application programs are extended to handle flexible argument order. - - Setting the environment variable POSIXLY_CORRECT disables permutation. - Then the behavior is completely standard. - - GNU application programs can use a third alternative mode in which - they can distinguish the relative order of options and other arguments. */ - -#include "getopt.h" - -/* For communication from `getopt' to the caller. 
- When `getopt' finds an option that takes an argument, - the argument value is returned here. - Also, when `ordering' is RETURN_IN_ORDER, - each non-option ARGV-element is returned here. */ - -char *optarg; - -/* Index in ARGV of the next element to be scanned. - This is used for communication to and from the caller - and for communication between successive calls to `getopt'. - - On entry to `getopt', zero means this is the first call; initialize. - - When `getopt' returns -1, this is the index of the first of the - non-option elements that the caller should itself scan. - - Otherwise, `optind' communicates from one call to the next - how much of ARGV has been scanned so far. */ - -/* 1003.2 says this must be 1 before any call. */ -int optind = 1; - -/* Formerly, initialization of getopt depended on optind==0, which - causes problems with re-calling getopt as programs generally don't - know that. */ - -int __getopt_initialized; - -/* The next char to be scanned in the option-element - in which the last option character we returned was found. - This allows us to pick up the scan where we left off. - - If this is zero, or a null string, it means resume the scan - by advancing to the next ARGV-element. */ - -static char *nextchar; - -/* Callers store zero here to inhibit the error message - for unrecognized options. */ - -int opterr = 1; - -/* Set to an option character which was unrecognized. - This must be initialized on some systems to avoid linking in the - system's own getopt implementation. */ - -int optopt = '?'; - -/* Describe how to deal with options that follow non-option ARGV-elements. - - If the caller did not specify anything, - the default is REQUIRE_ORDER if the environment variable - POSIXLY_CORRECT is defined, PERMUTE otherwise. - - REQUIRE_ORDER means don't recognize them as options; - stop option processing when the first non-option is seen. - This is what Unix does. 
- This mode of operation is selected by either setting the environment - variable POSIXLY_CORRECT, or using `+' as the first character - of the list of option characters. - - PERMUTE is the default. We permute the contents of ARGV as we scan, - so that eventually all the non-options are at the end. This allows options - to be given in any order, even with programs that were not written to - expect this. - - RETURN_IN_ORDER is an option available to programs that were written - to expect options and other ARGV-elements in any order and that care about - the ordering of the two. We describe each non-option ARGV-element - as if it were the argument of an option with character code 1. - Using `-' as the first character of the list of option characters - selects this mode of operation. - - The special argument `--' forces an end of option-scanning regardless - of the value of `ordering'. In the case of RETURN_IN_ORDER, only - `--' can cause `getopt' to return -1 with `optind' != ARGC. */ - -static enum -{ - REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER -} ordering; - -/* Value of POSIXLY_CORRECT environment variable. */ -static char *posixly_correct; - -#ifdef __GNU_LIBRARY__ -/* We want to avoid inclusion of string.h with non-GNU libraries - because there are many ways it can cause trouble. - On some systems, it contains special magic macros that don't work - in GCC. */ -# include -# define my_index strchr -#else - -//# if HAVE_STRING_H || WIN32 /* Pete Wilson mod 7/28/02 */ -# include -//# else -//# include -//# endif - -/* Avoid depending on library functions or files - whose names are inconsistent. */ - -#ifndef getenv -extern char *getenv (); -#endif - -static char * -my_index (str, chr) - const char *str; - int chr; -{ - while (*str) - { - if (*str == chr) - return (char *) str; - str++; - } - return 0; -} - -/* If using GCC, we can safely declare strlen this way. - If not using GCC, it is ok not to declare it. 
*/ -#ifdef __GNUC__ -/* Note that Motorola Delta 68k R3V7 comes with GCC but not stddef.h. - That was relevant to code that was here before. */ -# if (!defined __STDC__ || !__STDC__) && !defined strlen -/* gcc with -traditional declares the built-in strlen to return int, - and has done so at least since version 2.4.5. -- rms. */ -extern int strlen (const char *); -# endif /* not __STDC__ */ -#endif /* __GNUC__ */ - -#endif /* not __GNU_LIBRARY__ */ - -/* Handle permutation of arguments. */ - -/* Describe the part of ARGV that contains non-options that have - been skipped. `first_nonopt' is the index in ARGV of the first of them; - `last_nonopt' is the index after the last of them. */ - -static int first_nonopt; -static int last_nonopt; - -#ifdef _LIBC -/* Stored original parameters. - XXX This is no good solution. We should rather copy the args so - that we can compare them later. But we must not use malloc(3). */ -extern int __libc_argc; -extern char **__libc_argv; - -/* Bash 2.0 gives us an environment variable containing flags - indicating ARGV elements that should not be considered arguments. */ - -# ifdef USE_NONOPTION_FLAGS -/* Defined in getopt_init.c */ -extern char *__getopt_nonoption_flags; - -static int nonoption_flags_max_len; -static int nonoption_flags_len; -# endif - -# ifdef USE_NONOPTION_FLAGS -# define SWAP_FLAGS(ch1, ch2) \ - if (nonoption_flags_len > 0) \ - { \ - char __tmp = __getopt_nonoption_flags[ch1]; \ - __getopt_nonoption_flags[ch1] = __getopt_nonoption_flags[ch2]; \ - __getopt_nonoption_flags[ch2] = __tmp; \ - } -# else -# define SWAP_FLAGS(ch1, ch2) -# endif -#else /* !_LIBC */ -# define SWAP_FLAGS(ch1, ch2) -#endif /* _LIBC */ - -/* Exchange two adjacent subsequences of ARGV. - One subsequence is elements [first_nonopt,last_nonopt) - which contains all the non-options that have been skipped so far. - The other is elements [last_nonopt,optind), which contains all - the options processed since those non-options were skipped. 
- - `first_nonopt' and `last_nonopt' are relocated so that they describe - the new indices of the non-options in ARGV after they are moved. */ - -#if defined __STDC__ && __STDC__ -static void exchange (char **); -#endif - -static void -exchange (argv) - char **argv; -{ - int bottom = first_nonopt; - int middle = last_nonopt; - int top = optind; - char *tem; - - /* Exchange the shorter segment with the far end of the longer segment. - That puts the shorter segment into the right place. - It leaves the longer segment in the right place overall, - but it consists of two parts that need to be swapped next. */ - -#if defined _LIBC && defined USE_NONOPTION_FLAGS - /* First make sure the handling of the `__getopt_nonoption_flags' - string can work normally. Our top argument must be in the range - of the string. */ - if (nonoption_flags_len > 0 && top >= nonoption_flags_max_len) - { - /* We must extend the array. The user plays games with us and - presents new arguments. */ - char *new_str = malloc (top + 1); - if (new_str == NULL) - nonoption_flags_len = nonoption_flags_max_len = 0; - else - { - memset (__mempcpy (new_str, __getopt_nonoption_flags, - nonoption_flags_max_len), - '\0', top + 1 - nonoption_flags_max_len); - nonoption_flags_max_len = top + 1; - __getopt_nonoption_flags = new_str; - } - } -#endif - - while (top > middle && middle > bottom) - { - if (top - middle > middle - bottom) - { - /* Bottom segment is the short one. */ - int len = middle - bottom; - register int i; - - /* Swap it with the top part of the top segment. */ - for (i = 0; i < len; i++) - { - tem = argv[bottom + i]; - argv[bottom + i] = argv[top - (middle - bottom) + i]; - argv[top - (middle - bottom) + i] = tem; - SWAP_FLAGS (bottom + i, top - (middle - bottom) + i); - } - /* Exclude the moved bottom segment from further swapping. */ - top -= len; - } - else - { - /* Top segment is the short one. 
*/ - int len = top - middle; - register int i; - - /* Swap it with the bottom part of the bottom segment. */ - for (i = 0; i < len; i++) - { - tem = argv[bottom + i]; - argv[bottom + i] = argv[middle + i]; - argv[middle + i] = tem; - SWAP_FLAGS (bottom + i, middle + i); - } - /* Exclude the moved top segment from further swapping. */ - bottom += len; - } - } - - /* Update records for the slots the non-options now occupy. */ - - first_nonopt += (optind - last_nonopt); - last_nonopt = optind; -} - -/* Initialize the internal data when the first call is made. */ - -#if defined __STDC__ && __STDC__ -static const char *_getopt_initialize (int, char *const *, const char *); -#endif -static const char * -_getopt_initialize (argc, argv, optstring) - int argc; - char *const *argv; - const char *optstring; -{ - /* Start processing options with ARGV-element 1 (since ARGV-element 0 - is the program name); the sequence of previously skipped - non-option ARGV-elements is empty. */ - - first_nonopt = last_nonopt = optind; - - nextchar = NULL; - - posixly_correct = getenv ("POSIXLY_CORRECT"); - - /* Determine how to handle the ordering of options and nonoptions. 
*/ - - if (optstring[0] == '-') - { - ordering = RETURN_IN_ORDER; - ++optstring; - } - else if (optstring[0] == '+') - { - ordering = REQUIRE_ORDER; - ++optstring; - } - else if (posixly_correct != NULL) - ordering = REQUIRE_ORDER; - else - ordering = PERMUTE; - -#if defined _LIBC && defined USE_NONOPTION_FLAGS - if (posixly_correct == NULL - && argc == __libc_argc && argv == __libc_argv) - { - if (nonoption_flags_max_len == 0) - { - if (__getopt_nonoption_flags == NULL - || __getopt_nonoption_flags[0] == '\0') - nonoption_flags_max_len = -1; - else - { - const char *orig_str = __getopt_nonoption_flags; - int len = nonoption_flags_max_len = strlen (orig_str); - if (nonoption_flags_max_len < argc) - nonoption_flags_max_len = argc; - __getopt_nonoption_flags = - (char *) malloc (nonoption_flags_max_len); - if (__getopt_nonoption_flags == NULL) - nonoption_flags_max_len = -1; - else - memset (__mempcpy (__getopt_nonoption_flags, orig_str, len), - '\0', nonoption_flags_max_len - len); - } - } - nonoption_flags_len = nonoption_flags_max_len; - } - else - nonoption_flags_len = 0; -#endif - - return optstring; -} - -/* Scan elements of ARGV (whose length is ARGC) for option characters - given in OPTSTRING. - - If an element of ARGV starts with '-', and is not exactly "-" or "--", - then it is an option element. The characters of this element - (aside from the initial '-') are option characters. If `getopt' - is called repeatedly, it returns successively each of the option characters - from each of the option elements. - - If `getopt' finds another option character, it returns that character, - updating `optind' and `nextchar' so that the next call to `getopt' can - resume the scan with the following option character or ARGV-element. - - If there are no more option characters, `getopt' returns -1. - Then `optind' is the index in ARGV of the first ARGV-element - that is not an option. (The ARGV-elements have been permuted - so that those that are not options now come last.) 
- - OPTSTRING is a string containing the legitimate option characters. - If an option character is seen that is not listed in OPTSTRING, - return '?' after printing an error message. If you set `opterr' to - zero, the error message is suppressed but we still return '?'. - - If a char in OPTSTRING is followed by a colon, that means it wants an arg, - so the following text in the same ARGV-element, or the text of the following - ARGV-element, is returned in `optarg'. Two colons mean an option that - wants an optional arg; if there is text in the current ARGV-element, - it is returned in `optarg', otherwise `optarg' is set to zero. - - If OPTSTRING starts with `-' or `+', it requests different methods of - handling the non-option ARGV-elements. - See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above. - - Long-named options begin with `--' instead of `-'. - Their names may be abbreviated as long as the abbreviation is unique - or is an exact match for some defined option. If they have an - argument, it follows the option name in the same ARGV-element, separated - from the option name by a `=', or else the in next ARGV-element. - When `getopt' finds a long-named option, it returns 0 if that option's - `flag' field is nonzero, the value of the option's `val' field - if the `flag' field is zero. - - The elements of ARGV aren't really const, because we permute them. - But we pretend they're const in the prototype to be compatible - with other systems. - - LONGOPTS is a vector of `struct option' terminated by an - element containing a name which is zero. - - LONGIND returns the index in LONGOPT of the long-named option found. - It is only valid when a long-named option has been found by the most - recent call. - - If LONG_ONLY is nonzero, '-' as well as '--' can introduce - long-named options. 
*/ - -int -_getopt_internal (argc, argv, optstring, longopts, longind, long_only) - int argc; - char *const *argv; - const char *optstring; - const struct option *longopts; - int *longind; - int long_only; -{ - int print_errors = opterr; - if (optstring[0] == ':') - print_errors = 0; - - if (argc < 1) - return -1; - - optarg = NULL; - - if (optind == 0 || !__getopt_initialized) - { - if (optind == 0) - optind = 1; /* Don't scan ARGV[0], the program name. */ - optstring = _getopt_initialize (argc, argv, optstring); - __getopt_initialized = 1; - } - - /* Test whether ARGV[optind] points to a non-option argument. - Either it does not have option syntax, or there is an environment flag - from the shell indicating it is not an option. The later information - is only used when the used in the GNU libc. */ -#if defined _LIBC && defined USE_NONOPTION_FLAGS -# define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0' \ - || (optind < nonoption_flags_len \ - && __getopt_nonoption_flags[optind] == '1')) -#else -# define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0') -#endif - - if (nextchar == NULL || *nextchar == '\0') - { - /* Advance to the next ARGV-element. */ - - /* Give FIRST_NONOPT and LAST_NONOPT rational values if OPTIND has been - moved back by the user (who may also have changed the arguments). */ - if (last_nonopt > optind) - last_nonopt = optind; - if (first_nonopt > optind) - first_nonopt = optind; - - if (ordering == PERMUTE) - { - /* If we have just processed some options following some non-options, - exchange them so that the options come first. */ - - if (first_nonopt != last_nonopt && last_nonopt != optind) - exchange ((char **) argv); - else if (last_nonopt != optind) - first_nonopt = optind; - - /* Skip any additional non-options - and extend the range of non-options previously skipped. 
*/ - - while (optind < argc && NONOPTION_P) - optind++; - last_nonopt = optind; - } - - /* The special ARGV-element `--' means premature end of options. - Skip it like a null option, - then exchange with previous non-options as if it were an option, - then skip everything else like a non-option. */ - - if (optind != argc && !strcmp (argv[optind], "--")) - { - optind++; - - if (first_nonopt != last_nonopt && last_nonopt != optind) - exchange ((char **) argv); - else if (first_nonopt == last_nonopt) - first_nonopt = optind; - last_nonopt = argc; - - optind = argc; - } - - /* If we have done all the ARGV-elements, stop the scan - and back over any non-options that we skipped and permuted. */ - - if (optind == argc) - { - /* Set the next-arg-index to point at the non-options - that we previously skipped, so the caller will digest them. */ - if (first_nonopt != last_nonopt) - optind = first_nonopt; - return -1; - } - - /* If we have come to a non-option and did not permute it, - either stop the scan or describe it to the caller and pass it by. */ - - if (NONOPTION_P) - { - if (ordering == REQUIRE_ORDER) - return -1; - optarg = argv[optind++]; - return 1; - } - - /* We have found another option-ARGV-element. - Skip the initial punctuation. */ - - nextchar = (argv[optind] + 1 - + (longopts != NULL && argv[optind][1] == '-')); - } - - /* Decode the current option-ARGV-element. */ - - /* Check whether the ARGV-element is a long option. - - If long_only and the ARGV-element has the form "-f", where f is - a valid short option, don't consider it an abbreviated form of - a long option that starts with f. Otherwise there would be no - way to give the -f short option. - - On the other hand, if there's a long option "fubar" and - the ARGV-element is "-fu", do consider that an abbreviation of - the long option, just like "--fu", and not "-f" with arg "u". - - This distinction seems to be the most useful approach. 
*/ - - if (longopts != NULL - && (argv[optind][1] == '-' - || (long_only && (argv[optind][2] || !my_index (optstring, argv[optind][1]))))) - { - char *nameend; - const struct option *p; - const struct option *pfound = NULL; - int exact = 0; - int ambig = 0; - int indfound = -1; - int option_index; - - for (nameend = nextchar; *nameend && *nameend != '='; nameend++) - /* Do nothing. */ ; - - /* Test all long options for either exact match - or abbreviated matches. */ - for (p = longopts, option_index = 0; p->name; p++, option_index++) - if (!strncmp (p->name, nextchar, nameend - nextchar)) - { - if ((unsigned int) (nameend - nextchar) - == (unsigned int) strlen (p->name)) - { - /* Exact match found. */ - pfound = p; - indfound = option_index; - exact = 1; - break; - } - else if (pfound == NULL) - { - /* First nonexact match found. */ - pfound = p; - indfound = option_index; - } - else if (long_only - || pfound->has_arg != p->has_arg - || pfound->flag != p->flag - || pfound->val != p->val) - /* Second or later nonexact match found. */ - ambig = 1; - } - - if (ambig && !exact) - { - if (print_errors) - { -#if defined _LIBC && defined USE_IN_LIBIO - char *buf; - - __asprintf (&buf, _("%s: option `%s' is ambiguous\n"), - argv[0], argv[optind]); - - if (_IO_fwide (stderr, 0) > 0) - __fwprintf (stderr, L"%s", buf); - else - fputs (buf, stderr); - - free (buf); -#else - fprintf (stderr, _("%s: option `%s' is ambiguous\n"), - argv[0], argv[optind]); -#endif - } - nextchar += strlen (nextchar); - optind++; - optopt = 0; - return '?'; - } - - if (pfound != NULL) - { - option_index = indfound; - optind++; - if (*nameend) - { - /* Don't test has_arg with >, because some C compilers don't - allow it to be used on enums. 
*/ - if (pfound->has_arg) - optarg = nameend + 1; - else - { - if (print_errors) - { -#if defined _LIBC && defined USE_IN_LIBIO - char *buf; -#endif - - if (argv[optind - 1][1] == '-') - { - /* --option */ -#if defined _LIBC && defined USE_IN_LIBIO - __asprintf (&buf, _("\ -%s: option `--%s' doesn't allow an argument\n"), - argv[0], pfound->name); -#else - fprintf (stderr, _("\ -%s: option `--%s' doesn't allow an argument\n"), - argv[0], pfound->name); -#endif - } - else - { - /* +option or -option */ -#if defined _LIBC && defined USE_IN_LIBIO - __asprintf (&buf, _("\ -%s: option `%c%s' doesn't allow an argument\n"), - argv[0], argv[optind - 1][0], - pfound->name); -#else - fprintf (stderr, _("\ -%s: option `%c%s' doesn't allow an argument\n"), - argv[0], argv[optind - 1][0], pfound->name); -#endif - } - -#if defined _LIBC && defined USE_IN_LIBIO - if (_IO_fwide (stderr, 0) > 0) - __fwprintf (stderr, L"%s", buf); - else - fputs (buf, stderr); - - free (buf); -#endif - } - - nextchar += strlen (nextchar); - - optopt = pfound->val; - return '?'; - } - } - else if (pfound->has_arg == 1) - { - if (optind < argc) - optarg = argv[optind++]; - else - { - if (print_errors) - { -#if defined _LIBC && defined USE_IN_LIBIO - char *buf; - - __asprintf (&buf, - _("%s: option `%s' requires an argument\n"), - argv[0], argv[optind - 1]); - - if (_IO_fwide (stderr, 0) > 0) - __fwprintf (stderr, L"%s", buf); - else - fputs (buf, stderr); - - free (buf); -#else - fprintf (stderr, - _("%s: option `%s' requires an argument\n"), - argv[0], argv[optind - 1]); -#endif - } - nextchar += strlen (nextchar); - optopt = pfound->val; - return optstring[0] == ':' ? ':' : '?'; - } - } - nextchar += strlen (nextchar); - if (longind != NULL) - *longind = option_index; - if (pfound->flag) - { - *(pfound->flag) = pfound->val; - return 0; - } - return pfound->val; - } - - /* Can't find it as a long option. 
If this is not getopt_long_only, - or the option starts with '--' or is not a valid short - option, then it's an error. - Otherwise interpret it as a short option. */ - if (!long_only || argv[optind][1] == '-' - || my_index (optstring, *nextchar) == NULL) - { - if (print_errors) - { -#if defined _LIBC && defined USE_IN_LIBIO - char *buf; -#endif - - if (argv[optind][1] == '-') - { - /* --option */ -#if defined _LIBC && defined USE_IN_LIBIO - __asprintf (&buf, _("%s: unrecognized option `--%s'\n"), - argv[0], nextchar); -#else - fprintf (stderr, _("%s: unrecognized option `--%s'\n"), - argv[0], nextchar); -#endif - } - else - { - /* +option or -option */ -#if defined _LIBC && defined USE_IN_LIBIO - __asprintf (&buf, _("%s: unrecognized option `%c%s'\n"), - argv[0], argv[optind][0], nextchar); -#else - fprintf (stderr, _("%s: unrecognized option `%c%s'\n"), - argv[0], argv[optind][0], nextchar); -#endif - } - -#if defined _LIBC && defined USE_IN_LIBIO - if (_IO_fwide (stderr, 0) > 0) - __fwprintf (stderr, L"%s", buf); - else - fputs (buf, stderr); - - free (buf); -#endif - } - nextchar = (char *) ""; - optind++; - optopt = 0; - return '?'; - } - } - - /* Look at and handle the next short option-character. */ - - { - char c = *nextchar++; - char *temp = my_index (optstring, c); - - /* Increment `optind' when we start to process its last character. */ - if (*nextchar == '\0') - ++optind; - - if (temp == NULL || c == ':') - { - if (print_errors) - { -#if defined _LIBC && defined USE_IN_LIBIO - char *buf; -#endif - - if (posixly_correct) - { - /* 1003.2 specifies the format of this message. 
*/ -#if defined _LIBC && defined USE_IN_LIBIO - __asprintf (&buf, _("%s: illegal option -- %c\n"), - argv[0], c); -#else - fprintf (stderr, _("%s: illegal option -- %c\n"), argv[0], c); -#endif - } - else - { -#if defined _LIBC && defined USE_IN_LIBIO - __asprintf (&buf, _("%s: invalid option -- %c\n"), - argv[0], c); -#else - fprintf (stderr, _("%s: invalid option -- %c\n"), argv[0], c); -#endif - } - -#if defined _LIBC && defined USE_IN_LIBIO - if (_IO_fwide (stderr, 0) > 0) - __fwprintf (stderr, L"%s", buf); - else - fputs (buf, stderr); - - free (buf); -#endif - } - optopt = c; - return '?'; - } - /* Convenience. Treat POSIX -W foo same as long option --foo */ - if (temp[0] == 'W' && temp[1] == ';') - { - char *nameend; - const struct option *p; - const struct option *pfound = NULL; - int exact = 0; - int ambig = 0; - int indfound = 0; - int option_index; - - /* This is an option that requires an argument. */ - if (*nextchar != '\0') - { - optarg = nextchar; - /* If we end this ARGV-element by taking the rest as an arg, - we must advance to the next element now. */ - optind++; - } - else if (optind == argc) - { - if (print_errors) - { - /* 1003.2 specifies the format of this message. */ -#if defined _LIBC && defined USE_IN_LIBIO - char *buf; - - __asprintf (&buf, _("%s: option requires an argument -- %c\n"), - argv[0], c); - - if (_IO_fwide (stderr, 0) > 0) - __fwprintf (stderr, L"%s", buf); - else - fputs (buf, stderr); - - free (buf); -#else - fprintf (stderr, _("%s: option requires an argument -- %c\n"), - argv[0], c); -#endif - } - optopt = c; - if (optstring[0] == ':') - c = ':'; - else - c = '?'; - return c; - } - else - /* We already incremented `optind' once; - increment it again when taking next ARGV-elt as argument. */ - optarg = argv[optind++]; - - /* optarg is now the argument, see if it's in the - table of longopts. */ - - for (nextchar = nameend = optarg; *nameend && *nameend != '='; nameend++) - /* Do nothing. 
*/ ; - - /* Test all long options for either exact match - or abbreviated matches. */ - for (p = longopts, option_index = 0; p->name; p++, option_index++) - if (!strncmp (p->name, nextchar, nameend - nextchar)) - { - if ((unsigned int) (nameend - nextchar) == strlen (p->name)) - { - /* Exact match found. */ - pfound = p; - indfound = option_index; - exact = 1; - break; - } - else if (pfound == NULL) - { - /* First nonexact match found. */ - pfound = p; - indfound = option_index; - } - else - /* Second or later nonexact match found. */ - ambig = 1; - } - if (ambig && !exact) - { - if (print_errors) - { -#if defined _LIBC && defined USE_IN_LIBIO - char *buf; - - __asprintf (&buf, _("%s: option `-W %s' is ambiguous\n"), - argv[0], argv[optind]); - - if (_IO_fwide (stderr, 0) > 0) - __fwprintf (stderr, L"%s", buf); - else - fputs (buf, stderr); - - free (buf); -#else - fprintf (stderr, _("%s: option `-W %s' is ambiguous\n"), - argv[0], argv[optind]); -#endif - } - nextchar += strlen (nextchar); - optind++; - return '?'; - } - if (pfound != NULL) - { - option_index = indfound; - if (*nameend) - { - /* Don't test has_arg with >, because some C compilers don't - allow it to be used on enums. 
*/ - if (pfound->has_arg) - optarg = nameend + 1; - else - { - if (print_errors) - { -#if defined _LIBC && defined USE_IN_LIBIO - char *buf; - - __asprintf (&buf, _("\ -%s: option `-W %s' doesn't allow an argument\n"), - argv[0], pfound->name); - - if (_IO_fwide (stderr, 0) > 0) - __fwprintf (stderr, L"%s", buf); - else - fputs (buf, stderr); - - free (buf); -#else - fprintf (stderr, _("\ -%s: option `-W %s' doesn't allow an argument\n"), - argv[0], pfound->name); -#endif - } - - nextchar += strlen (nextchar); - return '?'; - } - } - else if (pfound->has_arg == 1) - { - if (optind < argc) - optarg = argv[optind++]; - else - { - if (print_errors) - { -#if defined _LIBC && defined USE_IN_LIBIO - char *buf; - - __asprintf (&buf, _("\ -%s: option `%s' requires an argument\n"), - argv[0], argv[optind - 1]); - - if (_IO_fwide (stderr, 0) > 0) - __fwprintf (stderr, L"%s", buf); - else - fputs (buf, stderr); - - free (buf); -#else - fprintf (stderr, - _("%s: option `%s' requires an argument\n"), - argv[0], argv[optind - 1]); -#endif - } - nextchar += strlen (nextchar); - return optstring[0] == ':' ? ':' : '?'; - } - } - nextchar += strlen (nextchar); - if (longind != NULL) - *longind = option_index; - if (pfound->flag) - { - *(pfound->flag) = pfound->val; - return 0; - } - return pfound->val; - } - nextchar = NULL; - return 'W'; /* Let the application handle it. */ - } - if (temp[1] == ':') - { - if (temp[2] == ':') - { - /* This is an option that accepts an argument optionally. */ - if (*nextchar != '\0') - { - optarg = nextchar; - optind++; - } - else - optarg = NULL; - nextchar = NULL; - } - else - { - /* This is an option that requires an argument. */ - if (*nextchar != '\0') - { - optarg = nextchar; - /* If we end this ARGV-element by taking the rest as an arg, - we must advance to the next element now. */ - optind++; - } - else if (optind == argc) - { - if (print_errors) - { - /* 1003.2 specifies the format of this message. 
*/ -#if defined _LIBC && defined USE_IN_LIBIO - char *buf; - - __asprintf (&buf, - _("%s: option requires an argument -- %c\n"), - argv[0], c); - - if (_IO_fwide (stderr, 0) > 0) - __fwprintf (stderr, L"%s", buf); - else - fputs (buf, stderr); - - free (buf); -#else - fprintf (stderr, - _("%s: option requires an argument -- %c\n"), - argv[0], c); -#endif - } - optopt = c; - if (optstring[0] == ':') - c = ':'; - else - c = '?'; - } - else - /* We already incremented `optind' once; - increment it again when taking next ARGV-elt as argument. */ - optarg = argv[optind++]; - nextchar = NULL; - } - } - return c; - } -} - -int -getopt (argc, argv, optstring) - int argc; - char *const *argv; - const char *optstring; -{ - return _getopt_internal (argc, argv, optstring, - (const struct option *) 0, - (int *) 0, - 0); -} - -#endif /* Not ELIDE_CODE. */ - - -/* Compile with -DTEST to make an executable for use in testing +/* Getopt for GNU. + NOTE: getopt is now part of the C library, so if you don't know what + "Keep this file name-space clean" means, talk to drepper@gnu.org + before changing it! + Copyright (C) 1987,88,89,90,91,92,93,94,95,96,98,99,2000,2001 + Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. 
*/ + +/* This tells Alpha OSF/1 not to define a getopt prototype in . + Ditto for AIX 3.2 and . */ +#ifndef _NO_PROTO +# define _NO_PROTO +#endif + +#ifdef HAVE_CONFIG_H +# include +#endif + +#if !defined __STDC__ || !__STDC__ +/* This is a separate conditional since some stdc systems + reject `defined (const)'. */ +# ifndef const +# define const +# endif +#endif + +#include + +/* Comment out all this code if we are using the GNU C Library, and are not + actually compiling the library itself. This code is part of the GNU C + Library, but also included in many other GNU distributions. Compiling + and linking in this code is a waste when using the GNU C library + (especially if it is a shared library). Rather than having every GNU + program understand `configure --with-gnu-libc' and omit the object files, + it is simpler to just do this in the source for each such file. */ + +#define GETOPT_INTERFACE_VERSION 2 +#if !defined _LIBC && defined __GLIBC__ && __GLIBC__ >= 2 +# include +# if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION +# define ELIDE_CODE +# endif +#endif + +#ifndef ELIDE_CODE + + +/* This needs to come after some library #include + to get __GNU_LIBRARY__ defined. */ +#ifdef __GNU_LIBRARY__ +/* Don't include stdlib.h for non-GNU C libraries because some of them + contain conflicting prototypes for getopt. */ +# include +# include +#endif /* GNU C library. */ + +#ifdef VMS +# include +# if HAVE_STRING_H - 0 +# include +# endif +#endif + +#ifndef _ +/* This is for other GNU distributions with internationalized messages. 
*/ +# if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC +# include +# ifndef _ +# define _(msgid) gettext (msgid) +# endif +# else +# define _(msgid) (msgid) +# endif +# if defined _LIBC && defined USE_IN_LIBIO +# include +# endif +#endif + +/* This version of `getopt' appears to the caller like standard Unix `getopt' + but it behaves differently for the user, since it allows the user + to intersperse the options with the other arguments. + + As `getopt' works, it permutes the elements of ARGV so that, + when it is done, all the options precede everything else. Thus + all application programs are extended to handle flexible argument order. + + Setting the environment variable POSIXLY_CORRECT disables permutation. + Then the behavior is completely standard. + + GNU application programs can use a third alternative mode in which + they can distinguish the relative order of options and other arguments. */ + +#include "getopt.h" + +/* For communication from `getopt' to the caller. + When `getopt' finds an option that takes an argument, + the argument value is returned here. + Also, when `ordering' is RETURN_IN_ORDER, + each non-option ARGV-element is returned here. */ + +char *optarg; + +/* Index in ARGV of the next element to be scanned. + This is used for communication to and from the caller + and for communication between successive calls to `getopt'. + + On entry to `getopt', zero means this is the first call; initialize. + + When `getopt' returns -1, this is the index of the first of the + non-option elements that the caller should itself scan. + + Otherwise, `optind' communicates from one call to the next + how much of ARGV has been scanned so far. */ + +/* 1003.2 says this must be 1 before any call. */ +int optind = 1; + +/* Formerly, initialization of getopt depended on optind==0, which + causes problems with re-calling getopt as programs generally don't + know that. 
*/ + +int __getopt_initialized; + +/* The next char to be scanned in the option-element + in which the last option character we returned was found. + This allows us to pick up the scan where we left off. + + If this is zero, or a null string, it means resume the scan + by advancing to the next ARGV-element. */ + +static char *nextchar; + +/* Callers store zero here to inhibit the error message + for unrecognized options. */ + +int opterr = 1; + +/* Set to an option character which was unrecognized. + This must be initialized on some systems to avoid linking in the + system's own getopt implementation. */ + +int optopt = '?'; + +/* Describe how to deal with options that follow non-option ARGV-elements. + + If the caller did not specify anything, + the default is REQUIRE_ORDER if the environment variable + POSIXLY_CORRECT is defined, PERMUTE otherwise. + + REQUIRE_ORDER means don't recognize them as options; + stop option processing when the first non-option is seen. + This is what Unix does. + This mode of operation is selected by either setting the environment + variable POSIXLY_CORRECT, or using `+' as the first character + of the list of option characters. + + PERMUTE is the default. We permute the contents of ARGV as we scan, + so that eventually all the non-options are at the end. This allows options + to be given in any order, even with programs that were not written to + expect this. + + RETURN_IN_ORDER is an option available to programs that were written + to expect options and other ARGV-elements in any order and that care about + the ordering of the two. We describe each non-option ARGV-element + as if it were the argument of an option with character code 1. + Using `-' as the first character of the list of option characters + selects this mode of operation. + + The special argument `--' forces an end of option-scanning regardless + of the value of `ordering'. 
In the case of RETURN_IN_ORDER, only + `--' can cause `getopt' to return -1 with `optind' != ARGC. */ + +static enum +{ + REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER +} ordering; + +/* Value of POSIXLY_CORRECT environment variable. */ +static char *posixly_correct; + +#ifdef __GNU_LIBRARY__ +/* We want to avoid inclusion of string.h with non-GNU libraries + because there are many ways it can cause trouble. + On some systems, it contains special magic macros that don't work + in GCC. */ +# include <string.h> +# define my_index strchr +#else + +//# if HAVE_STRING_H || WIN32 /* Pete Wilson mod 7/28/02 */ +# include <string.h> +//# else +//# include <strings.h> +//# endif + +/* Avoid depending on library functions or files + whose names are inconsistent. */ + +#ifndef getenv +extern char *getenv (); +#endif + +static char * +my_index (str, chr) + const char *str; + int chr; +{ + while (*str) + { + if (*str == chr) + return (char *) str; + str++; + } + return 0; +} + +/* If using GCC, we can safely declare strlen this way. + If not using GCC, it is ok not to declare it. */ +#ifdef __GNUC__ +/* Note that Motorola Delta 68k R3V7 comes with GCC but not stddef.h. + That was relevant to code that was here before. */ +# if (!defined __STDC__ || !__STDC__) && !defined strlen +/* gcc with -traditional declares the built-in strlen to return int, + and has done so at least since version 2.4.5. -- rms. */ +extern int strlen (const char *); +# endif /* not __STDC__ */ +#endif /* __GNUC__ */ + +#endif /* not __GNU_LIBRARY__ */ + +/* Handle permutation of arguments. */ + +/* Describe the part of ARGV that contains non-options that have + been skipped. `first_nonopt' is the index in ARGV of the first of them; + `last_nonopt' is the index after the last of them. */ + +static int first_nonopt; +static int last_nonopt; + +#ifdef _LIBC +/* Stored original parameters. + XXX This is no good solution. We should rather copy the args so + that we can compare them later. But we must not use malloc(3).
*/ +extern int __libc_argc; +extern char **__libc_argv; + +/* Bash 2.0 gives us an environment variable containing flags + indicating ARGV elements that should not be considered arguments. */ + +# ifdef USE_NONOPTION_FLAGS +/* Defined in getopt_init.c */ +extern char *__getopt_nonoption_flags; + +static int nonoption_flags_max_len; +static int nonoption_flags_len; +# endif + +# ifdef USE_NONOPTION_FLAGS +# define SWAP_FLAGS(ch1, ch2) \ + if (nonoption_flags_len > 0) \ + { \ + char __tmp = __getopt_nonoption_flags[ch1]; \ + __getopt_nonoption_flags[ch1] = __getopt_nonoption_flags[ch2]; \ + __getopt_nonoption_flags[ch2] = __tmp; \ + } +# else +# define SWAP_FLAGS(ch1, ch2) +# endif +#else /* !_LIBC */ +# define SWAP_FLAGS(ch1, ch2) +#endif /* _LIBC */ + +/* Exchange two adjacent subsequences of ARGV. + One subsequence is elements [first_nonopt,last_nonopt) + which contains all the non-options that have been skipped so far. + The other is elements [last_nonopt,optind), which contains all + the options processed since those non-options were skipped. + + `first_nonopt' and `last_nonopt' are relocated so that they describe + the new indices of the non-options in ARGV after they are moved. */ + +#if defined __STDC__ && __STDC__ +static void exchange (char **); +#endif + +static void +exchange (argv) + char **argv; +{ + int bottom = first_nonopt; + int middle = last_nonopt; + int top = optind; + char *tem; + + /* Exchange the shorter segment with the far end of the longer segment. + That puts the shorter segment into the right place. + It leaves the longer segment in the right place overall, + but it consists of two parts that need to be swapped next. */ + +#if defined _LIBC && defined USE_NONOPTION_FLAGS + /* First make sure the handling of the `__getopt_nonoption_flags' + string can work normally. Our top argument must be in the range + of the string. */ + if (nonoption_flags_len > 0 && top >= nonoption_flags_max_len) + { + /* We must extend the array. 
The user plays games with us and + presents new arguments. */ + char *new_str = malloc (top + 1); + if (new_str == NULL) + nonoption_flags_len = nonoption_flags_max_len = 0; + else + { + memset (__mempcpy (new_str, __getopt_nonoption_flags, + nonoption_flags_max_len), + '\0', top + 1 - nonoption_flags_max_len); + nonoption_flags_max_len = top + 1; + __getopt_nonoption_flags = new_str; + } + } +#endif + + while (top > middle && middle > bottom) + { + if (top - middle > middle - bottom) + { + /* Bottom segment is the short one. */ + int len = middle - bottom; + register int i; + + /* Swap it with the top part of the top segment. */ + for (i = 0; i < len; i++) + { + tem = argv[bottom + i]; + argv[bottom + i] = argv[top - (middle - bottom) + i]; + argv[top - (middle - bottom) + i] = tem; + SWAP_FLAGS (bottom + i, top - (middle - bottom) + i); + } + /* Exclude the moved bottom segment from further swapping. */ + top -= len; + } + else + { + /* Top segment is the short one. */ + int len = top - middle; + register int i; + + /* Swap it with the bottom part of the bottom segment. */ + for (i = 0; i < len; i++) + { + tem = argv[bottom + i]; + argv[bottom + i] = argv[middle + i]; + argv[middle + i] = tem; + SWAP_FLAGS (bottom + i, middle + i); + } + /* Exclude the moved top segment from further swapping. */ + bottom += len; + } + } + + /* Update records for the slots the non-options now occupy. */ + + first_nonopt += (optind - last_nonopt); + last_nonopt = optind; +} + +/* Initialize the internal data when the first call is made. */ + +#if defined __STDC__ && __STDC__ +static const char *_getopt_initialize (int, char *const *, const char *); +#endif +static const char * +_getopt_initialize (argc, argv, optstring) + int argc; + char *const *argv; + const char *optstring; +{ + /* Start processing options with ARGV-element 1 (since ARGV-element 0 + is the program name); the sequence of previously skipped + non-option ARGV-elements is empty. 
*/ + + first_nonopt = last_nonopt = optind; + + nextchar = NULL; + + posixly_correct = getenv ("POSIXLY_CORRECT"); + + /* Determine how to handle the ordering of options and nonoptions. */ + + if (optstring[0] == '-') + { + ordering = RETURN_IN_ORDER; + ++optstring; + } + else if (optstring[0] == '+') + { + ordering = REQUIRE_ORDER; + ++optstring; + } + else if (posixly_correct != NULL) + ordering = REQUIRE_ORDER; + else + ordering = PERMUTE; + +#if defined _LIBC && defined USE_NONOPTION_FLAGS + if (posixly_correct == NULL + && argc == __libc_argc && argv == __libc_argv) + { + if (nonoption_flags_max_len == 0) + { + if (__getopt_nonoption_flags == NULL + || __getopt_nonoption_flags[0] == '\0') + nonoption_flags_max_len = -1; + else + { + const char *orig_str = __getopt_nonoption_flags; + int len = nonoption_flags_max_len = strlen (orig_str); + if (nonoption_flags_max_len < argc) + nonoption_flags_max_len = argc; + __getopt_nonoption_flags = + (char *) malloc (nonoption_flags_max_len); + if (__getopt_nonoption_flags == NULL) + nonoption_flags_max_len = -1; + else + memset (__mempcpy (__getopt_nonoption_flags, orig_str, len), + '\0', nonoption_flags_max_len - len); + } + } + nonoption_flags_len = nonoption_flags_max_len; + } + else + nonoption_flags_len = 0; +#endif + + return optstring; +} + +/* Scan elements of ARGV (whose length is ARGC) for option characters + given in OPTSTRING. + + If an element of ARGV starts with '-', and is not exactly "-" or "--", + then it is an option element. The characters of this element + (aside from the initial '-') are option characters. If `getopt' + is called repeatedly, it returns successively each of the option characters + from each of the option elements. + + If `getopt' finds another option character, it returns that character, + updating `optind' and `nextchar' so that the next call to `getopt' can + resume the scan with the following option character or ARGV-element. 
+ + If there are no more option characters, `getopt' returns -1. + Then `optind' is the index in ARGV of the first ARGV-element + that is not an option. (The ARGV-elements have been permuted + so that those that are not options now come last.) + + OPTSTRING is a string containing the legitimate option characters. + If an option character is seen that is not listed in OPTSTRING, + return '?' after printing an error message. If you set `opterr' to + zero, the error message is suppressed but we still return '?'. + + If a char in OPTSTRING is followed by a colon, that means it wants an arg, + so the following text in the same ARGV-element, or the text of the following + ARGV-element, is returned in `optarg'. Two colons mean an option that + wants an optional arg; if there is text in the current ARGV-element, + it is returned in `optarg', otherwise `optarg' is set to zero. + + If OPTSTRING starts with `-' or `+', it requests different methods of + handling the non-option ARGV-elements. + See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above. + + Long-named options begin with `--' instead of `-'. + Their names may be abbreviated as long as the abbreviation is unique + or is an exact match for some defined option. If they have an + argument, it follows the option name in the same ARGV-element, separated + from the option name by a `=', or else in the next ARGV-element. + When `getopt' finds a long-named option, it returns 0 if that option's + `flag' field is nonzero, the value of the option's `val' field + if the `flag' field is zero. + + The elements of ARGV aren't really const, because we permute them. + But we pretend they're const in the prototype to be compatible + with other systems. + + LONGOPTS is a vector of `struct option' terminated by an + element containing a name which is zero. + + LONGIND returns the index in LONGOPTS of the long-named option found. + It is only valid when a long-named option has been found by the most + recent call.
+ + If LONG_ONLY is nonzero, '-' as well as '--' can introduce + long-named options. */ + +int +_getopt_internal (argc, argv, optstring, longopts, longind, long_only) + int argc; + char *const *argv; + const char *optstring; + const struct option *longopts; + int *longind; + int long_only; +{ + int print_errors = opterr; + if (optstring[0] == ':') + print_errors = 0; + + if (argc < 1) + return -1; + + optarg = NULL; + + if (optind == 0 || !__getopt_initialized) + { + if (optind == 0) + optind = 1; /* Don't scan ARGV[0], the program name. */ + optstring = _getopt_initialize (argc, argv, optstring); + __getopt_initialized = 1; + } + + /* Test whether ARGV[optind] points to a non-option argument. + Either it does not have option syntax, or there is an environment flag + from the shell indicating it is not an option. The latter information + is only used when this code is built as part of the GNU libc. */ +#if defined _LIBC && defined USE_NONOPTION_FLAGS +# define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0' \ + || (optind < nonoption_flags_len \ + && __getopt_nonoption_flags[optind] == '1')) +#else +# define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0') +#endif + + if (nextchar == NULL || *nextchar == '\0') + { + /* Advance to the next ARGV-element. */ + + /* Give FIRST_NONOPT and LAST_NONOPT rational values if OPTIND has been + moved back by the user (who may also have changed the arguments). */ + if (last_nonopt > optind) + last_nonopt = optind; + if (first_nonopt > optind) + first_nonopt = optind; + + if (ordering == PERMUTE) + { + /* If we have just processed some options following some non-options, + exchange them so that the options come first. */ + + if (first_nonopt != last_nonopt && last_nonopt != optind) + exchange ((char **) argv); + else if (last_nonopt != optind) + first_nonopt = optind; + + /* Skip any additional non-options + and extend the range of non-options previously skipped.
*/ + + while (optind < argc && NONOPTION_P) + optind++; + last_nonopt = optind; + } + + /* The special ARGV-element `--' means premature end of options. + Skip it like a null option, + then exchange with previous non-options as if it were an option, + then skip everything else like a non-option. */ + + if (optind != argc && !strcmp (argv[optind], "--")) + { + optind++; + + if (first_nonopt != last_nonopt && last_nonopt != optind) + exchange ((char **) argv); + else if (first_nonopt == last_nonopt) + first_nonopt = optind; + last_nonopt = argc; + + optind = argc; + } + + /* If we have done all the ARGV-elements, stop the scan + and back over any non-options that we skipped and permuted. */ + + if (optind == argc) + { + /* Set the next-arg-index to point at the non-options + that we previously skipped, so the caller will digest them. */ + if (first_nonopt != last_nonopt) + optind = first_nonopt; + return -1; + } + + /* If we have come to a non-option and did not permute it, + either stop the scan or describe it to the caller and pass it by. */ + + if (NONOPTION_P) + { + if (ordering == REQUIRE_ORDER) + return -1; + optarg = argv[optind++]; + return 1; + } + + /* We have found another option-ARGV-element. + Skip the initial punctuation. */ + + nextchar = (argv[optind] + 1 + + (longopts != NULL && argv[optind][1] == '-')); + } + + /* Decode the current option-ARGV-element. */ + + /* Check whether the ARGV-element is a long option. + + If long_only and the ARGV-element has the form "-f", where f is + a valid short option, don't consider it an abbreviated form of + a long option that starts with f. Otherwise there would be no + way to give the -f short option. + + On the other hand, if there's a long option "fubar" and + the ARGV-element is "-fu", do consider that an abbreviation of + the long option, just like "--fu", and not "-f" with arg "u". + + This distinction seems to be the most useful approach. 
*/ + + if (longopts != NULL + && (argv[optind][1] == '-' + || (long_only && (argv[optind][2] || !my_index (optstring, argv[optind][1]))))) + { + char *nameend; + const struct option *p; + const struct option *pfound = NULL; + int exact = 0; + int ambig = 0; + int indfound = -1; + int option_index; + + for (nameend = nextchar; *nameend && *nameend != '='; nameend++) + /* Do nothing. */ ; + + /* Test all long options for either exact match + or abbreviated matches. */ + for (p = longopts, option_index = 0; p->name; p++, option_index++) + if (!strncmp (p->name, nextchar, nameend - nextchar)) + { + if ((unsigned int) (nameend - nextchar) + == (unsigned int) strlen (p->name)) + { + /* Exact match found. */ + pfound = p; + indfound = option_index; + exact = 1; + break; + } + else if (pfound == NULL) + { + /* First nonexact match found. */ + pfound = p; + indfound = option_index; + } + else if (long_only + || pfound->has_arg != p->has_arg + || pfound->flag != p->flag + || pfound->val != p->val) + /* Second or later nonexact match found. */ + ambig = 1; + } + + if (ambig && !exact) + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, _("%s: option `%s' is ambiguous\n"), + argv[0], argv[optind]); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, _("%s: option `%s' is ambiguous\n"), + argv[0], argv[optind]); +#endif + } + nextchar += strlen (nextchar); + optind++; + optopt = 0; + return '?'; + } + + if (pfound != NULL) + { + option_index = indfound; + optind++; + if (*nameend) + { + /* Don't test has_arg with >, because some C compilers don't + allow it to be used on enums. 
*/ + if (pfound->has_arg) + optarg = nameend + 1; + else + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; +#endif + + if (argv[optind - 1][1] == '-') + { + /* --option */ +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("\ +%s: option `--%s' doesn't allow an argument\n"), + argv[0], pfound->name); +#else + fprintf (stderr, _("\ +%s: option `--%s' doesn't allow an argument\n"), + argv[0], pfound->name); +#endif + } + else + { + /* +option or -option */ +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("\ +%s: option `%c%s' doesn't allow an argument\n"), + argv[0], argv[optind - 1][0], + pfound->name); +#else + fprintf (stderr, _("\ +%s: option `%c%s' doesn't allow an argument\n"), + argv[0], argv[optind - 1][0], pfound->name); +#endif + } + +#if defined _LIBC && defined USE_IN_LIBIO + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#endif + } + + nextchar += strlen (nextchar); + + optopt = pfound->val; + return '?'; + } + } + else if (pfound->has_arg == 1) + { + if (optind < argc) + optarg = argv[optind++]; + else + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, + _("%s: option `%s' requires an argument\n"), + argv[0], argv[optind - 1]); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, + _("%s: option `%s' requires an argument\n"), + argv[0], argv[optind - 1]); +#endif + } + nextchar += strlen (nextchar); + optopt = pfound->val; + return optstring[0] == ':' ? ':' : '?'; + } + } + nextchar += strlen (nextchar); + if (longind != NULL) + *longind = option_index; + if (pfound->flag) + { + *(pfound->flag) = pfound->val; + return 0; + } + return pfound->val; + } + + /* Can't find it as a long option. 
If this is not getopt_long_only, + or the option starts with '--' or is not a valid short + option, then it's an error. + Otherwise interpret it as a short option. */ + if (!long_only || argv[optind][1] == '-' + || my_index (optstring, *nextchar) == NULL) + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; +#endif + + if (argv[optind][1] == '-') + { + /* --option */ +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("%s: unrecognized option `--%s'\n"), + argv[0], nextchar); +#else + fprintf (stderr, _("%s: unrecognized option `--%s'\n"), + argv[0], nextchar); +#endif + } + else + { + /* +option or -option */ +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("%s: unrecognized option `%c%s'\n"), + argv[0], argv[optind][0], nextchar); +#else + fprintf (stderr, _("%s: unrecognized option `%c%s'\n"), + argv[0], argv[optind][0], nextchar); +#endif + } + +#if defined _LIBC && defined USE_IN_LIBIO + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#endif + } + nextchar = (char *) ""; + optind++; + optopt = 0; + return '?'; + } + } + + /* Look at and handle the next short option-character. */ + + { + char c = *nextchar++; + char *temp = my_index (optstring, c); + + /* Increment `optind' when we start to process its last character. */ + if (*nextchar == '\0') + ++optind; + + if (temp == NULL || c == ':') + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; +#endif + + if (posixly_correct) + { + /* 1003.2 specifies the format of this message. 
*/ +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("%s: illegal option -- %c\n"), + argv[0], c); +#else + fprintf (stderr, _("%s: illegal option -- %c\n"), argv[0], c); +#endif + } + else + { +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("%s: invalid option -- %c\n"), + argv[0], c); +#else + fprintf (stderr, _("%s: invalid option -- %c\n"), argv[0], c); +#endif + } + +#if defined _LIBC && defined USE_IN_LIBIO + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#endif + } + optopt = c; + return '?'; + } + /* Convenience. Treat POSIX -W foo same as long option --foo */ + if (temp[0] == 'W' && temp[1] == ';') + { + char *nameend; + const struct option *p; + const struct option *pfound = NULL; + int exact = 0; + int ambig = 0; + int indfound = 0; + int option_index; + + /* This is an option that requires an argument. */ + if (*nextchar != '\0') + { + optarg = nextchar; + /* If we end this ARGV-element by taking the rest as an arg, + we must advance to the next element now. */ + optind++; + } + else if (optind == argc) + { + if (print_errors) + { + /* 1003.2 specifies the format of this message. */ +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, _("%s: option requires an argument -- %c\n"), + argv[0], c); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, _("%s: option requires an argument -- %c\n"), + argv[0], c); +#endif + } + optopt = c; + if (optstring[0] == ':') + c = ':'; + else + c = '?'; + return c; + } + else + /* We already incremented `optind' once; + increment it again when taking next ARGV-elt as argument. */ + optarg = argv[optind++]; + + /* optarg is now the argument, see if it's in the + table of longopts. */ + + for (nextchar = nameend = optarg; *nameend && *nameend != '='; nameend++) + /* Do nothing. 
*/ ; + + /* Test all long options for either exact match + or abbreviated matches. */ + for (p = longopts, option_index = 0; p->name; p++, option_index++) + if (!strncmp (p->name, nextchar, nameend - nextchar)) + { + if ((unsigned int) (nameend - nextchar) == strlen (p->name)) + { + /* Exact match found. */ + pfound = p; + indfound = option_index; + exact = 1; + break; + } + else if (pfound == NULL) + { + /* First nonexact match found. */ + pfound = p; + indfound = option_index; + } + else + /* Second or later nonexact match found. */ + ambig = 1; + } + if (ambig && !exact) + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, _("%s: option `-W %s' is ambiguous\n"), + argv[0], argv[optind]); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, _("%s: option `-W %s' is ambiguous\n"), + argv[0], argv[optind]); +#endif + } + nextchar += strlen (nextchar); + optind++; + return '?'; + } + if (pfound != NULL) + { + option_index = indfound; + if (*nameend) + { + /* Don't test has_arg with >, because some C compilers don't + allow it to be used on enums. 
*/ + if (pfound->has_arg) + optarg = nameend + 1; + else + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, _("\ +%s: option `-W %s' doesn't allow an argument\n"), + argv[0], pfound->name); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, _("\ +%s: option `-W %s' doesn't allow an argument\n"), + argv[0], pfound->name); +#endif + } + + nextchar += strlen (nextchar); + return '?'; + } + } + else if (pfound->has_arg == 1) + { + if (optind < argc) + optarg = argv[optind++]; + else + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, _("\ +%s: option `%s' requires an argument\n"), + argv[0], argv[optind - 1]); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, + _("%s: option `%s' requires an argument\n"), + argv[0], argv[optind - 1]); +#endif + } + nextchar += strlen (nextchar); + return optstring[0] == ':' ? ':' : '?'; + } + } + nextchar += strlen (nextchar); + if (longind != NULL) + *longind = option_index; + if (pfound->flag) + { + *(pfound->flag) = pfound->val; + return 0; + } + return pfound->val; + } + nextchar = NULL; + return 'W'; /* Let the application handle it. */ + } + if (temp[1] == ':') + { + if (temp[2] == ':') + { + /* This is an option that accepts an argument optionally. */ + if (*nextchar != '\0') + { + optarg = nextchar; + optind++; + } + else + optarg = NULL; + nextchar = NULL; + } + else + { + /* This is an option that requires an argument. */ + if (*nextchar != '\0') + { + optarg = nextchar; + /* If we end this ARGV-element by taking the rest as an arg, + we must advance to the next element now. */ + optind++; + } + else if (optind == argc) + { + if (print_errors) + { + /* 1003.2 specifies the format of this message. 
*/ +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, + _("%s: option requires an argument -- %c\n"), + argv[0], c); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, + _("%s: option requires an argument -- %c\n"), + argv[0], c); +#endif + } + optopt = c; + if (optstring[0] == ':') + c = ':'; + else + c = '?'; + } + else + /* We already incremented `optind' once; + increment it again when taking next ARGV-elt as argument. */ + optarg = argv[optind++]; + nextchar = NULL; + } + } + return c; + } +} + +int +getopt (argc, argv, optstring) + int argc; + char *const *argv; + const char *optstring; +{ + return _getopt_internal (argc, argv, optstring, + (const struct option *) 0, + (int *) 0, + 0); +} + +#endif /* Not ELIDE_CODE. */ + + +/* Compile with -DTEST to make an executable for use in testing the above definition of `getopt'. */ \ No newline at end of file diff --git a/benchmarks/new_opencl/kmeans/getopt.h b/benchmarks/old_opencl/kmeans/getopt.h similarity index 97% rename from benchmarks/new_opencl/kmeans/getopt.h rename to benchmarks/old_opencl/kmeans/getopt.h index 2a2e7577..bae04bf7 100755 --- a/benchmarks/new_opencl/kmeans/getopt.h +++ b/benchmarks/old_opencl/kmeans/getopt.h @@ -1,191 +1,191 @@ - - -/* getopt.h */ -/* Declarations for getopt. - Copyright (C) 1989-1994, 1996-1999, 2001 Free Software - Foundation, Inc. This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute - it and/or modify it under the terms of the GNU Lesser - General Public License as published by the Free Software - Foundation; either version 2.1 of the License, or - (at your option) any later version. - - The GNU C Library is distributed in the hope that it will - be useful, but WITHOUT ANY WARRANTY; without even the - implied warranty of MERCHANTABILITY or FITNESS FOR A - PARTICULAR PURPOSE. 
See the GNU Lesser General Public - License for more details. - - You should have received a copy of the GNU Lesser General - Public License along with the GNU C Library; if not, write - to the Free Software Foundation, Inc., 59 Temple Place, - Suite 330, Boston, MA 02111-1307 USA. */ - - - - - -#ifndef _GETOPT_H - -#ifndef __need_getopt -# define _GETOPT_H 1 -#endif - -/* If __GNU_LIBRARY__ is not already defined, either we are being used - standalone, or this is the first header included in the source file. - If we are being used with glibc, we need to include <features.h>, but - that does not exist if we are standalone. So: if __GNU_LIBRARY__ is - not defined, include <ctype.h>, which will pull in <features.h> for us - if it's from glibc. (Why ctype.h? It's guaranteed to exist and it - doesn't flood the namespace with stuff the way some other headers do.) */ -#if !defined __GNU_LIBRARY__ -# include <ctype.h> -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -/* For communication from `getopt' to the caller. - When `getopt' finds an option that takes an argument, - the argument value is returned here. - Also, when `ordering' is RETURN_IN_ORDER, - each non-option ARGV-element is returned here. */ - -extern char *optarg; - -/* Index in ARGV of the next element to be scanned. - This is used for communication to and from the caller - and for communication between successive calls to `getopt'. - - On entry to `getopt', zero means this is the first call; initialize. - - When `getopt' returns -1, this is the index of the first of the - non-option elements that the caller should itself scan. - - Otherwise, `optind' communicates from one call to the next - how much of ARGV has been scanned so far. */ - -extern int optind; - -/* Callers store zero here to inhibit the error message `getopt' prints - for unrecognized options. */ - -extern int opterr; - -/* Set to an option character which was unrecognized.
*/ - -extern int optopt; - -#ifndef __need_getopt -/* Describe the long-named options requested by the application. - The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector - of `struct option' terminated by an element containing a name which is - zero. - - The field `has_arg' is: - no_argument (or 0) if the option does not take an argument, - required_argument (or 1) if the option requires an argument, - optional_argument (or 2) if the option takes an optional argument. - - If the field `flag' is not NULL, it points to a variable that is set - to the value given in the field `val' when the option is found, but - left unchanged if the option is not found. - - To have a long-named option do something other than set an `int' to - a compiled-in constant, such as set a value from `optarg', set the - option's `flag' field to zero and its `val' field to a nonzero - value (the equivalent single-letter option character, if there is - one). For long options that have a zero `flag' field, `getopt' - returns the contents of the `val' field. */ - -struct option -{ -# if (defined __STDC__ && __STDC__) || defined __cplusplus - const char *name; -# else - char *name; -# endif - /* has_arg can't be an enum because some compilers complain about - type mismatches in all the code that assumes it is an int. */ - int has_arg; - int *flag; - int val; -}; - -/* Names for the values of the `has_arg' field of `struct option'. */ - -# define no_argument 0 -# define required_argument 1 -# define optional_argument 2 -#endif /* need getopt */ - - -/* Get definitions and prototypes for functions to process the - arguments in ARGV (ARGC of them, minus the program name) for - options given in OPTS. - - Return the option character from OPTS just read. Return -1 when - there are no more options. For unrecognized options, or options - missing arguments, `optopt' is set to the option letter, and '?' is - returned. 
- - The OPTS string is a list of characters which are recognized option - letters, optionally followed by colons, specifying that that letter - takes an argument, to be placed in `optarg'. - - If a letter in OPTS is followed by two colons, its argument is - optional. This behavior is specific to the GNU `getopt'. - - The argument `--' causes premature termination of argument - scanning, explicitly telling `getopt' that there are no more - options. - - If OPTS begins with `--', then non-option arguments are treated as - arguments to the option '\0'. This behavior is specific to the GNU - `getopt'. */ - -#if (defined __STDC__ && __STDC__) || defined __cplusplus -# ifdef __GNU_LIBRARY__ -/* Many other libraries have conflicting prototypes for getopt, with - differences in the consts, in stdlib.h. To avoid compilation - errors, only prototype getopt for the GNU C library. */ -extern int getopt (int ___argc, char *const *___argv, const char *__shortopts); -# else /* not __GNU_LIBRARY__ */ -extern int getopt (); -# endif /* __GNU_LIBRARY__ */ - -# ifndef __need_getopt -extern int getopt_long (int ___argc, char *const *___argv, - const char *__shortopts, - const struct option *__longopts, int *__longind); -extern int getopt_long_only (int ___argc, char *const *___argv, - const char *__shortopts, - const struct option *__longopts, int *__longind); - -/* Internal only. Users should not call this directly. */ -extern int _getopt_internal (int ___argc, char *const *___argv, - const char *__shortopts, - const struct option *__longopts, int *__longind, - int __long_only); -# endif -#else /* not __STDC__ */ -extern int getopt (); -# ifndef __need_getopt -extern int getopt_long (); -extern int getopt_long_only (); - -extern int _getopt_internal (); -# endif -#endif /* __STDC__ */ - -#ifdef __cplusplus -} -#endif - -/* Make sure we later can get all the definitions and declarations. 
*/ -#undef __need_getopt - -#endif /* getopt.h */ - + + +/* getopt.h */ +/* Declarations for getopt. + Copyright (C) 1989-1994, 1996-1999, 2001 Free Software + Foundation, Inc. This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute + it and/or modify it under the terms of the GNU Lesser + General Public License as published by the Free Software + Foundation; either version 2.1 of the License, or + (at your option) any later version. + + The GNU C Library is distributed in the hope that it will + be useful, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A + PARTICULAR PURPOSE. See the GNU Lesser General Public + License for more details. + + You should have received a copy of the GNU Lesser General + Public License along with the GNU C Library; if not, write + to the Free Software Foundation, Inc., 59 Temple Place, + Suite 330, Boston, MA 02111-1307 USA. */ + + + + + +#ifndef _GETOPT_H + +#ifndef __need_getopt +# define _GETOPT_H 1 +#endif + +/* If __GNU_LIBRARY__ is not already defined, either we are being used + standalone, or this is the first header included in the source file. + If we are being used with glibc, we need to include <features.h>, but + that does not exist if we are standalone. So: if __GNU_LIBRARY__ is + not defined, include <ctype.h>, which will pull in <features.h> for us + if it's from glibc. (Why ctype.h? It's guaranteed to exist and it + doesn't flood the namespace with stuff the way some other headers do.) */ +#if !defined __GNU_LIBRARY__ +# include <ctype.h> +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* For communication from `getopt' to the caller. + When `getopt' finds an option that takes an argument, + the argument value is returned here. + Also, when `ordering' is RETURN_IN_ORDER, + each non-option ARGV-element is returned here. */ + +extern char *optarg; + +/* Index in ARGV of the next element to be scanned.
+ This is used for communication to and from the caller + and for communication between successive calls to `getopt'. + + On entry to `getopt', zero means this is the first call; initialize. + + When `getopt' returns -1, this is the index of the first of the + non-option elements that the caller should itself scan. + + Otherwise, `optind' communicates from one call to the next + how much of ARGV has been scanned so far. */ + +extern int optind; + +/* Callers store zero here to inhibit the error message `getopt' prints + for unrecognized options. */ + +extern int opterr; + +/* Set to an option character which was unrecognized. */ + +extern int optopt; + +#ifndef __need_getopt +/* Describe the long-named options requested by the application. + The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector + of `struct option' terminated by an element containing a name which is + zero. + + The field `has_arg' is: + no_argument (or 0) if the option does not take an argument, + required_argument (or 1) if the option requires an argument, + optional_argument (or 2) if the option takes an optional argument. + + If the field `flag' is not NULL, it points to a variable that is set + to the value given in the field `val' when the option is found, but + left unchanged if the option is not found. + + To have a long-named option do something other than set an `int' to + a compiled-in constant, such as set a value from `optarg', set the + option's `flag' field to zero and its `val' field to a nonzero + value (the equivalent single-letter option character, if there is + one). For long options that have a zero `flag' field, `getopt' + returns the contents of the `val' field. */ + +struct option +{ +# if (defined __STDC__ && __STDC__) || defined __cplusplus + const char *name; +# else + char *name; +# endif + /* has_arg can't be an enum because some compilers complain about + type mismatches in all the code that assumes it is an int. 
*/ + int has_arg; + int *flag; + int val; +}; + +/* Names for the values of the `has_arg' field of `struct option'. */ + +# define no_argument 0 +# define required_argument 1 +# define optional_argument 2 +#endif /* need getopt */ + + +/* Get definitions and prototypes for functions to process the + arguments in ARGV (ARGC of them, minus the program name) for + options given in OPTS. + + Return the option character from OPTS just read. Return -1 when + there are no more options. For unrecognized options, or options + missing arguments, `optopt' is set to the option letter, and '?' is + returned. + + The OPTS string is a list of characters which are recognized option + letters, optionally followed by colons, specifying that that letter + takes an argument, to be placed in `optarg'. + + If a letter in OPTS is followed by two colons, its argument is + optional. This behavior is specific to the GNU `getopt'. + + The argument `--' causes premature termination of argument + scanning, explicitly telling `getopt' that there are no more + options. + + If OPTS begins with `--', then non-option arguments are treated as + arguments to the option '\0'. This behavior is specific to the GNU + `getopt'. */ + +#if (defined __STDC__ && __STDC__) || defined __cplusplus +# ifdef __GNU_LIBRARY__ +/* Many other libraries have conflicting prototypes for getopt, with + differences in the consts, in stdlib.h. To avoid compilation + errors, only prototype getopt for the GNU C library. */ +extern int getopt (int ___argc, char *const *___argv, const char *__shortopts); +# else /* not __GNU_LIBRARY__ */ +extern int getopt (); +# endif /* __GNU_LIBRARY__ */ + +# ifndef __need_getopt +extern int getopt_long (int ___argc, char *const *___argv, + const char *__shortopts, + const struct option *__longopts, int *__longind); +extern int getopt_long_only (int ___argc, char *const *___argv, + const char *__shortopts, + const struct option *__longopts, int *__longind); + +/* Internal only. 
Users should not call this directly. */ +extern int _getopt_internal (int ___argc, char *const *___argv, + const char *__shortopts, + const struct option *__longopts, int *__longind, + int __long_only); +# endif +#else /* not __STDC__ */ +extern int getopt (); +# ifndef __need_getopt +extern int getopt_long (); +extern int getopt_long_only (); + +extern int _getopt_internal (); +# endif +#endif /* __STDC__ */ + +#ifdef __cplusplus +} +#endif + +/* Make sure we later can get all the definitions and declarations. */ +#undef __need_getopt + +#endif /* getopt.h */ + diff --git a/benchmarks/new_opencl/kmeans/kernel.cl b/benchmarks/old_opencl/kmeans/kernel.cl similarity index 95% rename from benchmarks/new_opencl/kmeans/kernel.cl rename to benchmarks/old_opencl/kmeans/kernel.cl index 81089878..11ca065e 100755 --- a/benchmarks/new_opencl/kmeans/kernel.cl +++ b/benchmarks/old_opencl/kmeans/kernel.cl @@ -1,61 +1,61 @@ -#ifndef FLT_MAX -#define FLT_MAX 3.40282347e+38 -#endif - -__kernel void -kmeans_kernel_c(__global float *feature, - __global float *clusters, - __global int *membership, - int npoints, - int nclusters, - int nfeatures, - int offset, - int size - ) -{ - unsigned int point_id = get_global_id(0); - int index = 0; - //const unsigned int point_id = get_global_id(0); - if (point_id < npoints) - { - float min_dist=FLT_MAX; - for (int i=0; i < nclusters; i++) { - - float dist = 0; - float ans = 0; - for (int l=0; l -#include -#include -#include -#include "kmeans.h" - -#define RANDOM_MAX 2147483647 - -extern double wtime(void); - -/*----< kmeans_clustering() >---------------------------------------------*/ -float** kmeans_clustering(float **feature, /* in: [npoints][nfeatures] */ - int nfeatures, - int npoints, - int nclusters, - float threshold, - int *membership) /* out: [npoints] */ -{ - int i, j, n = 0; /* counters */ - int loop=0, temp; - int *new_centers_len; /* [nclusters]: no. 
of points in each cluster */ - float delta; /* if the point moved */ - float **clusters; /* out: [nclusters][nfeatures] */ - float **new_centers; /* [nclusters][nfeatures] */ - - int *initial; /* used to hold the index of points not yet selected - prevents the "birthday problem" of dual selection (?) - considered holding initial cluster indices, but changed due to - possible, though unlikely, infinite loops */ - int initial_points; - int c = 0; - - /* nclusters should never be > npoints - that would guarantee a cluster without points */ - if (nclusters > npoints) - nclusters = npoints; - - /* allocate space for and initialize returning variable clusters[] */ - clusters = (float**) malloc(nclusters * sizeof(float*)); - clusters[0] = (float*) malloc(nclusters * nfeatures * sizeof(float)); - for (i=1; i= 0; i++) { - //n = (int)rand() % initial_points; - - for (j=0; j 0) - clusters[i][j] = new_centers[i][j] / new_centers_len[i]; /* take average i.e. sum/n */ - new_centers[i][j] = 0.0; /* set back to 0 */ - } - new_centers_len[i] = 0; /* set back to 0 */ - } - c++; - } while ((delta > threshold) && (loop++ < 500)); /* makes sure loop terminates */ - printf("iterated %d times\n", c); - free(new_centers[0]); - free(new_centers); - free(new_centers_len); - - return clusters; -} - +/*****************************************************************************/ +/*IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. */ +/*By downloading, copying, installing or using the software you agree */ +/*to this license. If you do not agree to this license, do not download, */ +/*install, copy or use the software. */ +/* */ +/* */ +/*Copyright (c) 2005 Northwestern University */ +/*All rights reserved. 
*/ + +/*Redistribution of the software in source and binary forms, */ +/*with or without modification, is permitted provided that the */ +/*following conditions are met: */ +/* */ +/*1 Redistributions of source code must retain the above copyright */ +/* notice, this list of conditions and the following disclaimer. */ +/* */ +/*2 Redistributions in binary form must reproduce the above copyright */ +/* notice, this list of conditions and the following disclaimer in the */ +/* documentation and/or other materials provided with the distribution.*/ +/* */ +/*3 Neither the name of Northwestern University nor the names of its */ +/* contributors may be used to endorse or promote products derived */ +/* from this software without specific prior written permission. */ +/* */ +/*THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS */ +/*IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ +/*TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT AND */ +/*FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL */ +/*NORTHWESTERN UNIVERSITY OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, */ +/*INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/*(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR */ +/*SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */ +/*HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, */ +/*STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN */ +/*ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/*POSSIBILITY OF SUCH DAMAGE. 
*/ +/******************************************************************************/ + +/*************************************************************************/ +/** File: kmeans_clustering.c **/ +/** Description: Implementation of regular k-means clustering **/ +/** algorithm **/ +/** Author: Wei-keng Liao **/ +/** ECE Department, Northwestern University **/ +/** email: wkliao@ece.northwestern.edu **/ +/** **/ +/** Edited by: Jay Pisharath **/ +/** Northwestern University. **/ +/** **/ +/** ================================================================ **/ +/** **/ +/** Edited by: Shuai Che, David Tarjan, Sang-Ha Lee **/ +/** University of Virginia **/ +/** **/ +/** Description: No longer supports fuzzy c-means clustering; **/ +/** only regular k-means clustering. **/ +/** No longer performs "validity" function to analyze **/ +/** compactness and separation crietria; instead **/ +/** calculate root mean squared error. **/ +/** **/ +/*************************************************************************/ + +#include +#include +#include +#include +#include "kmeans.h" + +#define RANDOM_MAX 2147483647 + +extern double wtime(void); + +/*----< kmeans_clustering() >---------------------------------------------*/ +float** kmeans_clustering(float **feature, /* in: [npoints][nfeatures] */ + int nfeatures, + int npoints, + int nclusters, + float threshold, + int *membership) /* out: [npoints] */ +{ + int i, j, n = 0; /* counters */ + int loop=0, temp; + int *new_centers_len; /* [nclusters]: no. of points in each cluster */ + float delta; /* if the point moved */ + float **clusters; /* out: [nclusters][nfeatures] */ + float **new_centers; /* [nclusters][nfeatures] */ + + int *initial; /* used to hold the index of points not yet selected + prevents the "birthday problem" of dual selection (?) 
+ considered holding initial cluster indices, but changed due to + possible, though unlikely, infinite loops */ + int initial_points; + int c = 0; + + /* nclusters should never be > npoints + that would guarantee a cluster without points */ + if (nclusters > npoints) + nclusters = npoints; + + /* allocate space for and initialize returning variable clusters[] */ + clusters = (float**) malloc(nclusters * sizeof(float*)); + clusters[0] = (float*) malloc(nclusters * nfeatures * sizeof(float)); + for (i=1; i= 0; i++) { + //n = (int)rand() % initial_points; + + for (j=0; j 0) + clusters[i][j] = new_centers[i][j] / new_centers_len[i]; /* take average i.e. sum/n */ + new_centers[i][j] = 0.0; /* set back to 0 */ + } + new_centers_len[i] = 0; /* set back to 0 */ + } + c++; + } while ((delta > threshold) && (loop++ < 500)); /* makes sure loop terminates */ + printf("iterated %d times\n", c); + free(new_centers[0]); + free(new_centers); + free(new_centers_len); + + return clusters; +} + diff --git a/benchmarks/opencl/kmeans/libkmeans.a b/benchmarks/old_opencl/kmeans/libkmeans.a similarity index 100% rename from benchmarks/opencl/kmeans/libkmeans.a rename to benchmarks/old_opencl/kmeans/libkmeans.a diff --git a/benchmarks/new_opencl/kmeans/main.cc b/benchmarks/old_opencl/kmeans/main.cc similarity index 92% rename from benchmarks/new_opencl/kmeans/main.cc rename to benchmarks/old_opencl/kmeans/main.cc index e6e97e3b..f458ab4b 100755 --- a/benchmarks/new_opencl/kmeans/main.cc +++ b/benchmarks/old_opencl/kmeans/main.cc @@ -1,382 +1,394 @@ -#include "kmeans.h" -#include -#include -#include -#include -#include -#include - -#ifdef WIN -#include -#else -#include -#include -double gettime() { - struct timeval t; - gettimeofday(&t, NULL); - return t.tv_sec + t.tv_usec * 1e-6; -} -#endif - -#ifdef NV -#include -#else -#include -#endif - -#ifndef FLT_MAX -#define FLT_MAX 3.40282347e+38 -#endif - -#ifdef RD_WG_SIZE_0_0 -#define BLOCK_SIZE RD_WG_SIZE_0_0 -#elif defined(RD_WG_SIZE_0) 
-#define BLOCK_SIZE RD_WG_SIZE_0 -#elif defined(RD_WG_SIZE) -#define BLOCK_SIZE RD_WG_SIZE -#else -#define BLOCK_SIZE 256 -#endif - -#ifdef RD_WG_SIZE_1_0 -#define BLOCK_SIZE2 RD_WG_SIZE_1_0 -#elif defined(RD_WG_SIZE_1) -#define BLOCK_SIZE2 RD_WG_SIZE_1 -#elif defined(RD_WG_SIZE) -#define BLOCK_SIZE2 RD_WG_SIZE -#else -#define BLOCK_SIZE2 256 -#endif - -// local variables -static cl_context context; -static cl_command_queue cmd_queue; -static cl_device_type device_type; -static cl_device_id *device_list; -static cl_int num_devices; - -static int initialize(int use_gpu) { - cl_int result; - size_t size; - - /*// create OpenCL context - cl_platform_id platform_id; - if (clGetPlatformIDs(1, &platform_id, NULL) != CL_SUCCESS) { - printf("ERROR: clGetPlatformIDs(1,*,0) failed\n"); - return -1; - } - cl_context_properties ctxprop[] = {CL_CONTEXT_PLATFORM, - (cl_context_properties)platform_id, 0}; - device_type = use_gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU; - context = clCreateContextFromType(ctxprop, device_type, NULL, NULL, NULL); - if (!context) { - printf("ERROR: clCreateContextFromType(%s) failed\n", - use_gpu ? 
"GPU" : "CPU"); - return -1; - } - - // get the list of GPUs - result = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &size); - num_devices = (int)(size / sizeof(cl_device_id)); - - if (result != CL_SUCCESS || num_devices < 1) { - printf("ERROR: clGetContextInfo() failed\n"); - return -1; - } - device_list = new cl_device_id[num_devices]; - if (!device_list) { - printf("ERROR: new cl_device_id[] failed\n"); - return -1; - } - result = - clGetContextInfo(context, CL_CONTEXT_DEVICES, size, device_list, NULL); - if (result != CL_SUCCESS) { - printf("ERROR: clGetContextInfo() failed\n"); - return -1; - }*/ - - cl_platform_id platform_id; - num_devices = 1; - device_list = new cl_device_id[num_devices]; - - result = clGetPlatformIDs(1, &platform_id, NULL); - result = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, device_list, NULL); - context = clCreateContext(NULL, 1, device_list, NULL, NULL, &result); - - // create command queue for the first device - cmd_queue = clCreateCommandQueue(context, device_list[0], 0, NULL); - if (!cmd_queue) { - printf("ERROR: clCreateCommandQueue() failed\n"); - return -1; - } - - return 0; -} - -static int shutdown() { - // release resources - if (cmd_queue) - clReleaseCommandQueue(cmd_queue); - if (context) - clReleaseContext(context); - if (device_list) - delete device_list; - - // reset all variables - cmd_queue = 0; - context = 0; - device_list = 0; - num_devices = 0; - device_type = 0; - - return 0; -} - -cl_mem d_feature; -cl_mem d_feature_swap; -cl_mem d_cluster; -cl_mem d_membership; - -cl_kernel kernel; -cl_kernel kernel_s; -cl_kernel kernel2; - -int *membership_OCL; -int *membership_d; -float *feature_d; -float *clusters_d; -float *center_d; - - -static int read_kernel_file(const char* filename, uint8_t** data, size_t* size) { - if (nullptr == filename || nullptr == data || 0 == size) - return -1; - - FILE* fp = fopen(filename, "r"); - if (NULL == fp) { - fprintf(stderr, "Failed to load kernel."); - return -1; - 
} - fseek(fp , 0 , SEEK_END); - long fsize = ftell(fp); - rewind(fp); - - *data = (uint8_t*)malloc(fsize); - *size = fread(*data, 1, fsize, fp); - - fclose(fp); - - return 0; -} - - -int allocate(int n_points, int n_features, int n_clusters, float **feature) { - /*int sourcesize = 1024 * 1024; - char *source = (char *)calloc(sourcesize, sizeof(char)); - if (!source) { - printf("ERROR: calloc(%d) failed\n", sourcesize); - return -1; - } - - // read the kernel core source - char *tempchar = "./kmeans.cl"; - FILE *fp = fopen(tempchar, "rb"); - if (!fp) { - printf("ERROR: unable to open '%s'\n", tempchar); - return -1; - } - fread(source + strlen(source), sourcesize, 1, fp); - fclose(fp);*/ - - // OpenCL initialization - int use_gpu = 1; - if (initialize(use_gpu)) - return -1; - - // compile kernel - cl_int err = 0; - //const char *slist[2] = {source, 0}; - //cl_program prog = clCreateProgramWithSource(context, 1, slist, NULL, &err); - cl_program prog = clCreateProgramWithBuiltInKernels(context, 1, device_list, "kmeans_kernel_c;kmeans_swap", &err); - if (err != CL_SUCCESS) { - printf("ERROR: clCreateProgramWithSource() => %d\n", err); - return -1; - } - err = clBuildProgram(prog, 0, NULL, NULL, NULL, NULL); - { // show warnings/errors - // static char log[65536]; memset(log, 0, sizeof(log)); - // cl_device_id device_id = 0; - // err = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(device_id), - //&device_id, NULL); - // clGetProgramBuildInfo(prog, device_id, CL_PROGRAM_BUILD_LOG, - // sizeof(log)-1, log, NULL); - // if(err || strstr(log,"warning:") || strstr(log, "error:")) - // printf("<<<<\n%s\n>>>>\n", log); - } - if (err != CL_SUCCESS) { - printf("ERROR: clBuildProgram() => %d\n", err); - return -1; - } - - char *kernel_kmeans_c = "kmeans_kernel_c"; - char *kernel_swap = "kmeans_swap"; - - kernel_s = clCreateKernel(prog, kernel_kmeans_c, &err); - if (err != CL_SUCCESS) { - printf("ERROR: clCreateKernel() 0 => %d\n", err); - return -1; - } - kernel2 = 
clCreateKernel(prog, kernel_swap, &err); - if (err != CL_SUCCESS) { - printf("ERROR: clCreateKernel() 0 => %d\n", err); - return -1; - } - - clReleaseProgram(prog); - - d_feature = clCreateBuffer(context, CL_MEM_READ_WRITE, - n_points * n_features * sizeof(float), NULL, &err); - if (err != CL_SUCCESS) { - printf("ERROR: clCreateBuffer d_feature (size:%d) => %d\n", - n_points * n_features, err); - return -1; - } - d_feature_swap = - clCreateBuffer(context, CL_MEM_READ_WRITE, - n_points * n_features * sizeof(float), NULL, &err); - if (err != CL_SUCCESS) { - printf("ERROR: clCreateBuffer d_feature_swap (size:%d) => %d\n", - n_points * n_features, err); - return -1; - } - d_cluster = - clCreateBuffer(context, CL_MEM_READ_WRITE, - n_clusters * n_features * sizeof(float), NULL, &err); - if (err != CL_SUCCESS) { - printf("ERROR: clCreateBuffer d_cluster (size:%d) => %d\n", - n_clusters * n_features, err); - return -1; - } - d_membership = clCreateBuffer(context, CL_MEM_READ_WRITE, - n_points * sizeof(int), NULL, &err); - if (err != CL_SUCCESS) { - printf("ERROR: clCreateBuffer d_membership (size:%d) => %d\n", n_points, - err); - return -1; - } - - // write buffers - err = clEnqueueWriteBuffer(cmd_queue, d_feature, 1, 0, - n_points * n_features * sizeof(float), feature[0], - 0, 0, 0); - if (err != CL_SUCCESS) { - printf("ERROR: clEnqueueWriteBuffer d_feature (size:%d) => %d\n", - n_points * n_features, err); - return -1; - } - - clSetKernelArg(kernel2, 0, sizeof(void *), (void *)&d_feature); - clSetKernelArg(kernel2, 1, sizeof(void *), (void *)&d_feature_swap); - clSetKernelArg(kernel2, 2, sizeof(cl_int), (void *)&n_points); - clSetKernelArg(kernel2, 3, sizeof(cl_int), (void *)&n_features); - - size_t global_work[3] = {n_points, 1, 1}; - /// Ke Wang adjustable local group size 2013/08/07 10:37:33 - size_t local_work_size = BLOCK_SIZE; // work group size is defined by - // RD_WG_SIZE_0 or RD_WG_SIZE_0_0 - // 2014/06/10 17:00:51 - if (global_work[0] % local_work_size != 0) - 
global_work[0] = (global_work[0] / local_work_size + 1) * local_work_size; - - err = clEnqueueNDRangeKernel(cmd_queue, kernel2, 1, NULL, global_work, - &local_work_size, 0, 0, 0); - if (err != CL_SUCCESS) { - printf("ERROR: clEnqueueNDRangeKernel()=>%d failed\n", err); - return -1; - } - - membership_OCL = (int *)malloc(n_points * sizeof(int)); -} - -void deallocateMemory() { - clReleaseMemObject(d_feature); - clReleaseMemObject(d_feature_swap); - clReleaseMemObject(d_cluster); - clReleaseMemObject(d_membership); - free(membership_OCL); -} - -int main(int argc, char **argv) { - printf("WG size of kernel_swap = %d, WG size of kernel_kmeans = %d \n", - BLOCK_SIZE, BLOCK_SIZE2); - setup(argc, argv); - shutdown(); -} - -int kmeansOCL(float **feature, /* in: [npoints][nfeatures] */ - int n_features, int n_points, int n_clusters, int *membership, - float **clusters, int *new_centers_len, float **new_centers) { - - int delta = 0; - int i, j, k; - cl_int err = 0; - - size_t global_work[3] = {n_points, 1, 1}; - - /// Ke Wang adjustable local group size 2013/08/07 10:37:33 - size_t local_work_size = BLOCK_SIZE2; // work group size is defined by - // RD_WG_SIZE_1 or RD_WG_SIZE_1_0 - // 2014/06/10 17:00:41 - if (global_work[0] % local_work_size != 0) - global_work[0] = (global_work[0] / local_work_size + 1) * local_work_size; - - err = clEnqueueWriteBuffer(cmd_queue, d_cluster, 1, 0, - n_clusters * n_features * sizeof(float), - clusters[0], 0, 0, 0); - if (err != CL_SUCCESS) { - printf("ERROR: clEnqueueWriteBuffer d_cluster (size:%d) => %d\n", n_points, - err); - return -1; - } - - int size = 0; - int offset = 0; - - clSetKernelArg(kernel_s, 0, sizeof(void *), (void *)&d_feature_swap); - clSetKernelArg(kernel_s, 1, sizeof(void *), (void *)&d_cluster); - clSetKernelArg(kernel_s, 2, sizeof(void *), (void *)&d_membership); - clSetKernelArg(kernel_s, 3, sizeof(cl_int), (void *)&n_points); - clSetKernelArg(kernel_s, 4, sizeof(cl_int), (void *)&n_clusters); - 
clSetKernelArg(kernel_s, 5, sizeof(cl_int), (void *)&n_features); - clSetKernelArg(kernel_s, 6, sizeof(cl_int), (void *)&offset); - clSetKernelArg(kernel_s, 7, sizeof(cl_int), (void *)&size); - - err = clEnqueueNDRangeKernel(cmd_queue, kernel_s, 1, NULL, global_work, - &local_work_size, 0, 0, 0); - if (err != CL_SUCCESS) { - printf("ERROR: clEnqueueNDRangeKernel()=>%d failed\n", err); - return -1; - } - clFinish(cmd_queue); - err = clEnqueueReadBuffer(cmd_queue, d_membership, 1, 0, - n_points * sizeof(int), membership_OCL, 0, 0, 0); - if (err != CL_SUCCESS) { - printf("ERROR: Memcopy Out\n"); - return -1; - } - - delta = 0; - for (i = 0; i < n_points; i++) { - int cluster_id = membership_OCL[i]; - new_centers_len[cluster_id]++; - if (membership_OCL[i] != membership[i]) { - delta++; - membership[i] = membership_OCL[i]; - } - for (j = 0; j < n_features; j++) { - new_centers[cluster_id][j] += feature[i][j]; - } - } - - return delta; -} +#include "kmeans.h" +#include +#include +#include +#include +#include +#include + +#ifdef WIN +#include +#else +#include +#include +double gettime() { + struct timeval t; + gettimeofday(&t, NULL); + return t.tv_sec + t.tv_usec * 1e-6; +} +#endif + +#ifdef NV +#include +#else +#include +#endif + +#ifndef FLT_MAX +#define FLT_MAX 3.40282347e+38 +#endif + +#ifdef RD_WG_SIZE_0_0 +#define BLOCK_SIZE RD_WG_SIZE_0_0 +#elif defined(RD_WG_SIZE_0) +#define BLOCK_SIZE RD_WG_SIZE_0 +#elif defined(RD_WG_SIZE) +#define BLOCK_SIZE RD_WG_SIZE +#else +#define BLOCK_SIZE 256 +#endif + +#ifdef RD_WG_SIZE_1_0 +#define BLOCK_SIZE2 RD_WG_SIZE_1_0 +#elif defined(RD_WG_SIZE_1) +#define BLOCK_SIZE2 RD_WG_SIZE_1 +#elif defined(RD_WG_SIZE) +#define BLOCK_SIZE2 RD_WG_SIZE +#else +#define BLOCK_SIZE2 256 +#endif + +// local variables +static cl_context context; +static cl_command_queue cmd_queue; +static cl_device_type device_type; +static cl_device_id *device_list; +static cl_int num_devices; + + +static int read_kernel_file(const char* filename, uint8_t** data, 
size_t* size) { + if (nullptr == filename || nullptr == data || 0 == size) + return -1; + + FILE* fp = fopen(filename, "r"); + if (NULL == fp) { + fprintf(stderr, "Failed to load kernel."); + return -1; + } + fseek(fp , 0 , SEEK_END); + long fsize = ftell(fp); + rewind(fp); + + *data = (uint8_t*)malloc(fsize); + *size = fread(*data, 1, fsize, fp); + + fclose(fp); + + return 0; +} + +static int initialize(int use_gpu) { + cl_int result; + size_t size; + + /*// create OpenCL context + cl_platform_id platform_id; + if (clGetPlatformIDs(1, &platform_id, NULL) != CL_SUCCESS) { + printf("ERROR: clGetPlatformIDs(1,*,0) failed\n"); + return -1; + } + cl_context_properties ctxprop[] = {CL_CONTEXT_PLATFORM, + (cl_context_properties)platform_id, 0}; + device_type = use_gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU; + context = clCreateContextFromType(ctxprop, device_type, NULL, NULL, NULL); + if (!context) { + printf("ERROR: clCreateContextFromType(%s) failed\n", + use_gpu ? "GPU" : "CPU"); + return -1; + } + + // get the list of GPUs + result = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &size); + num_devices = (int)(size / sizeof(cl_device_id)); + + if (result != CL_SUCCESS || num_devices < 1) { + printf("ERROR: clGetContextInfo() failed\n"); + return -1; + } + device_list = new cl_device_id[num_devices]; + if (!device_list) { + printf("ERROR: new cl_device_id[] failed\n"); + return -1; + } + result = + clGetContextInfo(context, CL_CONTEXT_DEVICES, size, device_list, NULL); + if (result != CL_SUCCESS) { + printf("ERROR: clGetContextInfo() failed\n"); + return -1; + }*/ + + cl_platform_id platform_id; + num_devices = 1; + device_list = new cl_device_id[num_devices]; + + result = clGetPlatformIDs(1, &platform_id, NULL); + result = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, device_list, NULL); + context = clCreateContext(NULL, 1, device_list, NULL, NULL, &result); + + // create command queue for the first device + cmd_queue = 
clCreateCommandQueue(context, device_list[0], 0, NULL); + if (!cmd_queue) { + printf("ERROR: clCreateCommandQueue() failed\n"); + return -1; + } + + return 0; +} + +static int shutdown() { + // release resources + if (cmd_queue) + clReleaseCommandQueue(cmd_queue); + if (context) + clReleaseContext(context); + if (device_list) + delete device_list; + + // reset all variables + cmd_queue = 0; + context = 0; + device_list = 0; + num_devices = 0; + device_type = 0; + + return 0; +} + +cl_mem d_feature; +cl_mem d_feature_swap; +cl_mem d_cluster; +cl_mem d_membership; + +cl_kernel kernel; +cl_kernel kernel_s; +cl_kernel kernel2; + +int *membership_OCL; +int *membership_d; +float *feature_d; +float *clusters_d; +float *center_d; + +uint8_t* kernel_bin = NULL; +size_t kernel_size = 0; +cl_int binary_status = 0; + + +int allocate(int n_points, int n_features, int n_clusters, float **feature) { + /*int sourcesize = 1024 * 1024; + char *source = (char *)calloc(sourcesize, sizeof(char)); + if (!source) { + printf("ERROR: calloc(%d) failed\n", sourcesize); + return -1; + } + + // read the kernel core source + char *tempchar = "./kmeans.cl"; + FILE *fp = fopen(tempchar, "rb"); + if (!fp) { + printf("ERROR: unable to open '%s'\n", tempchar); + return -1; + } + fread(source + strlen(source), sourcesize, 1, fp); + fclose(fp);*/ + + // OpenCL initialization + int use_gpu = 1; + if (initialize(use_gpu)) + return -1; + + // Load Kernel + if (read_kernel_file("kernel.pocl", &kernel_bin, &kernel_size)) { + return -1; + } + + // compile kernel + cl_int err = 0; + //const char *slist[2] = {source, 0}; + //cl_program prog = clCreateProgramWithSource(context, 1, slist, NULL, &err); + cl_program prog = clCreateProgramWithBinary( + context, 1, device_list, &kernel_size, &kernel_bin, &binary_status, &err); + // cl_program prog = clCreateProgramWithBuiltInKernels(context, 1, device_list, "kmeans_kernel_c;kmeans_swap", &err); + if (err != CL_SUCCESS) { + printf("ERROR: 
clCreateProgramWithSource() => %d\n", err); + return -1; + } + err = clBuildProgram(prog, 0, NULL, NULL, NULL, NULL); + { // show warnings/errors + // static char log[65536]; memset(log, 0, sizeof(log)); + // cl_device_id device_id = 0; + // err = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(device_id), + //&device_id, NULL); + // clGetProgramBuildInfo(prog, device_id, CL_PROGRAM_BUILD_LOG, + // sizeof(log)-1, log, NULL); + // if(err || strstr(log,"warning:") || strstr(log, "error:")) + // printf("<<<<\n%s\n>>>>\n", log); + } + if (err != CL_SUCCESS) { + printf("ERROR: clBuildProgram() => %d\n", err); + return -1; + } + + char *kernel_kmeans_c = "kmeans_kernel_c"; + char *kernel_swap = "kmeans_swap"; + + kernel_s = clCreateKernel(prog, kernel_kmeans_c, &err); + if (err != CL_SUCCESS) { + printf("ERROR: clCreateKernel() 0 => %d\n", err); + return -1; + } + kernel2 = clCreateKernel(prog, kernel_swap, &err); + if (err != CL_SUCCESS) { + printf("ERROR: clCreateKernel() 0 => %d\n", err); + return -1; + } + + clReleaseProgram(prog); + + d_feature = clCreateBuffer(context, CL_MEM_READ_WRITE, + n_points * n_features * sizeof(float), NULL, &err); + if (err != CL_SUCCESS) { + printf("ERROR: clCreateBuffer d_feature (size:%d) => %d\n", + n_points * n_features, err); + return -1; + } + d_feature_swap = + clCreateBuffer(context, CL_MEM_READ_WRITE, + n_points * n_features * sizeof(float), NULL, &err); + if (err != CL_SUCCESS) { + printf("ERROR: clCreateBuffer d_feature_swap (size:%d) => %d\n", + n_points * n_features, err); + return -1; + } + d_cluster = + clCreateBuffer(context, CL_MEM_READ_WRITE, + n_clusters * n_features * sizeof(float), NULL, &err); + if (err != CL_SUCCESS) { + printf("ERROR: clCreateBuffer d_cluster (size:%d) => %d\n", + n_clusters * n_features, err); + return -1; + } + d_membership = clCreateBuffer(context, CL_MEM_READ_WRITE, + n_points * sizeof(int), NULL, &err); + if (err != CL_SUCCESS) { + printf("ERROR: clCreateBuffer d_membership (size:%d) => 
%d\n", n_points, + err); + return -1; + } + + // write buffers + err = clEnqueueWriteBuffer(cmd_queue, d_feature, 1, 0, + n_points * n_features * sizeof(float), feature[0], + 0, 0, 0); + if (err != CL_SUCCESS) { + printf("ERROR: clEnqueueWriteBuffer d_feature (size:%d) => %d\n", + n_points * n_features, err); + return -1; + } + + clSetKernelArg(kernel2, 0, sizeof(void *), (void *)&d_feature); + clSetKernelArg(kernel2, 1, sizeof(void *), (void *)&d_feature_swap); + clSetKernelArg(kernel2, 2, sizeof(cl_int), (void *)&n_points); + clSetKernelArg(kernel2, 3, sizeof(cl_int), (void *)&n_features); + + size_t global_work[3] = {n_points, 1, 1}; + /// Ke Wang adjustable local group size 2013/08/07 10:37:33 + size_t local_work_size = BLOCK_SIZE; // work group size is defined by + // RD_WG_SIZE_0 or RD_WG_SIZE_0_0 + // 2014/06/10 17:00:51 + if (global_work[0] % local_work_size != 0) + global_work[0] = (global_work[0] / local_work_size + 1) * local_work_size; + + err = clEnqueueNDRangeKernel(cmd_queue, kernel2, 1, NULL, global_work, + &local_work_size, 0, 0, 0); + if (err != CL_SUCCESS) { + printf("ERROR: clEnqueueNDRangeKernel()=>%d failed\n", err); + return -1; + } + + membership_OCL = (int *)malloc(n_points * sizeof(int)); +} + +void deallocateMemory() { + clReleaseMemObject(d_feature); + clReleaseMemObject(d_feature_swap); + clReleaseMemObject(d_cluster); + clReleaseMemObject(d_membership); + if (kernel_bin) free(kernel_bin); + free(membership_OCL); +} + +int main(int argc, char **argv) { + printf("WG size of kernel_swap = %d, WG size of kernel_kmeans = %d \n", + BLOCK_SIZE, BLOCK_SIZE2); + setup(argc, argv); + shutdown(); +} + +int kmeansOCL(float **feature, /* in: [npoints][nfeatures] */ + int n_features, int n_points, int n_clusters, int *membership, + float **clusters, int *new_centers_len, float **new_centers) { + + int delta = 0; + int i, j, k; + cl_int err = 0; + + size_t global_work[3] = {n_points, 1, 1}; + + /// Ke Wang adjustable local group size 2013/08/07 
10:37:33 + size_t local_work_size = BLOCK_SIZE2; // work group size is defined by + // RD_WG_SIZE_1 or RD_WG_SIZE_1_0 + // 2014/06/10 17:00:41 + if (global_work[0] % local_work_size != 0) + global_work[0] = (global_work[0] / local_work_size + 1) * local_work_size; + + err = clEnqueueWriteBuffer(cmd_queue, d_cluster, 1, 0, + n_clusters * n_features * sizeof(float), + clusters[0], 0, 0, 0); + if (err != CL_SUCCESS) { + printf("ERROR: clEnqueueWriteBuffer d_cluster (size:%d) => %d\n", n_points, + err); + return -1; + } + + int size = 0; + int offset = 0; + + clSetKernelArg(kernel_s, 0, sizeof(void *), (void *)&d_feature_swap); + clSetKernelArg(kernel_s, 1, sizeof(void *), (void *)&d_cluster); + clSetKernelArg(kernel_s, 2, sizeof(void *), (void *)&d_membership); + clSetKernelArg(kernel_s, 3, sizeof(cl_int), (void *)&n_points); + clSetKernelArg(kernel_s, 4, sizeof(cl_int), (void *)&n_clusters); + clSetKernelArg(kernel_s, 5, sizeof(cl_int), (void *)&n_features); + clSetKernelArg(kernel_s, 6, sizeof(cl_int), (void *)&offset); + clSetKernelArg(kernel_s, 7, sizeof(cl_int), (void *)&size); + + err = clEnqueueNDRangeKernel(cmd_queue, kernel_s, 1, NULL, global_work, + &local_work_size, 0, 0, 0); + if (err != CL_SUCCESS) { + printf("ERROR: clEnqueueNDRangeKernel()=>%d failed\n", err); + return -1; + } + clFinish(cmd_queue); + err = clEnqueueReadBuffer(cmd_queue, d_membership, 1, 0, + n_points * sizeof(int), membership_OCL, 0, 0, 0); + if (err != CL_SUCCESS) { + printf("ERROR: Memcopy Out\n"); + return -1; + } + + delta = 0; + for (i = 0; i < n_points; i++) { + int cluster_id = membership_OCL[i]; + new_centers_len[cluster_id]++; + if (membership_OCL[i] != membership[i]) { + delta++; + membership[i] = membership_OCL[i]; + } + for (j = 0; j < n_features; j++) { + new_centers[cluster_id][j] += feature[i][j]; + } + } + + return delta; +} diff --git a/benchmarks/new_opencl/kmeans/read_input.c b/benchmarks/old_opencl/kmeans/read_input.c similarity index 100% rename from 
benchmarks/new_opencl/kmeans/read_input.c rename to benchmarks/old_opencl/kmeans/read_input.c diff --git a/benchmarks/new_opencl/kmeans/rmse.c b/benchmarks/old_opencl/kmeans/rmse.c similarity index 100% rename from benchmarks/new_opencl/kmeans/rmse.c rename to benchmarks/old_opencl/kmeans/rmse.c diff --git a/benchmarks/opencl/kmeans/run b/benchmarks/old_opencl/kmeans/run similarity index 100% rename from benchmarks/opencl/kmeans/run rename to benchmarks/old_opencl/kmeans/run diff --git a/benchmarks/opencl/lbm/120_120_150_ldc.of b/benchmarks/old_opencl/lbm/120_120_150_ldc.of similarity index 100% rename from benchmarks/opencl/lbm/120_120_150_ldc.of rename to benchmarks/old_opencl/lbm/120_120_150_ldc.of diff --git a/benchmarks/opencl/lbm/Makefile b/benchmarks/old_opencl/lbm/Makefile similarity index 100% rename from benchmarks/opencl/lbm/Makefile rename to benchmarks/old_opencl/lbm/Makefile diff --git a/benchmarks/opencl/lbm/args.c b/benchmarks/old_opencl/lbm/args.c similarity index 100% rename from benchmarks/opencl/lbm/args.c rename to benchmarks/old_opencl/lbm/args.c diff --git a/benchmarks/opencl/lbm/gpu_info.c b/benchmarks/old_opencl/lbm/gpu_info.c similarity index 100% rename from benchmarks/opencl/lbm/gpu_info.c rename to benchmarks/old_opencl/lbm/gpu_info.c diff --git a/benchmarks/opencl/lbm/gpu_info.h b/benchmarks/old_opencl/lbm/gpu_info.h similarity index 100% rename from benchmarks/opencl/lbm/gpu_info.h rename to benchmarks/old_opencl/lbm/gpu_info.h diff --git a/benchmarks/opencl/lbm/kernel.cl b/benchmarks/old_opencl/lbm/kernel.cl similarity index 100% rename from benchmarks/opencl/lbm/kernel.cl rename to benchmarks/old_opencl/lbm/kernel.cl diff --git a/benchmarks/opencl/lbm/layout_config.h b/benchmarks/old_opencl/lbm/layout_config.h similarity index 100% rename from benchmarks/opencl/lbm/layout_config.h rename to benchmarks/old_opencl/lbm/layout_config.h diff --git a/benchmarks/opencl/lbm/lbm.c b/benchmarks/old_opencl/lbm/lbm.c similarity index 100% 
rename from benchmarks/opencl/lbm/lbm.c rename to benchmarks/old_opencl/lbm/lbm.c diff --git a/benchmarks/opencl/lbm/lbm.h b/benchmarks/old_opencl/lbm/lbm.h similarity index 100% rename from benchmarks/opencl/lbm/lbm.h rename to benchmarks/old_opencl/lbm/lbm.h diff --git a/benchmarks/opencl/lbm/lbm_macros.h b/benchmarks/old_opencl/lbm/lbm_macros.h similarity index 100% rename from benchmarks/opencl/lbm/lbm_macros.h rename to benchmarks/old_opencl/lbm/lbm_macros.h diff --git a/benchmarks/opencl/lbm/liblbm.a b/benchmarks/old_opencl/lbm/liblbm.a similarity index 100% rename from benchmarks/opencl/lbm/liblbm.a rename to benchmarks/old_opencl/lbm/liblbm.a diff --git a/benchmarks/opencl/lbm/main.cc b/benchmarks/old_opencl/lbm/main.cc similarity index 100% rename from benchmarks/opencl/lbm/main.cc rename to benchmarks/old_opencl/lbm/main.cc diff --git a/benchmarks/opencl/lbm/main.h b/benchmarks/old_opencl/lbm/main.h similarity index 100% rename from benchmarks/opencl/lbm/main.h rename to benchmarks/old_opencl/lbm/main.h diff --git a/benchmarks/opencl/lbm/ocl.c b/benchmarks/old_opencl/lbm/ocl.c similarity index 100% rename from benchmarks/opencl/lbm/ocl.c rename to benchmarks/old_opencl/lbm/ocl.c diff --git a/benchmarks/opencl/lbm/ocl.h b/benchmarks/old_opencl/lbm/ocl.h similarity index 100% rename from benchmarks/opencl/lbm/ocl.h rename to benchmarks/old_opencl/lbm/ocl.h diff --git a/benchmarks/opencl/lbm/parboil.h b/benchmarks/old_opencl/lbm/parboil.h similarity index 100% rename from benchmarks/opencl/lbm/parboil.h rename to benchmarks/old_opencl/lbm/parboil.h diff --git a/benchmarks/opencl/lbm/parboil_opencl.c b/benchmarks/old_opencl/lbm/parboil_opencl.c similarity index 100% rename from benchmarks/opencl/lbm/parboil_opencl.c rename to benchmarks/old_opencl/lbm/parboil_opencl.c diff --git a/benchmarks/opencl/lib/libOpenCL.a b/benchmarks/old_opencl/lib/libOpenCL.a similarity index 100% rename from benchmarks/opencl/lib/libOpenCL.a rename to 
benchmarks/old_opencl/lib/libOpenCL.a diff --git a/benchmarks/opencl/mri-q/32_32_32_dataset.bin b/benchmarks/old_opencl/mri-q/32_32_32_dataset.bin similarity index 100% rename from benchmarks/opencl/mri-q/32_32_32_dataset.bin rename to benchmarks/old_opencl/mri-q/32_32_32_dataset.bin diff --git a/benchmarks/opencl/mri-q/Makefile b/benchmarks/old_opencl/mri-q/Makefile similarity index 100% rename from benchmarks/opencl/mri-q/Makefile rename to benchmarks/old_opencl/mri-q/Makefile diff --git a/benchmarks/opencl/mri-q/args.c b/benchmarks/old_opencl/mri-q/args.c similarity index 100% rename from benchmarks/opencl/mri-q/args.c rename to benchmarks/old_opencl/mri-q/args.c diff --git a/benchmarks/opencl/mri-q/computeQ.c b/benchmarks/old_opencl/mri-q/computeQ.c similarity index 100% rename from benchmarks/opencl/mri-q/computeQ.c rename to benchmarks/old_opencl/mri-q/computeQ.c diff --git a/benchmarks/opencl/mri-q/computeQ.h b/benchmarks/old_opencl/mri-q/computeQ.h similarity index 100% rename from benchmarks/opencl/mri-q/computeQ.h rename to benchmarks/old_opencl/mri-q/computeQ.h diff --git a/benchmarks/opencl/mri-q/file.cc b/benchmarks/old_opencl/mri-q/file.cc similarity index 100% rename from benchmarks/opencl/mri-q/file.cc rename to benchmarks/old_opencl/mri-q/file.cc diff --git a/benchmarks/opencl/mri-q/file.h b/benchmarks/old_opencl/mri-q/file.h similarity index 100% rename from benchmarks/opencl/mri-q/file.h rename to benchmarks/old_opencl/mri-q/file.h diff --git a/benchmarks/opencl/mri-q/gpu_info.c b/benchmarks/old_opencl/mri-q/gpu_info.c similarity index 100% rename from benchmarks/opencl/mri-q/gpu_info.c rename to benchmarks/old_opencl/mri-q/gpu_info.c diff --git a/benchmarks/opencl/mri-q/gpu_info.h b/benchmarks/old_opencl/mri-q/gpu_info.h similarity index 100% rename from benchmarks/opencl/mri-q/gpu_info.h rename to benchmarks/old_opencl/mri-q/gpu_info.h diff --git a/benchmarks/opencl/mri-q/kernel.cl b/benchmarks/old_opencl/mri-q/kernel.cl similarity index 100% 
rename from benchmarks/opencl/mri-q/kernel.cl rename to benchmarks/old_opencl/mri-q/kernel.cl diff --git a/benchmarks/opencl/mri-q/libmri-q.a b/benchmarks/old_opencl/mri-q/libmri-q.a similarity index 100% rename from benchmarks/opencl/mri-q/libmri-q.a rename to benchmarks/old_opencl/mri-q/libmri-q.a diff --git a/benchmarks/opencl/mri-q/libsgemm.a b/benchmarks/old_opencl/mri-q/libsgemm.a similarity index 100% rename from benchmarks/opencl/mri-q/libsgemm.a rename to benchmarks/old_opencl/mri-q/libsgemm.a diff --git a/benchmarks/opencl/mri-q/macros.h b/benchmarks/old_opencl/mri-q/macros.h similarity index 100% rename from benchmarks/opencl/mri-q/macros.h rename to benchmarks/old_opencl/mri-q/macros.h diff --git a/benchmarks/opencl/mri-q/main.cc b/benchmarks/old_opencl/mri-q/main.cc similarity index 100% rename from benchmarks/opencl/mri-q/main.cc rename to benchmarks/old_opencl/mri-q/main.cc diff --git a/benchmarks/opencl/mri-q/ocl copy.c b/benchmarks/old_opencl/mri-q/ocl copy.c similarity index 100% rename from benchmarks/opencl/mri-q/ocl copy.c rename to benchmarks/old_opencl/mri-q/ocl copy.c diff --git a/benchmarks/opencl/mri-q/ocl copy.h b/benchmarks/old_opencl/mri-q/ocl copy.h similarity index 100% rename from benchmarks/opencl/mri-q/ocl copy.h rename to benchmarks/old_opencl/mri-q/ocl copy.h diff --git a/benchmarks/opencl/mri-q/ocl.c b/benchmarks/old_opencl/mri-q/ocl.c similarity index 100% rename from benchmarks/opencl/mri-q/ocl.c rename to benchmarks/old_opencl/mri-q/ocl.c diff --git a/benchmarks/opencl/mri-q/ocl.h b/benchmarks/old_opencl/mri-q/ocl.h similarity index 100% rename from benchmarks/opencl/mri-q/ocl.h rename to benchmarks/old_opencl/mri-q/ocl.h diff --git a/benchmarks/opencl/mri-q/parboil.h b/benchmarks/old_opencl/mri-q/parboil.h similarity index 100% rename from benchmarks/opencl/mri-q/parboil.h rename to benchmarks/old_opencl/mri-q/parboil.h diff --git a/benchmarks/opencl/mri-q/parboil_opencl.c b/benchmarks/old_opencl/mri-q/parboil_opencl.c 
similarity index 100% rename from benchmarks/opencl/mri-q/parboil_opencl.c rename to benchmarks/old_opencl/mri-q/parboil_opencl.c diff --git a/benchmarks/old_opencl/nearn/Makefile b/benchmarks/old_opencl/nearn/Makefile new file mode 100644 index 00000000..1af0e492 --- /dev/null +++ b/benchmarks/old_opencl/nearn/Makefile @@ -0,0 +1,68 @@ +RISCV_TOOL_PATH ?= $(wildcard ../../../../riscv-gnu-toolchain/drops) +POCL_CC_PATH ?= $(wildcard ../../../../pocl/drops_riscv_cc) +POCL_INC_PATH ?= $(wildcard ../include) +POCL_LIB_PATH ?= $(wildcard ../lib) +VX_RT_PATH ?= $(wildcard ../../../runtime) +VX_SIMX_PATH ?= $(wildcard ../../../simX/obj_dir) + +CC = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc +CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++ +DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump +HEX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy +GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb + +VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c +VX_SRCS += $(VX_RT_PATH)/startup/vx_start.S +VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s +VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c +VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s +VX_SRCS += $(VX_RT_PATH)/tests/tests.c +VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c +VX_SRCS += $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) + +VX_CFLAGS = -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld + +CXXFLAGS = -g -O0 -march=rv32im -mabi=ilp32 +CXXFLAGS += -ffreestanding # program may not begin at main() +CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections +CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions +CXXFLAGS += -I$(POCL_INC_PATH) + +VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a +QEMU_LIBS = $(VX_RT_PATH)/qemu/vx_api.c -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a + +PROJECT = nearn + +SRCS = main.cc clutils.cpp 
utils.cpp + +all: $(PROJECT).dump $(PROJECT).hex + +lib$(PROJECT).a: kernel.cl + POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl + +$(PROJECT).elf: $(SRCS) lib$(PROJECT).a + $(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) $(SRCS) $(VX_LIBS) -o $(PROJECT).elf + +$(PROJECT).qemu: $(SRCS) lib$(PROJECT).a + $(CXX) $(CXXFLAGS) $(SRCS) $(QEMU_LIBS) -o $(PROJECT).qemu + +$(PROJECT).hex: $(PROJECT).elf + $(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex + +$(PROJECT).dump: $(PROJECT).elf + $(DMP) -D $(PROJECT).elf > $(PROJECT).dump + +run: $(PROJECT).hex + POCL_DEBUG=all $(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug + +qemu: $(PROJECT).qemu + POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -d in_asm -D debug.log $(PROJECT).qemu + +gdb-s: $(PROJECT).qemu + POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -g 1234 -d in_asm -D debug.log $(PROJECT).qemu + +gdb-c: $(PROJECT).qemu + $(GDB) $(PROJECT).qemu + +clean: + rm -rf *.o *.elf *.dump *.hex *.qemu *.log *.debug diff --git a/benchmarks/new_opencl/nearn/README.txt b/benchmarks/old_opencl/nearn/README.txt similarity index 100% rename from benchmarks/new_opencl/nearn/README.txt rename to benchmarks/old_opencl/nearn/README.txt diff --git a/benchmarks/new_opencl/nearn/cane4_0.db b/benchmarks/old_opencl/nearn/cane4_0.db similarity index 100% rename from benchmarks/new_opencl/nearn/cane4_0.db rename to benchmarks/old_opencl/nearn/cane4_0.db diff --git a/benchmarks/new_opencl/nearn/cane4_1.db b/benchmarks/old_opencl/nearn/cane4_1.db similarity index 100% rename from benchmarks/new_opencl/nearn/cane4_1.db rename to benchmarks/old_opencl/nearn/cane4_1.db diff --git a/benchmarks/new_opencl/nearn/cane4_2.db b/benchmarks/old_opencl/nearn/cane4_2.db similarity index 100% rename from benchmarks/new_opencl/nearn/cane4_2.db rename to benchmarks/old_opencl/nearn/cane4_2.db diff --git 
a/benchmarks/new_opencl/nearn/cane4_3.db b/benchmarks/old_opencl/nearn/cane4_3.db similarity index 100% rename from benchmarks/new_opencl/nearn/cane4_3.db rename to benchmarks/old_opencl/nearn/cane4_3.db diff --git a/benchmarks/new_opencl/guassian/clutils.cpp b/benchmarks/old_opencl/nearn/clutils.cpp similarity index 97% rename from benchmarks/new_opencl/guassian/clutils.cpp rename to benchmarks/old_opencl/nearn/clutils.cpp index 32feef52..cd0dbb2f 100755 --- a/benchmarks/new_opencl/guassian/clutils.cpp +++ b/benchmarks/old_opencl/nearn/clutils.cpp @@ -782,27 +782,6 @@ void cl_writeToZCBuffer(cl_mem mem, void* data, size_t size) cl_unmapBuffer(mem, ptr); } -static int read_kernel_file(const char* filename, uint8_t** data, size_t* size) { - if (nullptr == filename || nullptr == data || 0 == size) - return -1; - - FILE* fp = fopen(filename, "r"); - if (NULL == fp) { - fprintf(stderr, "Failed to load kernel."); - return -1; - } - fseek(fp , 0 , SEEK_END); - long fsize = ftell(fp); - rewind(fp); - - *data = (uint8_t*)malloc(fsize); - *size = fread(*data, 1, fsize, fp); - - fclose(fp); - - return 0; -} - //------------------------------------------------------- // Program and kernels //------------------------------------------------------- @@ -860,20 +839,11 @@ cl_program cl_compileProgram(char* kernelPath, char* compileoptions, bool verbos // Create the program object //cl_program clProgramReturn = clCreateProgramWithSource(context, 1, (const char **)&source, NULL, &status); - //cl_program clProgramReturn = clCreateProgramWithBuiltInKernels(context, 1, &device, "Fan1;Fan2", &status); - // read kernel binary from file - uint8_t *kernel_bin = NULL; - size_t kernel_size; - cl_int binary_status = 0; - status = read_kernel_file("kernel.pocl", &kernel_bin, &kernel_size); - cl_errChk(status, "read_kernel_file", true); - cl_program clProgramReturn = clCreateProgramWithBinary( - context, 1, &device, &kernel_size, &kernel_bin, &binary_status, &status); - free(kernel_bin); + 
cl_program clProgramReturn = clCreateProgramWithBuiltInKernels(context, 1, &device, "NearestNeighbor", &status); cl_errChk(status, "Creating program", true); - //free(source); - //fclose(fp); + free(source); + fclose(fp); // Try to compile the program status = clBuildProgram(clProgramReturn, 0, NULL, compileoptions, NULL, NULL); @@ -1440,4 +1410,4 @@ char* itoa_portable(int value, char* result, int base) { } return result; -} +} \ No newline at end of file diff --git a/benchmarks/new_opencl/nearn/clutils.h b/benchmarks/old_opencl/nearn/clutils.h similarity index 100% rename from benchmarks/new_opencl/nearn/clutils.h rename to benchmarks/old_opencl/nearn/clutils.h diff --git a/benchmarks/new_opencl/nearn/filelist.txt b/benchmarks/old_opencl/nearn/filelist.txt similarity index 100% rename from benchmarks/new_opencl/nearn/filelist.txt rename to benchmarks/old_opencl/nearn/filelist.txt diff --git a/benchmarks/opencl/nearn/gettimeofday.cpp b/benchmarks/old_opencl/nearn/gettimeofday.cpp similarity index 100% rename from benchmarks/opencl/nearn/gettimeofday.cpp rename to benchmarks/old_opencl/nearn/gettimeofday.cpp diff --git a/benchmarks/opencl/nearn/gettimeofday.h b/benchmarks/old_opencl/nearn/gettimeofday.h similarity index 100% rename from benchmarks/opencl/nearn/gettimeofday.h rename to benchmarks/old_opencl/nearn/gettimeofday.h diff --git a/benchmarks/new_opencl/nearn/ipoint.h b/benchmarks/old_opencl/nearn/ipoint.h similarity index 100% rename from benchmarks/new_opencl/nearn/ipoint.h rename to benchmarks/old_opencl/nearn/ipoint.h diff --git a/benchmarks/new_opencl/nearn/kernel.cl b/benchmarks/old_opencl/nearn/kernel.cl similarity index 100% rename from benchmarks/new_opencl/nearn/kernel.cl rename to benchmarks/old_opencl/nearn/kernel.cl diff --git a/benchmarks/opencl/nearn/libnearn.a b/benchmarks/old_opencl/nearn/libnearn.a similarity index 100% rename from benchmarks/opencl/nearn/libnearn.a rename to benchmarks/old_opencl/nearn/libnearn.a diff --git 
a/benchmarks/new_opencl/nearn/main.cc b/benchmarks/old_opencl/nearn/main.cc similarity index 100% rename from benchmarks/new_opencl/nearn/main.cc rename to benchmarks/old_opencl/nearn/main.cc diff --git a/benchmarks/new_opencl/nearn/nearestNeighbor.h b/benchmarks/old_opencl/nearn/nearestNeighbor.h similarity index 100% rename from benchmarks/new_opencl/nearn/nearestNeighbor.h rename to benchmarks/old_opencl/nearn/nearestNeighbor.h diff --git a/benchmarks/new_opencl/nearn/run b/benchmarks/old_opencl/nearn/run similarity index 100% rename from benchmarks/new_opencl/nearn/run rename to benchmarks/old_opencl/nearn/run diff --git a/benchmarks/new_opencl/nearn/utils.cpp b/benchmarks/old_opencl/nearn/utils.cpp similarity index 100% rename from benchmarks/new_opencl/nearn/utils.cpp rename to benchmarks/old_opencl/nearn/utils.cpp diff --git a/benchmarks/new_opencl/nearn/utils.h b/benchmarks/old_opencl/nearn/utils.h similarity index 100% rename from benchmarks/new_opencl/nearn/utils.h rename to benchmarks/old_opencl/nearn/utils.h diff --git a/benchmarks/opencl/reduce0/Makefile b/benchmarks/old_opencl/reduce0/Makefile similarity index 100% rename from benchmarks/opencl/reduce0/Makefile rename to benchmarks/old_opencl/reduce0/Makefile diff --git a/benchmarks/opencl/reduce0/main.cc b/benchmarks/old_opencl/reduce0/main.cc similarity index 100% rename from benchmarks/opencl/reduce0/main.cc rename to benchmarks/old_opencl/reduce0/main.cc diff --git a/benchmarks/opencl/reduce0/oclReduction.h b/benchmarks/old_opencl/reduce0/oclReduction.h similarity index 100% rename from benchmarks/opencl/reduce0/oclReduction.h rename to benchmarks/old_opencl/reduce0/oclReduction.h diff --git a/benchmarks/opencl/reduce0/oclReduction_kernel.cl b/benchmarks/old_opencl/reduce0/oclReduction_kernel.cl similarity index 100% rename from benchmarks/opencl/reduce0/oclReduction_kernel.cl rename to benchmarks/old_opencl/reduce0/oclReduction_kernel.cl diff --git a/benchmarks/opencl/reduce0/oclUtils.h 
b/benchmarks/old_opencl/reduce0/oclUtils.h similarity index 100% rename from benchmarks/opencl/reduce0/oclUtils.h rename to benchmarks/old_opencl/reduce0/oclUtils.h diff --git a/benchmarks/opencl/reduce0/shrQATest.h b/benchmarks/old_opencl/reduce0/shrQATest.h similarity index 100% rename from benchmarks/opencl/reduce0/shrQATest.h rename to benchmarks/old_opencl/reduce0/shrQATest.h diff --git a/benchmarks/opencl/reduce0/shrUtils.h b/benchmarks/old_opencl/reduce0/shrUtils.h similarity index 100% rename from benchmarks/opencl/reduce0/shrUtils.h rename to benchmarks/old_opencl/reduce0/shrUtils.h diff --git a/benchmarks/opencl/sad/DESCRIPTION b/benchmarks/old_opencl/sad/DESCRIPTION similarity index 100% rename from benchmarks/opencl/sad/DESCRIPTION rename to benchmarks/old_opencl/sad/DESCRIPTION diff --git a/benchmarks/opencl/sad/Makefile b/benchmarks/old_opencl/sad/Makefile similarity index 100% rename from benchmarks/opencl/sad/Makefile rename to benchmarks/old_opencl/sad/Makefile diff --git a/benchmarks/opencl/sad/OpenCL_common.cpp b/benchmarks/old_opencl/sad/OpenCL_common.cpp similarity index 100% rename from benchmarks/opencl/sad/OpenCL_common.cpp rename to benchmarks/old_opencl/sad/OpenCL_common.cpp diff --git a/benchmarks/opencl/sad/OpenCL_common.h b/benchmarks/old_opencl/sad/OpenCL_common.h similarity index 100% rename from benchmarks/opencl/sad/OpenCL_common.h rename to benchmarks/old_opencl/sad/OpenCL_common.h diff --git a/benchmarks/opencl/sad/args.c b/benchmarks/old_opencl/sad/args.c similarity index 100% rename from benchmarks/opencl/sad/args.c rename to benchmarks/old_opencl/sad/args.c diff --git a/benchmarks/opencl/sad/file.c b/benchmarks/old_opencl/sad/file.c similarity index 100% rename from benchmarks/opencl/sad/file.c rename to benchmarks/old_opencl/sad/file.c diff --git a/benchmarks/opencl/sad/file.h b/benchmarks/old_opencl/sad/file.h similarity index 100% rename from benchmarks/opencl/sad/file.h rename to benchmarks/old_opencl/sad/file.h diff --git 
a/benchmarks/opencl/sad/frame.bin b/benchmarks/old_opencl/sad/frame.bin similarity index 100% rename from benchmarks/opencl/sad/frame.bin rename to benchmarks/old_opencl/sad/frame.bin diff --git a/benchmarks/opencl/sad/gpu_info.c b/benchmarks/old_opencl/sad/gpu_info.c similarity index 100% rename from benchmarks/opencl/sad/gpu_info.c rename to benchmarks/old_opencl/sad/gpu_info.c diff --git a/benchmarks/opencl/sad/gpu_info.h b/benchmarks/old_opencl/sad/gpu_info.h similarity index 100% rename from benchmarks/opencl/sad/gpu_info.h rename to benchmarks/old_opencl/sad/gpu_info.h diff --git a/benchmarks/opencl/sad/image.c b/benchmarks/old_opencl/sad/image.c similarity index 100% rename from benchmarks/opencl/sad/image.c rename to benchmarks/old_opencl/sad/image.c diff --git a/benchmarks/opencl/sad/image.h b/benchmarks/old_opencl/sad/image.h similarity index 100% rename from benchmarks/opencl/sad/image.h rename to benchmarks/old_opencl/sad/image.h diff --git a/benchmarks/opencl/sad/kernel.cl b/benchmarks/old_opencl/sad/kernel.cl similarity index 100% rename from benchmarks/opencl/sad/kernel.cl rename to benchmarks/old_opencl/sad/kernel.cl diff --git a/benchmarks/opencl/sad/libsad.a b/benchmarks/old_opencl/sad/libsad.a similarity index 100% rename from benchmarks/opencl/sad/libsad.a rename to benchmarks/old_opencl/sad/libsad.a diff --git a/benchmarks/opencl/sad/main.cc b/benchmarks/old_opencl/sad/main.cc similarity index 100% rename from benchmarks/opencl/sad/main.cc rename to benchmarks/old_opencl/sad/main.cc diff --git a/benchmarks/opencl/sad/ocl.c b/benchmarks/old_opencl/sad/ocl.c similarity index 100% rename from benchmarks/opencl/sad/ocl.c rename to benchmarks/old_opencl/sad/ocl.c diff --git a/benchmarks/opencl/sad/ocl.h b/benchmarks/old_opencl/sad/ocl.h similarity index 100% rename from benchmarks/opencl/sad/ocl.h rename to benchmarks/old_opencl/sad/ocl.h diff --git a/benchmarks/opencl/sad/parboil.h b/benchmarks/old_opencl/sad/parboil.h similarity index 100% rename 
from benchmarks/opencl/sad/parboil.h rename to benchmarks/old_opencl/sad/parboil.h diff --git a/benchmarks/opencl/sad/parboil_opencl.c b/benchmarks/old_opencl/sad/parboil_opencl.c similarity index 100% rename from benchmarks/opencl/sad/parboil_opencl.c rename to benchmarks/old_opencl/sad/parboil_opencl.c diff --git a/benchmarks/opencl/sad/reference.bin b/benchmarks/old_opencl/sad/reference.bin similarity index 100% rename from benchmarks/opencl/sad/reference.bin rename to benchmarks/old_opencl/sad/reference.bin diff --git a/benchmarks/opencl/sad/sad.h b/benchmarks/old_opencl/sad/sad.h similarity index 100% rename from benchmarks/opencl/sad/sad.h rename to benchmarks/old_opencl/sad/sad.h diff --git a/benchmarks/opencl/sad/sad_kernel.h b/benchmarks/old_opencl/sad/sad_kernel.h similarity index 100% rename from benchmarks/opencl/sad/sad_kernel.h rename to benchmarks/old_opencl/sad/sad_kernel.h diff --git a/benchmarks/old_opencl/saxpy/Makefile b/benchmarks/old_opencl/saxpy/Makefile new file mode 100644 index 00000000..6f6ae366 --- /dev/null +++ b/benchmarks/old_opencl/saxpy/Makefile @@ -0,0 +1,68 @@ +RISCV_TOOL_PATH ?= $(wildcard ../../../../riscv-gnu-toolchain/drops) +POCL_CC_PATH ?= $(wildcard ../../../../pocl/drops_riscv_cc) +POCL_INC_PATH ?= $(wildcard ../include) +POCL_LIB_PATH ?= $(wildcard ../lib) +VX_RT_PATH ?= $(wildcard ../../../runtime) +VX_SIMX_PATH ?= $(wildcard ../../../simX/obj_dir) + +CC = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc +CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++ +DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump +HEX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy +GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb + +VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c +VX_SRCS += $(VX_RT_PATH)/startup/vx_start.S +VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s +VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c +VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s +VX_SRCS += $(VX_RT_PATH)/tests/tests.c +VX_SRCS += 
$(VX_RT_PATH)/vx_api/vx_api.c +VX_SRCS += $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) + +VX_CFLAGS = -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld + +CXXFLAGS = -g -O0 -march=rv32im -mabi=ilp32 +CXXFLAGS += -ffreestanding # program may not begin at main() +CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections +CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions +CXXFLAGS += -I$(POCL_INC_PATH) + +VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a +QEMU_LIBS = $(VX_RT_PATH)/qemu/vx_api.c -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a + +PROJECT = saxpy + + SRCS = main.cc + +all: $(PROJECT).dump $(PROJECT).hex + +lib$(PROJECT).a: kernel.cl + POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl + +$(PROJECT).elf: $(SRCS) lib$(PROJECT).a + $(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) $(SRCS) $(VX_LIBS) -o $(PROJECT).elf + +$(PROJECT).qemu: $(SRCS) lib$(PROJECT).a + $(CXX) $(CXXFLAGS) $(SRCS) $(QEMU_LIBS) -o $(PROJECT).qemu + +$(PROJECT).hex: $(PROJECT).elf + $(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex + +$(PROJECT).dump: $(PROJECT).elf + $(DMP) -D $(PROJECT).elf > $(PROJECT).dump + +run: $(PROJECT).hex + POCL_DEBUG=all $(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug + +qemu: $(PROJECT).qemu + POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -d in_asm -D debug.log $(PROJECT).qemu + +gdb-s: $(PROJECT).qemu + POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -g 1234 -d in_asm -D debug.log $(PROJECT).qemu + +gdb-c: $(PROJECT).qemu + $(GDB) $(PROJECT).qemu + +clean: + rm -rf *.o *.elf *.dump *.hex *.qemu *.log *.debug \ No newline at end of file diff --git a/benchmarks/new_opencl/saxpy/README b/benchmarks/old_opencl/saxpy/README similarity index 
100% rename from benchmarks/new_opencl/saxpy/README rename to benchmarks/old_opencl/saxpy/README diff --git a/benchmarks/new_opencl/saxpy/kernel.cl b/benchmarks/old_opencl/saxpy/kernel.cl similarity index 100% rename from benchmarks/new_opencl/saxpy/kernel.cl rename to benchmarks/old_opencl/saxpy/kernel.cl diff --git a/benchmarks/opencl/saxpy/libsaxpy.a b/benchmarks/old_opencl/saxpy/libsaxpy.a similarity index 100% rename from benchmarks/opencl/saxpy/libsaxpy.a rename to benchmarks/old_opencl/saxpy/libsaxpy.a diff --git a/benchmarks/new_opencl/saxpy/main.cc b/benchmarks/old_opencl/saxpy/main.cc similarity index 87% rename from benchmarks/new_opencl/saxpy/main.cc rename to benchmarks/old_opencl/saxpy/main.cc index dd952d46..9cf5d774 100644 --- a/benchmarks/new_opencl/saxpy/main.cc +++ b/benchmarks/old_opencl/saxpy/main.cc @@ -45,7 +45,7 @@ #define CL_CHECK_ERR(_expr) \ ({ \ cl_int _err = CL_INVALID_VALUE; \ - decltype(_expr) _ret = _expr; \ + typeof(_expr) _ret = _expr; \ if (_err != CL_SUCCESS) { \ fprintf(stderr, "OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \ abort(); \ @@ -58,29 +58,6 @@ void pfn_notify(const char *errinfo, const void *private_info, size_t cb, fprintf(stderr, "OpenCL Error (via pfn_notify): %s\n", errinfo); } -static int read_kernel_file(const char* filename, uint8_t** data, size_t* size) { - if (nullptr == filename || nullptr == data || 0 == size) - return -1; - - FILE* fp = fopen(filename, "r"); - if (NULL == fp) { - fprintf(stderr, "Failed to load kernel."); - return -1; - } - fseek(fp , 0 , SEEK_END); - long fsize = ftell(fp); - rewind(fp); - - *data = (uint8_t*)malloc(fsize); - *size = fread(*data, 1, fsize, fp); - - fclose(fp); - - return 0; -} - -uint8_t *kernel_bin = NULL; - /// // Cleanup any created OpenCL resources // @@ -101,8 +78,6 @@ void Cleanup(cl_context context, cl_command_queue commandQueue, if (context != 0) clReleaseContext(context); - - if (kernel_bin) free(kernel_bin); } int main(int argc, char **argv) { @@ 
-110,14 +85,9 @@ int main(int argc, char **argv) { cl_platform_id platform_id; cl_device_id device_id; - size_t kernel_size; - cl_int binary_status = 0; + size_t binary_size; int i; - // read kernel binary from file - if (0 != read_kernel_file("kernel.pocl", &kernel_bin, &kernel_size)) - return -1; - // Getting platform and device information CL_CHECK(clGetPlatformIDs(1, &platform_id, NULL)); CL_CHECK(clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, NULL)); @@ -135,8 +105,8 @@ int main(int argc, char **argv) { // If that is not available, then create the program from source // and store the binary for future use. std::cout << "Attempting to create program from binary..." << std::endl; - cl_program program = CL_CHECK_ERR(clCreateProgramWithBinary( - context, 1, &device_id, &kernel_size, &kernel_bin, &binary_status, &_err)); + cl_program program = + clCreateProgramWithBuiltInKernels(context, 1, &device_id, "saxpy", NULL); if (program == NULL) { std::cerr << "Failed to write program binary" << std::endl; Cleanup(context, queue, program, kernel, memObjects); @@ -183,7 +153,7 @@ int main(int argc, char **argv) { } cl_event kernel_completion; - size_t global_work_size[] = {NUM_DATA/2,NUM_DATA/2}; + size_t global_work_size[1] = {NUM_DATA}; printf("attempting to enqueue kernel\n"); fflush(stdout); CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size, diff --git a/benchmarks/old_opencl/sfilter/Makefile b/benchmarks/old_opencl/sfilter/Makefile new file mode 100644 index 00000000..edb4aab3 --- /dev/null +++ b/benchmarks/old_opencl/sfilter/Makefile @@ -0,0 +1,68 @@ +RISCV_TOOL_PATH ?= $(wildcard ../../../../riscv-gnu-toolchain/drops) +POCL_CC_PATH ?= $(wildcard ../../../../pocl/drops_riscv_cc) +POCL_INC_PATH ?= $(wildcard ../include) +POCL_LIB_PATH ?= $(wildcard ../lib) +VX_RT_PATH ?= $(wildcard ../../../runtime) +VX_SIMX_PATH ?= $(wildcard ../../../simX/obj_dir) + +CC = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc +CXX = 
$(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++ +DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump +HEX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy +GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb + +VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c +VX_SRCS += $(VX_RT_PATH)/startup/vx_start.S +VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s +VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c +VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s +VX_SRCS += $(VX_RT_PATH)/tests/tests.c +VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c +VX_SRCS += $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) + +VX_CFLAGS = -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld + +CXXFLAGS = -g -O0 -march=rv32im -mabi=ilp32 +CXXFLAGS += -ffreestanding # program may not begin at main() +CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections +CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions +CXXFLAGS += -I$(POCL_INC_PATH) + +VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a +QEMU_LIBS = $(VX_RT_PATH)/qemu/vx_api.c -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a + +PROJECT = sfilter + + SRCS = main.cc + +all: $(PROJECT).dump $(PROJECT).hex + +lib$(PROJECT).a: kernel.cl + POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl + +$(PROJECT).elf: $(SRCS) lib$(PROJECT).a + $(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) $(SRCS) $(VX_LIBS) -o $(PROJECT).elf + +$(PROJECT).qemu: $(SRCS) lib$(PROJECT).a + $(CXX) $(CXXFLAGS) $(SRCS) $(QEMU_LIBS) -o $(PROJECT).qemu + +$(PROJECT).hex: $(PROJECT).elf + $(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex + +$(PROJECT).dump: $(PROJECT).elf + $(DMP) -D $(PROJECT).elf > $(PROJECT).dump + +run: $(PROJECT).hex + POCL_DEBUG=all $(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core 
$(PROJECT).hex -s -b 1> emulator.debug + +qemu: $(PROJECT).qemu + POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -d in_asm -D debug.log $(PROJECT).qemu + +gdb-s: $(PROJECT).qemu + POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -g 1234 -d in_asm -D debug.log $(PROJECT).qemu + +gdb-c: $(PROJECT).qemu + $(GDB) $(PROJECT).qemu + +clean: + rm -rf *.o *.elf *.dump *.hex *.qemu *.log *.debug \ No newline at end of file diff --git a/benchmarks/new_opencl/sfilter/README b/benchmarks/old_opencl/sfilter/README similarity index 100% rename from benchmarks/new_opencl/sfilter/README rename to benchmarks/old_opencl/sfilter/README diff --git a/benchmarks/new_opencl/sfilter/kernel.cl b/benchmarks/old_opencl/sfilter/kernel.cl similarity index 100% rename from benchmarks/new_opencl/sfilter/kernel.cl rename to benchmarks/old_opencl/sfilter/kernel.cl diff --git a/benchmarks/opencl/sfilter/libsfilter.a b/benchmarks/old_opencl/sfilter/libsfilter.a similarity index 100% rename from benchmarks/opencl/sfilter/libsfilter.a rename to benchmarks/old_opencl/sfilter/libsfilter.a diff --git a/benchmarks/new_opencl/sfilter/main.cc b/benchmarks/old_opencl/sfilter/main.cc similarity index 91% rename from benchmarks/new_opencl/sfilter/main.cc rename to benchmarks/old_opencl/sfilter/main.cc index 637d591a..d29beff0 100644 --- a/benchmarks/new_opencl/sfilter/main.cc +++ b/benchmarks/old_opencl/sfilter/main.cc @@ -48,7 +48,7 @@ #define CL_CHECK_ERR(_expr) \ ({ \ cl_int _err = CL_INVALID_VALUE; \ - decltype(_expr) _ret = _expr; \ + typeof(_expr) _ret = _expr; \ if (_err != CL_SUCCESS) { \ fprintf(stderr, "OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \ abort(); \ @@ -60,30 +60,6 @@ void pfn_notify(const char *errinfo, const void *private_info, size_t cb, void *user_data) { fprintf(stderr, "OpenCL Error (via pfn_notify): %s\n", errinfo); } - -static int read_kernel_file(const char* filename, uint8_t** data, size_t* size) { - if (nullptr == filename || nullptr == data || 0 == size) - 
return -1; - - FILE* fp = fopen(filename, "r"); - if (NULL == fp) { - fprintf(stderr, "Failed to load kernel."); - return -1; - } - fseek(fp , 0 , SEEK_END); - long fsize = ftell(fp); - rewind(fp); - - *data = (uint8_t*)malloc(fsize); - *size = fread(*data, 1, fsize, fp); - - fclose(fp); - - return 0; -} - -uint8_t *kernel_bin = NULL; - // inlcude pocl float to half conversions typedef union { int32_t i; @@ -175,8 +151,6 @@ void Cleanup(cl_context context, cl_command_queue commandQueue, if (context != 0) clReleaseContext(context); - - if (kernel_bin) free(kernel_bin); } int main(int argc, char **argv) { @@ -184,14 +158,9 @@ int main(int argc, char **argv) { cl_platform_id platform_id; cl_device_id device_id; - size_t kernel_size; - cl_int binary_status = 0; + size_t binary_size; int i; - // read kernel binary from file - if (0 != read_kernel_file("kernel.pocl", &kernel_bin, &kernel_size)) - return -1; - // Getting platform and device information CL_CHECK(clGetPlatformIDs(1, &platform_id, NULL)); CL_CHECK(clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, NULL)); @@ -209,8 +178,7 @@ int main(int argc, char **argv) { // If that is not available, then create the program from source // and store the binary for future use. std::cout << "Attempting to create program from binary..." 
<< std::endl; - cl_program program = CL_CHECK_ERR(clCreateProgramWithBinary( - context, 1, &device_id, &kernel_size, &kernel_bin, &binary_status, &_err)); + cl_program program = clCreateProgramWithBuiltInKernels(context, 1, &device_id, "sfilter", NULL); if (program == NULL) { std::cerr << "Failed to write program binary" << std::endl; Cleanup(context, queue, program, kernel, memObjects); diff --git a/benchmarks/old_opencl/sgemm/Makefile b/benchmarks/old_opencl/sgemm/Makefile new file mode 100644 index 00000000..ce7a3d1f --- /dev/null +++ b/benchmarks/old_opencl/sgemm/Makefile @@ -0,0 +1,68 @@ +RISCV_TOOL_PATH ?= $(wildcard ../../../../riscv-gnu-toolchain/drops) +POCL_CC_PATH ?= $(wildcard ../../../../pocl/drops_riscv_cc) +POCL_INC_PATH ?= $(wildcard ../include) +POCL_LIB_PATH ?= $(wildcard ../lib) +VX_RT_PATH ?= $(wildcard ../../../runtime) +VX_SIMX_PATH ?= $(wildcard ../../../simX/obj_dir) + +CC = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc +CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++ +DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump +HEX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy +GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb + +VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c +VX_SRCS += $(VX_RT_PATH)/startup/vx_start.S +VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s +VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c +VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s +VX_SRCS += $(VX_RT_PATH)/tests/tests.c +VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c +VX_SRCS += $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) + +VX_CFLAGS = -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld + +CXXFLAGS = -g -O0 -march=rv32im -mabi=ilp32 +CXXFLAGS += -ffreestanding # program may not begin at main() +CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections +CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions +CXXFLAGS += -I$(POCL_INC_PATH) + +VX_LIBS = 
-Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a +QEMU_LIBS = $(VX_RT_PATH)/qemu/vx_api.c -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a + +PROJECT = sgemm + +SRCS = main.cc + +all: $(PROJECT).dump $(PROJECT).hex + +lib$(PROJECT).a: kernel.cl + POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl + +$(PROJECT).elf: $(SRCS) lib$(PROJECT).a + $(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) $(SRCS) $(VX_LIBS) -o $(PROJECT).elf + +$(PROJECT).qemu: $(SRCS) lib$(PROJECT).a + $(CXX) $(CXXFLAGS) $(SRCS) $(QEMU_LIBS) -o $(PROJECT).qemu + +$(PROJECT).hex: $(PROJECT).elf + $(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex + +$(PROJECT).dump: $(PROJECT).elf + $(DMP) -D $(PROJECT).elf > $(PROJECT).dump + +run: $(PROJECT).hex + POCL_DEBUG=all $(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug + +qemu: $(PROJECT).qemu + POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -d in_asm -D debug.log $(PROJECT).qemu + +gdb-s: $(PROJECT).qemu + POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -g 1234 -d in_asm -D debug.log $(PROJECT).qemu + +gdb-c: $(PROJECT).qemu + $(GDB) $(PROJECT).qemu + +clean: + rm -rf *.o *.elf *.dump *.hex *.qemu *.log *.debug diff --git a/benchmarks/new_opencl/sgemm/README b/benchmarks/old_opencl/sgemm/README similarity index 100% rename from benchmarks/new_opencl/sgemm/README rename to benchmarks/old_opencl/sgemm/README diff --git a/benchmarks/new_opencl/sgemm/kernel.cl b/benchmarks/old_opencl/sgemm/kernel.cl similarity index 100% rename from benchmarks/new_opencl/sgemm/kernel.cl rename to benchmarks/old_opencl/sgemm/kernel.cl diff --git a/benchmarks/opencl/sgemm/libsgemm.a b/benchmarks/old_opencl/sgemm/libsgemm.a similarity index 100% rename from benchmarks/opencl/sgemm/libsgemm.a rename to benchmarks/old_opencl/sgemm/libsgemm.a diff --git 
a/benchmarks/new_opencl/sgemm/main.cc b/benchmarks/old_opencl/sgemm/main.cc similarity index 89% rename from benchmarks/new_opencl/sgemm/main.cc rename to benchmarks/old_opencl/sgemm/main.cc index 2b72d1e5..64e605a0 100644 --- a/benchmarks/new_opencl/sgemm/main.cc +++ b/benchmarks/old_opencl/sgemm/main.cc @@ -46,7 +46,7 @@ #define CL_CHECK_ERR(_expr) \ ({ \ cl_int _err = CL_INVALID_VALUE; \ - decltype(_expr) _ret = _expr; \ + typeof(_expr) _ret = _expr; \ if (_err != CL_SUCCESS) { \ fprintf(stderr, "OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \ abort(); \ @@ -59,29 +59,6 @@ void pfn_notify(const char *errinfo, const void *private_info, size_t cb, fprintf(stderr, "OpenCL Error (via pfn_notify): %s\n", errinfo); } -static int read_kernel_file(const char* filename, uint8_t** data, size_t* size) { - if (nullptr == filename || nullptr == data || 0 == size) - return -1; - - FILE* fp = fopen(filename, "r"); - if (NULL == fp) { - fprintf(stderr, "Failed to load kernel."); - return -1; - } - fseek(fp , 0 , SEEK_END); - long fsize = ftell(fp); - rewind(fp); - - *data = (uint8_t*)malloc(fsize); - *size = fread(*data, 1, fsize, fp); - - fclose(fp); - - return 0; -} - -uint8_t *kernel_bin = NULL; - /// // Cleanup any created OpenCL resources // @@ -102,8 +79,6 @@ void Cleanup(cl_context context, cl_command_queue commandQueue, if (context != 0) clReleaseContext(context); - - if (kernel_bin) free(kernel_bin); } int main(int argc, char **argv) { @@ -111,14 +86,9 @@ int main(int argc, char **argv) { cl_platform_id platform_id; cl_device_id device_id; - size_t kernel_size; - cl_int binary_status = 0; + size_t binary_size; int i; - // read kernel binary from file - if (0 != read_kernel_file("kernel.pocl", &kernel_bin, &kernel_size)) - return -1; - // Getting platform and device information CL_CHECK(clGetPlatformIDs(1, &platform_id, NULL)); CL_CHECK(clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, NULL)); @@ -140,8 +110,8 @@ int main(int argc, char 
**argv) { std::cout << "Attempting to create program from binary..." << std::endl; // cl_program program = CreateProgramFromBinary(context, device_id, // "kernel.cl.bin"); - cl_program program = CL_CHECK_ERR(clCreateProgramWithBinary( - context, 1, &device_id, &kernel_size, &kernel_bin, &binary_status, &_err)); + cl_program program = + clCreateProgramWithBuiltInKernels(context, 1, &device_id, "sgemm", NULL); if (program == NULL) { std::cerr << "Failed to write program binary" << std::endl; Cleanup(context, queue, program, kernel, memObjects); @@ -176,7 +146,7 @@ int main(int argc, char **argv) { memObjects[1] = input_bufferB; memObjects[2] = output_buffer; - int width = NUM_DATA; + size_t width = NUM_DATA; printf("attempting to create kernel\n"); fflush(stdout); diff --git a/benchmarks/opencl/spmv/1138_bus.mtx b/benchmarks/old_opencl/spmv/1138_bus.mtx similarity index 100% rename from benchmarks/opencl/spmv/1138_bus.mtx rename to benchmarks/old_opencl/spmv/1138_bus.mtx diff --git a/benchmarks/opencl/spmv/DESCRIPTION b/benchmarks/old_opencl/spmv/DESCRIPTION similarity index 100% rename from benchmarks/opencl/spmv/DESCRIPTION rename to benchmarks/old_opencl/spmv/DESCRIPTION diff --git a/benchmarks/opencl/spmv/Makefile b/benchmarks/old_opencl/spmv/Makefile similarity index 100% rename from benchmarks/opencl/spmv/Makefile rename to benchmarks/old_opencl/spmv/Makefile diff --git a/benchmarks/opencl/spmv/args.c b/benchmarks/old_opencl/spmv/args.c similarity index 100% rename from benchmarks/opencl/spmv/args.c rename to benchmarks/old_opencl/spmv/args.c diff --git a/benchmarks/opencl/spmv/convert_dataset.c b/benchmarks/old_opencl/spmv/convert_dataset.c similarity index 100% rename from benchmarks/opencl/spmv/convert_dataset.c rename to benchmarks/old_opencl/spmv/convert_dataset.c diff --git a/benchmarks/opencl/spmv/convert_dataset.h b/benchmarks/old_opencl/spmv/convert_dataset.h similarity index 100% rename from benchmarks/opencl/spmv/convert_dataset.h rename to 
benchmarks/old_opencl/spmv/convert_dataset.h diff --git a/benchmarks/opencl/spmv/file.c b/benchmarks/old_opencl/spmv/file.c similarity index 100% rename from benchmarks/opencl/spmv/file.c rename to benchmarks/old_opencl/spmv/file.c diff --git a/benchmarks/opencl/spmv/file.h b/benchmarks/old_opencl/spmv/file.h similarity index 100% rename from benchmarks/opencl/spmv/file.h rename to benchmarks/old_opencl/spmv/file.h diff --git a/benchmarks/opencl/spmv/gpu_info.c b/benchmarks/old_opencl/spmv/gpu_info.c similarity index 100% rename from benchmarks/opencl/spmv/gpu_info.c rename to benchmarks/old_opencl/spmv/gpu_info.c diff --git a/benchmarks/opencl/spmv/gpu_info.h b/benchmarks/old_opencl/spmv/gpu_info.h similarity index 100% rename from benchmarks/opencl/spmv/gpu_info.h rename to benchmarks/old_opencl/spmv/gpu_info.h diff --git a/benchmarks/opencl/spmv/input/1138_bus.mtx b/benchmarks/old_opencl/spmv/input/1138_bus.mtx similarity index 100% rename from benchmarks/opencl/spmv/input/1138_bus.mtx rename to benchmarks/old_opencl/spmv/input/1138_bus.mtx diff --git a/benchmarks/opencl/spmv/input/1138_bus.mtx.bin b/benchmarks/old_opencl/spmv/input/1138_bus.mtx.bin similarity index 100% rename from benchmarks/opencl/spmv/input/1138_bus.mtx.bin rename to benchmarks/old_opencl/spmv/input/1138_bus.mtx.bin diff --git a/benchmarks/opencl/spmv/input/DESCRIPTION b/benchmarks/old_opencl/spmv/input/DESCRIPTION similarity index 100% rename from benchmarks/opencl/spmv/input/DESCRIPTION rename to benchmarks/old_opencl/spmv/input/DESCRIPTION diff --git a/benchmarks/opencl/spmv/input/vector.bin b/benchmarks/old_opencl/spmv/input/vector.bin similarity index 100% rename from benchmarks/opencl/spmv/input/vector.bin rename to benchmarks/old_opencl/spmv/input/vector.bin diff --git a/benchmarks/opencl/spmv/kernel.cl b/benchmarks/old_opencl/spmv/kernel.cl similarity index 100% rename from benchmarks/opencl/spmv/kernel.cl rename to benchmarks/old_opencl/spmv/kernel.cl diff --git 
a/benchmarks/opencl/spmv/libspmv.a b/benchmarks/old_opencl/spmv/libspmv.a similarity index 100% rename from benchmarks/opencl/spmv/libspmv.a rename to benchmarks/old_opencl/spmv/libspmv.a diff --git a/benchmarks/opencl/spmv/main.cc b/benchmarks/old_opencl/spmv/main.cc similarity index 100% rename from benchmarks/opencl/spmv/main.cc rename to benchmarks/old_opencl/spmv/main.cc diff --git a/benchmarks/opencl/spmv/mmio.c b/benchmarks/old_opencl/spmv/mmio.c similarity index 100% rename from benchmarks/opencl/spmv/mmio.c rename to benchmarks/old_opencl/spmv/mmio.c diff --git a/benchmarks/opencl/spmv/mmio.h b/benchmarks/old_opencl/spmv/mmio.h similarity index 100% rename from benchmarks/opencl/spmv/mmio.h rename to benchmarks/old_opencl/spmv/mmio.h diff --git a/benchmarks/opencl/spmv/ocl.c b/benchmarks/old_opencl/spmv/ocl.c similarity index 100% rename from benchmarks/opencl/spmv/ocl.c rename to benchmarks/old_opencl/spmv/ocl.c diff --git a/benchmarks/opencl/spmv/ocl.h b/benchmarks/old_opencl/spmv/ocl.h similarity index 100% rename from benchmarks/opencl/spmv/ocl.h rename to benchmarks/old_opencl/spmv/ocl.h diff --git a/benchmarks/opencl/spmv/parboil.h b/benchmarks/old_opencl/spmv/parboil.h similarity index 100% rename from benchmarks/opencl/spmv/parboil.h rename to benchmarks/old_opencl/spmv/parboil.h diff --git a/benchmarks/opencl/spmv/parboil_opencl.c b/benchmarks/old_opencl/spmv/parboil_opencl.c similarity index 100% rename from benchmarks/opencl/spmv/parboil_opencl.c rename to benchmarks/old_opencl/spmv/parboil_opencl.c diff --git a/benchmarks/opencl/spmv/perf_util.c b/benchmarks/old_opencl/spmv/perf_util.c similarity index 100% rename from benchmarks/opencl/spmv/perf_util.c rename to benchmarks/old_opencl/spmv/perf_util.c diff --git a/benchmarks/opencl/spmv/perf_util.h b/benchmarks/old_opencl/spmv/perf_util.h similarity index 100% rename from benchmarks/opencl/spmv/perf_util.h rename to benchmarks/old_opencl/spmv/perf_util.h diff --git 
a/benchmarks/opencl/spmv/perfmon.c b/benchmarks/old_opencl/spmv/perfmon.c similarity index 100% rename from benchmarks/opencl/spmv/perfmon.c rename to benchmarks/old_opencl/spmv/perfmon.c diff --git a/benchmarks/opencl/spmv/perfmon.h b/benchmarks/old_opencl/spmv/perfmon.h similarity index 100% rename from benchmarks/opencl/spmv/perfmon.h rename to benchmarks/old_opencl/spmv/perfmon.h diff --git a/benchmarks/opencl/spmv/stub.cc b/benchmarks/old_opencl/spmv/stub.cc similarity index 100% rename from benchmarks/opencl/spmv/stub.cc rename to benchmarks/old_opencl/spmv/stub.cc diff --git a/benchmarks/opencl/spmv/vector.bin b/benchmarks/old_opencl/spmv/vector.bin similarity index 100% rename from benchmarks/opencl/spmv/vector.bin rename to benchmarks/old_opencl/spmv/vector.bin diff --git a/benchmarks/opencl/stencil/128x128x32.bin b/benchmarks/old_opencl/stencil/128x128x32.bin similarity index 100% rename from benchmarks/opencl/stencil/128x128x32.bin rename to benchmarks/old_opencl/stencil/128x128x32.bin diff --git a/benchmarks/opencl/stencil/Makefile b/benchmarks/old_opencl/stencil/Makefile similarity index 100% rename from benchmarks/opencl/stencil/Makefile rename to benchmarks/old_opencl/stencil/Makefile diff --git a/benchmarks/opencl/stencil/args.c b/benchmarks/old_opencl/stencil/args.c similarity index 100% rename from benchmarks/opencl/stencil/args.c rename to benchmarks/old_opencl/stencil/args.c diff --git a/benchmarks/opencl/stencil/file.c b/benchmarks/old_opencl/stencil/file.c similarity index 100% rename from benchmarks/opencl/stencil/file.c rename to benchmarks/old_opencl/stencil/file.c diff --git a/benchmarks/opencl/stencil/file.h b/benchmarks/old_opencl/stencil/file.h similarity index 100% rename from benchmarks/opencl/stencil/file.h rename to benchmarks/old_opencl/stencil/file.h diff --git a/benchmarks/opencl/stencil/gpu_info.c b/benchmarks/old_opencl/stencil/gpu_info.c similarity index 100% rename from benchmarks/opencl/stencil/gpu_info.c rename to 
benchmarks/old_opencl/stencil/gpu_info.c diff --git a/benchmarks/opencl/stencil/gpu_info.h b/benchmarks/old_opencl/stencil/gpu_info.h similarity index 100% rename from benchmarks/opencl/stencil/gpu_info.h rename to benchmarks/old_opencl/stencil/gpu_info.h diff --git a/benchmarks/opencl/stencil/kernel.cl b/benchmarks/old_opencl/stencil/kernel.cl similarity index 100% rename from benchmarks/opencl/stencil/kernel.cl rename to benchmarks/old_opencl/stencil/kernel.cl diff --git a/benchmarks/opencl/stencil/libstencil.a b/benchmarks/old_opencl/stencil/libstencil.a similarity index 100% rename from benchmarks/opencl/stencil/libstencil.a rename to benchmarks/old_opencl/stencil/libstencil.a diff --git a/benchmarks/opencl/stencil/main.cc b/benchmarks/old_opencl/stencil/main.cc similarity index 100% rename from benchmarks/opencl/stencil/main.cc rename to benchmarks/old_opencl/stencil/main.cc diff --git a/benchmarks/opencl/stencil/ocl.c b/benchmarks/old_opencl/stencil/ocl.c similarity index 100% rename from benchmarks/opencl/stencil/ocl.c rename to benchmarks/old_opencl/stencil/ocl.c diff --git a/benchmarks/opencl/stencil/ocl.h b/benchmarks/old_opencl/stencil/ocl.h similarity index 100% rename from benchmarks/opencl/stencil/ocl.h rename to benchmarks/old_opencl/stencil/ocl.h diff --git a/benchmarks/opencl/stencil/parboil.h b/benchmarks/old_opencl/stencil/parboil.h similarity index 100% rename from benchmarks/opencl/stencil/parboil.h rename to benchmarks/old_opencl/stencil/parboil.h diff --git a/benchmarks/opencl/stencil/parboil_opencl.c b/benchmarks/old_opencl/stencil/parboil_opencl.c similarity index 100% rename from benchmarks/opencl/stencil/parboil_opencl.c rename to benchmarks/old_opencl/stencil/parboil_opencl.c diff --git a/benchmarks/old_opencl/transpose/Makefile b/benchmarks/old_opencl/transpose/Makefile new file mode 100644 index 00000000..3e0e68b0 --- /dev/null +++ b/benchmarks/old_opencl/transpose/Makefile @@ -0,0 +1,66 @@ +RISCV_TOOL_PATH ?= $(wildcard 
../../../../riscv-gnu-toolchain/drops) +POCL_CC_PATH ?= $(wildcard ../../../../pocl/drops_riscv_cc) +POCL_INC_PATH ?= $(wildcard ../include) +POCL_LIB_PATH ?= $(wildcard ../lib) +VX_RT_PATH ?= $(wildcard ../../../runtime) +VX_SIMX_PATH ?= $(wildcard ../../../simX/obj_dir) + +CC = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc +CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++ +DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump +HEX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy +GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb + +VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c +VX_SRCS += $(VX_RT_PATH)/startup/vx_start.S +VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s +VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c +VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s +VX_SRCS += $(VX_RT_PATH)/tests/tests.c +VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c +VX_SRCS += $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) + +VX_CFLAGS = -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld + +CXXFLAGS = -g -O0 -march=rv32im -mabi=ilp32 +CXXFLAGS += -ffreestanding # program may not begin at main() +CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections +CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions +CXXFLAGS += -I$(POCL_INC_PATH) -I. 
+ +VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a +QEMU_LIBS = $(VX_RT_PATH)/qemu/vx_api.c -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a + +PROJECT=transpose + +all: $(PROJECT).dump $(PROJECT).hex + +lib$(PROJECT).a: transpose.cl + POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl + +$(PROJECT).elf: main.cc lib$(PROJECT).a + $(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) main.cc $(VX_LIBS) -o $(PROJECT).elf + +$(PROJECT).qemu: main.cc lib$(PROJECT).a + $(CXX) $(CXXFLAGS) main.cc transpose_gold.cpp $(QEMU_LIBS) -o $(PROJECT).qemu + +$(PROJECT).hex: $(PROJECT).elf + $(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex + +$(PROJECT).dump: $(PROJECT).elf + $(DMP) -D $(PROJECT).elf > $(PROJECT).dump + +run: $(PROJECT).hex + POCL_DEBUG=all $(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug + +qemu: $(PROJECT).qemu + POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -d in_asm -D debug.log $(PROJECT).qemu + +gdb-s: $(PROJECT).qemu + POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -g 1234 -d in_asm -D debug.log $(PROJECT).qemu + +gdb-c: $(PROJECT).qemu + $(GDB) $(PROJECT).qemu + +clean: + rm -rf *.elf *.dump *.hex diff --git a/benchmarks/new_opencl/transpose/main.cc b/benchmarks/old_opencl/transpose/main.cc similarity index 95% rename from benchmarks/new_opencl/transpose/main.cc rename to benchmarks/old_opencl/transpose/main.cc index f72cb851..26122ab4 100644 --- a/benchmarks/new_opencl/transpose/main.cc +++ b/benchmarks/old_opencl/transpose/main.cc @@ -20,8 +20,8 @@ */ // standard utility and system includes -#include "oclUtils.h" -#include "shrQATest.h" +#include <oclUtils.h> +#include <shrQATest.h> #define BLOCK_DIM 16 @@ -152,28 +152,7 @@ double transposeGPU(const char* kernelName, bool useLocalMem, cl_uint ciDeviceC return time; } -uint8_t *kernel_bin = NULL; -static int 
read_kernel_file(const char* filename, uint8_t** data, size_t* size) { - if (nullptr == filename || nullptr == data || 0 == size) - return -1; - - FILE* fp = fopen(filename, "r"); - if (NULL == fp) { - fprintf(stderr, "Failed to load kernel."); - return -1; - } - fseek(fp , 0 , SEEK_END); - long fsize = ftell(fp); - rewind(fp); - - *data = (uint8_t*)malloc(fsize); - *size = fread(*data, 1, fsize, fp); - - fclose(fp); - - return 0; -} //! Run a simple test for CUDA // ********************************************************************* int runTest( const int argc, const char** argv) @@ -307,11 +286,10 @@ int runTest( const int argc, const char** argv) //oclCheckError(source_path != NULL, shrTRUE); char *source = oclLoadProgSource(source_path, "", &program_length); //oclCheckError(source != NULL, shrTRUE); - size_t kernel_size; - cl_int binary_status = 0; - cl_device_id device_id; + // create the program - rv_program = clCreateProgramWithBinary(cxGPUContext, 1, &device_id, &kernel_size, &kernel_bin, &binary_status, NULL); + rv_program = + clCreateProgramWithBuiltInKernels(context, 1, &device_id, "transpose", NULL); //rv_program = clCreateProgramWithSource(cxGPUContext, 1, // (const char **)&source, &program_length, &ciErrNum); //oclCheckError(ciErrNum, CL_SUCCESS); diff --git a/benchmarks/new_opencl/transpose/oclUtils.h b/benchmarks/old_opencl/transpose/oclUtils.h similarity index 97% rename from benchmarks/new_opencl/transpose/oclUtils.h rename to benchmarks/old_opencl/transpose/oclUtils.h index 096612a8..2b109e18 100644 --- a/benchmarks/new_opencl/transpose/oclUtils.h +++ b/benchmarks/old_opencl/transpose/oclUtils.h @@ -1,198 +1,198 @@ -/* - * Copyright 1993-2010 NVIDIA Corporation. All rights reserved. - * - * Please refer to the NVIDIA end user license agreement (EULA) associated - * with this source code for terms and conditions that govern your use of - * this software. 
Any use, reproduction, disclosure, or distribution of - * this software and related documentation outside the terms of the EULA - * is strictly prohibited. - * - */ - -#ifndef OCL_UTILS_H -#define OCL_UTILS_H - -// ********************************************************************* -// Utilities specific to OpenCL samples in NVIDIA GPU Computing SDK -// ********************************************************************* - -// Common headers: Cross-API utililties and OpenCL header -#include "shrUtils.h" - -// All OpenCL headers -#if defined (__APPLE__) || defined(MACOSX) - #include <OpenCL/opencl.h> -#else - #include <CL/opencl.h> -#endif - -// Includes -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -// For systems with CL_EXT that are not updated with these extensions, we copied these -// extensions from <CL/cl_ext.h> -#ifndef CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV - /* cl_nv_device_attribute_query extension - no extension #define since it has no functions */ - #define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 0x4000 - #define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV 0x4001 - #define CL_DEVICE_REGISTERS_PER_BLOCK_NV 0x4002 - #define CL_DEVICE_WARP_SIZE_NV 0x4003 - #define CL_DEVICE_GPU_OVERLAP_NV 0x4004 - #define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV 0x4005 - #define CL_DEVICE_INTEGRATED_MEMORY_NV 0x4006 -#endif - -// reminders for build output window and log -#ifdef _WIN32 - #pragma message ("Note: including shrUtils.h") - #pragma message ("Note: including opencl.h") -#endif - -// SDK Revision # -#define OCL_SDKREVISION "7027912" - -// Error and Exit Handling Macros... -// ********************************************************************* -// Full error handling macro with Cleanup() callback (if supplied)... 
-// (Companion Inline Function lower on page) -#define oclCheckErrorEX(a, b, c) __oclCheckErrorEX(a, b, c, __FILE__ , __LINE__) - -// Short version without Cleanup() callback pointer -// Both Input (a) and Reference (b) are specified as args -#define oclCheckError(a, b) oclCheckErrorEX(a, b, 0) - -////////////////////////////////////////////////////////////////////////////// -//! Gets the platform ID for NVIDIA if available, otherwise default to platform 0 -//! -//! @return the id -//! @param clSelectedPlatformID OpenCL platform ID -////////////////////////////////////////////////////////////////////////////// -extern "C" cl_int oclGetPlatformID(cl_platform_id* clSelectedPlatformID); - -////////////////////////////////////////////////////////////////////////////// -//! Print info about the device -//! -//! @param iLogMode enum LOGBOTH, LOGCONSOLE, LOGFILE -//! @param device OpenCL id of the device -////////////////////////////////////////////////////////////////////////////// -extern "C" void oclPrintDevInfo(int iLogMode, cl_device_id device); - -////////////////////////////////////////////////////////////////////////////// -//! Get and return device capability -//! -//! @return the 2 digit integer representation of device Cap (major minor). return -1 if NA -//! @param device OpenCL id of the device -////////////////////////////////////////////////////////////////////////////// -extern "C" int oclGetDevCap(cl_device_id device); - -////////////////////////////////////////////////////////////////////////////// -//! Print the device name -//! -//! @param iLogMode enum LOGBOTH, LOGCONSOLE, LOGFILE -//! @param device OpenCL id of the device -////////////////////////////////////////////////////////////////////////////// -extern "C" void oclPrintDevName(int iLogMode, cl_device_id device); - -////////////////////////////////////////////////////////////////////////////// -//! Gets the id of the first device from the context -//! -//! @return the id -//! 
@param cxGPUContext OpenCL context -////////////////////////////////////////////////////////////////////////////// -extern "C" cl_device_id oclGetFirstDev(cl_context cxGPUContext); - -////////////////////////////////////////////////////////////////////////////// -//! Gets the id of the nth device from the context -//! -//! @return the id or -1 when out of range -//! @param cxGPUContext OpenCL context -//! @param device_idx index of the device of interest -////////////////////////////////////////////////////////////////////////////// -extern "C" cl_device_id oclGetDev(cl_context cxGPUContext, unsigned int device_idx); - -////////////////////////////////////////////////////////////////////////////// -//! Gets the id of device with maximal FLOPS from the context -//! -//! @return the id -//! @param cxGPUContext OpenCL context -////////////////////////////////////////////////////////////////////////////// -extern "C" cl_device_id oclGetMaxFlopsDev(cl_context cxGPUContext); - -////////////////////////////////////////////////////////////////////////////// -//! Loads a Program file and prepends the cPreamble to the code. -//! -//! @return the source string if succeeded, 0 otherwise -//! @param cFilename program filename -//! @param cPreamble code that is prepended to the loaded file, typically a set of #defines or a header -//! @param szFinalLength returned length of the code string -////////////////////////////////////////////////////////////////////////////// -extern "C" char* oclLoadProgSource(const char* cFilename, const char* cPreamble, size_t* szFinalLength); - -////////////////////////////////////////////////////////////////////////////// -//! Get the binary (PTX) of the program associated with the device -//! -//! @param cpProgram OpenCL program -//! @param cdDevice device of interest -//! @param binary returned code -//! 
@param length length of returned code -////////////////////////////////////////////////////////////////////////////// -extern "C" void oclGetProgBinary( cl_program cpProgram, cl_device_id cdDevice, char** binary, size_t* length); - -////////////////////////////////////////////////////////////////////////////// -//! Get and log the binary (PTX) from the OpenCL compiler for the requested program & device -//! -//! @param cpProgram OpenCL program -//! @param cdDevice device of interest -//! @param const char* cPtxFileName optional PTX file name -////////////////////////////////////////////////////////////////////////////// -extern "C" void oclLogPtx(cl_program cpProgram, cl_device_id cdDevice, const char* cPtxFileName); - -////////////////////////////////////////////////////////////////////////////// -//! Get and log the Build Log from the OpenCL compiler for the requested program & device -//! -//! @param cpProgram OpenCL program -//! @param cdDevice device of interest -////////////////////////////////////////////////////////////////////////////// -extern "C" void oclLogBuildInfo(cl_program cpProgram, cl_device_id cdDevice); - -// Helper function for De-allocating cl objects -// ********************************************************************* -extern "C" void oclDeleteMemObjs(cl_mem* cmMemObjs, int iNumObjs); - -// Helper function to get OpenCL error string from constant -// ********************************************************************* -extern "C" const char* oclErrorString(cl_int error); - -// Helper function to get OpenCL image format string (channel order and type) from constant -// ********************************************************************* -extern "C" const char* oclImageFormatString(cl_uint uiImageFormat); - -// companion inline function for error checking and exit on error WITH Cleanup Callback (if supplied) -// ********************************************************************* -inline void __oclCheckErrorEX(cl_int iSample, cl_int 
iReference, void (*pCleanup)(int), const char* cFile, const int iLine) -{ - // An error condition is defined by the sample/test value not equal to the reference - if (iReference != iSample) - { - // If the sample/test value isn't equal to the ref, it's an error by defnition, so override 0 sample/test value - iSample = (iSample == 0) ? -9999 : iSample; - - // Log the error info - shrLog("\n !!! Error # %i (%s) at line %i , in file %s !!!\n\n", iSample, oclErrorString(iSample), iLine, cFile); - - // Cleanup and exit, or just exit if no cleanup function pointer provided. Use iSample (error code in this case) as process exit code. - if (pCleanup != NULL) - { - pCleanup(iSample); - } - else - { - shrLogEx(LOGBOTH | CLOSELOG, 0, "Exiting...\n"); - exit(iSample); - } - } -} - +/* + * Copyright 1993-2010 NVIDIA Corporation. All rights reserved. + * + * Please refer to the NVIDIA end user license agreement (EULA) associated + * with this source code for terms and conditions that govern your use of + * this software. Any use, reproduction, disclosure, or distribution of + * this software and related documentation outside the terms of the EULA + * is strictly prohibited. 
+ * + */ + +#ifndef OCL_UTILS_H +#define OCL_UTILS_H + +// ********************************************************************* +// Utilities specific to OpenCL samples in NVIDIA GPU Computing SDK +// ********************************************************************* + +// Common headers: Cross-API utililties and OpenCL header +#include <shrUtils.h> + +// All OpenCL headers +#if defined (__APPLE__) || defined(MACOSX) + #include <OpenCL/opencl.h> +#else + #include <CL/opencl.h> +#endif + +// Includes +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +// For systems with CL_EXT that are not updated with these extensions, we copied these +// extensions from <CL/cl_ext.h> +#ifndef CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV + /* cl_nv_device_attribute_query extension - no extension #define since it has no functions */ + #define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 0x4000 + #define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV 0x4001 + #define CL_DEVICE_REGISTERS_PER_BLOCK_NV 0x4002 + #define CL_DEVICE_WARP_SIZE_NV 0x4003 + #define CL_DEVICE_GPU_OVERLAP_NV 0x4004 + #define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV 0x4005 + #define CL_DEVICE_INTEGRATED_MEMORY_NV 0x4006 +#endif + +// reminders for build output window and log +#ifdef _WIN32 + #pragma message ("Note: including shrUtils.h") + #pragma message ("Note: including opencl.h") +#endif + +// SDK Revision # +#define OCL_SDKREVISION "7027912" + +// Error and Exit Handling Macros... +// ********************************************************************* +// Full error handling macro with Cleanup() callback (if supplied)... +// (Companion Inline Function lower on page) +#define oclCheckErrorEX(a, b, c) __oclCheckErrorEX(a, b, c, __FILE__ , __LINE__) + +// Short version without Cleanup() callback pointer +// Both Input (a) and Reference (b) are specified as args +#define oclCheckError(a, b) oclCheckErrorEX(a, b, 0) + +////////////////////////////////////////////////////////////////////////////// +//! Gets the platform ID for NVIDIA if available, otherwise default to platform 0 +//! +//! @return the id +//! 
@param clSelectedPlatformID OpenCL platform ID +////////////////////////////////////////////////////////////////////////////// +extern "C" cl_int oclGetPlatformID(cl_platform_id* clSelectedPlatformID); + +////////////////////////////////////////////////////////////////////////////// +//! Print info about the device +//! +//! @param iLogMode enum LOGBOTH, LOGCONSOLE, LOGFILE +//! @param device OpenCL id of the device +////////////////////////////////////////////////////////////////////////////// +extern "C" void oclPrintDevInfo(int iLogMode, cl_device_id device); + +////////////////////////////////////////////////////////////////////////////// +//! Get and return device capability +//! +//! @return the 2 digit integer representation of device Cap (major minor). return -1 if NA +//! @param device OpenCL id of the device +////////////////////////////////////////////////////////////////////////////// +extern "C" int oclGetDevCap(cl_device_id device); + +////////////////////////////////////////////////////////////////////////////// +//! Print the device name +//! +//! @param iLogMode enum LOGBOTH, LOGCONSOLE, LOGFILE +//! @param device OpenCL id of the device +////////////////////////////////////////////////////////////////////////////// +extern "C" void oclPrintDevName(int iLogMode, cl_device_id device); + +////////////////////////////////////////////////////////////////////////////// +//! Gets the id of the first device from the context +//! +//! @return the id +//! @param cxGPUContext OpenCL context +////////////////////////////////////////////////////////////////////////////// +extern "C" cl_device_id oclGetFirstDev(cl_context cxGPUContext); + +////////////////////////////////////////////////////////////////////////////// +//! Gets the id of the nth device from the context +//! +//! @return the id or -1 when out of range +//! @param cxGPUContext OpenCL context +//! 
@param device_idx index of the device of interest +////////////////////////////////////////////////////////////////////////////// +extern "C" cl_device_id oclGetDev(cl_context cxGPUContext, unsigned int device_idx); + +////////////////////////////////////////////////////////////////////////////// +//! Gets the id of device with maximal FLOPS from the context +//! +//! @return the id +//! @param cxGPUContext OpenCL context +////////////////////////////////////////////////////////////////////////////// +extern "C" cl_device_id oclGetMaxFlopsDev(cl_context cxGPUContext); + +////////////////////////////////////////////////////////////////////////////// +//! Loads a Program file and prepends the cPreamble to the code. +//! +//! @return the source string if succeeded, 0 otherwise +//! @param cFilename program filename +//! @param cPreamble code that is prepended to the loaded file, typically a set of #defines or a header +//! @param szFinalLength returned length of the code string +////////////////////////////////////////////////////////////////////////////// +extern "C" char* oclLoadProgSource(const char* cFilename, const char* cPreamble, size_t* szFinalLength); + +////////////////////////////////////////////////////////////////////////////// +//! Get the binary (PTX) of the program associated with the device +//! +//! @param cpProgram OpenCL program +//! @param cdDevice device of interest +//! @param binary returned code +//! @param length length of returned code +////////////////////////////////////////////////////////////////////////////// +extern "C" void oclGetProgBinary( cl_program cpProgram, cl_device_id cdDevice, char** binary, size_t* length); + +////////////////////////////////////////////////////////////////////////////// +//! Get and log the binary (PTX) from the OpenCL compiler for the requested program & device +//! +//! @param cpProgram OpenCL program +//! @param cdDevice device of interest +//! 
@param const char* cPtxFileName optional PTX file name +////////////////////////////////////////////////////////////////////////////// +extern "C" void oclLogPtx(cl_program cpProgram, cl_device_id cdDevice, const char* cPtxFileName); + +////////////////////////////////////////////////////////////////////////////// +//! Get and log the Build Log from the OpenCL compiler for the requested program & device +//! +//! @param cpProgram OpenCL program +//! @param cdDevice device of interest +////////////////////////////////////////////////////////////////////////////// +extern "C" void oclLogBuildInfo(cl_program cpProgram, cl_device_id cdDevice); + +// Helper function for De-allocating cl objects +// ********************************************************************* +extern "C" void oclDeleteMemObjs(cl_mem* cmMemObjs, int iNumObjs); + +// Helper function to get OpenCL error string from constant +// ********************************************************************* +extern "C" const char* oclErrorString(cl_int error); + +// Helper function to get OpenCL image format string (channel order and type) from constant +// ********************************************************************* +extern "C" const char* oclImageFormatString(cl_uint uiImageFormat); + +// companion inline function for error checking and exit on error WITH Cleanup Callback (if supplied) +// ********************************************************************* +inline void __oclCheckErrorEX(cl_int iSample, cl_int iReference, void (*pCleanup)(int), const char* cFile, const int iLine) +{ + // An error condition is defined by the sample/test value not equal to the reference + if (iReference != iSample) + { + // If the sample/test value isn't equal to the ref, it's an error by defnition, so override 0 sample/test value + iSample = (iSample == 0) ? -9999 : iSample; + + // Log the error info + shrLog("\n !!! 
Error # %i (%s) at line %i , in file %s !!!\n\n", iSample, oclErrorString(iSample), iLine, cFile); + + // Cleanup and exit, or just exit if no cleanup function pointer provided. Use iSample (error code in this case) as process exit code. + if (pCleanup != NULL) + { + pCleanup(iSample); + } + else + { + shrLogEx(LOGBOTH | CLOSELOG, 0, "Exiting...\n"); + exit(iSample); + } + } +} + #endif \ No newline at end of file diff --git a/benchmarks/new_opencl/transpose/shrQATest.h b/benchmarks/old_opencl/transpose/shrQATest.h similarity index 96% rename from benchmarks/new_opencl/transpose/shrQATest.h rename to benchmarks/old_opencl/transpose/shrQATest.h index 93d2d9eb..245cf8dc 100644 --- a/benchmarks/new_opencl/transpose/shrQATest.h +++ b/benchmarks/old_opencl/transpose/shrQATest.h @@ -1,238 +1,238 @@ -/* -* Copyright 1993-2010 NVIDIA Corporation. All rights reserved. -* -* Please refer to the NVIDIA end user license agreement (EULA) associated -* with this source code for terms and conditions that govern your use of -* this software. Any use, reproduction, disclosure, or distribution of -* this software and related documentation outside the terms of the EULA -* is strictly prohibited. 
-* -*/ - -#ifndef SHR_QATEST_H -#define SHR_QATEST_H - -// ********************************************************************* -// Generic utilities for NVIDIA GPU Computing SDK -// ********************************************************************* - -// OS dependent includes -#ifdef _WIN32 - #pragma message ("Note: including windows.h") - #pragma message ("Note: including math.h") - #pragma message ("Note: including assert.h") - #pragma message ("Note: including time.h") - -// Headers needed for Windows - #include - #include -#else - // Headers needed for Linux - #include - #include - #include - #include - #include - #include - #include - #include - #include -#endif - -#ifndef STRCASECMP -#ifdef _WIN32 -#define STRCASECMP _stricmp -#else -#define STRCASECMP strcasecmp -#endif -#endif - -#ifndef STRNCASECMP -#ifdef _WIN32 -#define STRNCASECMP _strnicmp -#else -#define STRNCASECMP strncasecmp -#endif -#endif - - -// Standardized QA Start/Finish for CUDA SDK tests -#define shrQAStart(a, b) __shrQAStart(a, b) -#define shrQAFinish(a, b, c) __shrQAFinish(a, b, c) -#define shrQAFinish2(a, b, c, d) __shrQAFinish2(a, b, c, d) - -inline int findExeNameStart(const char *exec_name) -{ - int exename_start = (int)strlen(exec_name); - - while( (exename_start > 0) && - (exec_name[exename_start] != '\\') && - (exec_name[exename_start] != '/') ) - { - exename_start--; - } - if (exec_name[exename_start] == '\\' || - exec_name[exename_start] == '/') - { - return exename_start+1; - } else { - return exename_start; - } -} - -inline int __shrQAStart(int argc, char **argv) -{ - bool bQATest = false; - // First clear the output buffer - fflush(stdout); - fflush(stdout); - - for (int i=1; i < argc; i++) { - int string_start = 0; - while (argv[i][string_start] == '-') - string_start++; - char *string_argv = &argv[i][string_start]; - - if (!STRCASECMP(string_argv, "qatest")) { - bQATest = true; - } - } - - // We don't want to print the entire path, so we search for the first - int 
exename_start = findExeNameStart(argv[0]); - if (bQATest) { - fprintf(stdout, "&&&& RUNNING %s", &(argv[0][exename_start])); - for (int i=1; i < argc; i++) fprintf(stdout, " %s", argv[i]); - fprintf(stdout, "\n"); - } else { - fprintf(stdout, "[%s] starting...\n", &(argv[0][exename_start])); - } - fflush(stdout); - printf("\n"); fflush(stdout); - return exename_start; -} - -enum eQAstatus { - QA_FAILED = 0, - QA_PASSED = 1, - QA_WAIVED = 2 -}; - -inline void __ExitInTime(int seconds) -{ - fprintf(stdout, "> exiting in %d seconds: ", seconds); - fflush(stdout); - time_t t; - int count; - for (t=time(0)+seconds, count=seconds; time(0) < t; count--) { - fprintf(stdout, "%d...", count); -#ifdef WIN32 - Sleep(1000); -#else - sleep(1); -#endif - } - fprintf(stdout,"done!\n\n"); - fflush(stdout); -} - - -inline void __shrQAFinish(int argc, const char **argv, int iStatus) -{ - // By default QATest is disabled and NoPrompt is Enabled (times out at seconds passed into __ExitInTime() ) - bool bQATest = false, bNoPrompt = true, bQuitInTime = true; - const char *sStatus[] = { "FAILED", "PASSED", "WAIVED", NULL }; - - for (int i=1; i < argc; i++) { - int string_start = 0; - while (argv[i][string_start] == '-') - string_start++; - - const char *string_argv = &argv[i][string_start]; - if (!STRCASECMP(string_argv, "qatest")) { - bQATest = true; - } - // For SDK individual samples that don't specify -noprompt or -prompt, - // a 3 second delay will happen before exiting, giving a user time to view results - if (!STRCASECMP(string_argv, "noprompt") || !STRCASECMP(string_argv, "help")) { - bNoPrompt = true; - bQuitInTime = false; - } - if (!STRCASECMP(string_argv, "prompt")) { - bNoPrompt = false; - bQuitInTime = false; - } - } - - int exename_start = findExeNameStart(argv[0]); - if (bQATest) { - fprintf(stdout, "&&&& %s %s", sStatus[iStatus], &(argv[0][exename_start])); - for (int i=1; i < argc; i++) fprintf(stdout, " %s", argv[i]); - fprintf(stdout, "\n"); - } else { - 
fprintf(stdout, "[%s] test results...\n%s\n", &(argv[0][exename_start]), sStatus[iStatus]); - } - fflush(stdout); - printf("\n"); fflush(stdout); - if (bQuitInTime) { - __ExitInTime(3); - } else { - if (!bNoPrompt) { - fprintf(stdout, "\nPress to exit...\n"); - fflush(stdout); - getchar(); - } - } -} - -inline void __shrQAFinish2(bool bQATest, int argc, const char **argv, int iStatus) -{ - bool bQuitInTime = true; - const char *sStatus[] = { "FAILED", "PASSED", "WAIVED", NULL }; - - for (int i=1; i < argc; i++) { - int string_start = 0; - while (argv[i][string_start] == '-') - string_start++; - - const char *string_argv = &argv[i][string_start]; - // For SDK individual samples that don't specify -noprompt or -prompt, - // a 3 second delay will happen before exiting, giving a user time to view results - if (!STRCASECMP(string_argv, "noprompt") || !STRCASECMP(string_argv, "help")) { - bQuitInTime = false; - } - if (!STRCASECMP(string_argv, "prompt")) { - bQuitInTime = false; - } - } - - int exename_start = findExeNameStart(argv[0]); - if (bQATest) { - fprintf(stdout, "&&&& %s %s", sStatus[iStatus], &(argv[0][exename_start])); - for (int i=1; i < argc; i++) fprintf(stdout, " %s", argv[i]); - fprintf(stdout, "\n"); - } else { - fprintf(stdout, "[%s] test results...\n%s\n", &(argv[0][exename_start]), sStatus[iStatus]); - } - fflush(stdout); - - if (bQuitInTime) { - __ExitInTime(3); - } -} - -inline void shrQAFinishExit(int argc, const char **argv, int iStatus) -{ - __shrQAFinish(argc, argv, iStatus); - - exit(iStatus ? EXIT_SUCCESS : EXIT_FAILURE); -} - -inline void shrQAFinishExit2(bool bQAtest, int argc, const char **argv, int iStatus) -{ - __shrQAFinish2(bQAtest, argc, argv, iStatus); - - exit(iStatus ? EXIT_SUCCESS : EXIT_FAILURE); -} - +/* +* Copyright 1993-2010 NVIDIA Corporation. All rights reserved. 
+* +* Please refer to the NVIDIA end user license agreement (EULA) associated +* with this source code for terms and conditions that govern your use of +* this software. Any use, reproduction, disclosure, or distribution of +* this software and related documentation outside the terms of the EULA +* is strictly prohibited. +* +*/ + +#ifndef SHR_QATEST_H +#define SHR_QATEST_H + +// ********************************************************************* +// Generic utilities for NVIDIA GPU Computing SDK +// ********************************************************************* + +// OS dependent includes +#ifdef _WIN32 + #pragma message ("Note: including windows.h") + #pragma message ("Note: including math.h") + #pragma message ("Note: including assert.h") + #pragma message ("Note: including time.h") + +// Headers needed for Windows + #include + #include +#else + // Headers needed for Linux + #include + #include + #include + #include + #include + #include + #include + #include + #include +#endif + +#ifndef STRCASECMP +#ifdef _WIN32 +#define STRCASECMP _stricmp +#else +#define STRCASECMP strcasecmp +#endif +#endif + +#ifndef STRNCASECMP +#ifdef _WIN32 +#define STRNCASECMP _strnicmp +#else +#define STRNCASECMP strncasecmp +#endif +#endif + + +// Standardized QA Start/Finish for CUDA SDK tests +#define shrQAStart(a, b) __shrQAStart(a, b) +#define shrQAFinish(a, b, c) __shrQAFinish(a, b, c) +#define shrQAFinish2(a, b, c, d) __shrQAFinish2(a, b, c, d) + +inline int findExeNameStart(const char *exec_name) +{ + int exename_start = (int)strlen(exec_name); + + while( (exename_start > 0) && + (exec_name[exename_start] != '\\') && + (exec_name[exename_start] != '/') ) + { + exename_start--; + } + if (exec_name[exename_start] == '\\' || + exec_name[exename_start] == '/') + { + return exename_start+1; + } else { + return exename_start; + } +} + +inline int __shrQAStart(int argc, char **argv) +{ + bool bQATest = false; + // First clear the output buffer + fflush(stdout); + 
fflush(stdout); + + for (int i=1; i < argc; i++) { + int string_start = 0; + while (argv[i][string_start] == '-') + string_start++; + char *string_argv = &argv[i][string_start]; + + if (!STRCASECMP(string_argv, "qatest")) { + bQATest = true; + } + } + + // We don't want to print the entire path, so we search for the first + int exename_start = findExeNameStart(argv[0]); + if (bQATest) { + fprintf(stdout, "&&&& RUNNING %s", &(argv[0][exename_start])); + for (int i=1; i < argc; i++) fprintf(stdout, " %s", argv[i]); + fprintf(stdout, "\n"); + } else { + fprintf(stdout, "[%s] starting...\n", &(argv[0][exename_start])); + } + fflush(stdout); + printf("\n"); fflush(stdout); + return exename_start; +} + +enum eQAstatus { + QA_FAILED = 0, + QA_PASSED = 1, + QA_WAIVED = 2 +}; + +inline void __ExitInTime(int seconds) +{ + fprintf(stdout, "> exiting in %d seconds: ", seconds); + fflush(stdout); + time_t t; + int count; + for (t=time(0)+seconds, count=seconds; time(0) < t; count--) { + fprintf(stdout, "%d...", count); +#ifdef WIN32 + Sleep(1000); +#else + sleep(1); +#endif + } + fprintf(stdout,"done!\n\n"); + fflush(stdout); +} + + +inline void __shrQAFinish(int argc, const char **argv, int iStatus) +{ + // By default QATest is disabled and NoPrompt is Enabled (times out at seconds passed into __ExitInTime() ) + bool bQATest = false, bNoPrompt = true, bQuitInTime = true; + const char *sStatus[] = { "FAILED", "PASSED", "WAIVED", NULL }; + + for (int i=1; i < argc; i++) { + int string_start = 0; + while (argv[i][string_start] == '-') + string_start++; + + const char *string_argv = &argv[i][string_start]; + if (!STRCASECMP(string_argv, "qatest")) { + bQATest = true; + } + // For SDK individual samples that don't specify -noprompt or -prompt, + // a 3 second delay will happen before exiting, giving a user time to view results + if (!STRCASECMP(string_argv, "noprompt") || !STRCASECMP(string_argv, "help")) { + bNoPrompt = true; + bQuitInTime = false; + } + if 
(!STRCASECMP(string_argv, "prompt")) { + bNoPrompt = false; + bQuitInTime = false; + } + } + + int exename_start = findExeNameStart(argv[0]); + if (bQATest) { + fprintf(stdout, "&&&& %s %s", sStatus[iStatus], &(argv[0][exename_start])); + for (int i=1; i < argc; i++) fprintf(stdout, " %s", argv[i]); + fprintf(stdout, "\n"); + } else { + fprintf(stdout, "[%s] test results...\n%s\n", &(argv[0][exename_start]), sStatus[iStatus]); + } + fflush(stdout); + printf("\n"); fflush(stdout); + if (bQuitInTime) { + __ExitInTime(3); + } else { + if (!bNoPrompt) { + fprintf(stdout, "\nPress to exit...\n"); + fflush(stdout); + getchar(); + } + } +} + +inline void __shrQAFinish2(bool bQATest, int argc, const char **argv, int iStatus) +{ + bool bQuitInTime = true; + const char *sStatus[] = { "FAILED", "PASSED", "WAIVED", NULL }; + + for (int i=1; i < argc; i++) { + int string_start = 0; + while (argv[i][string_start] == '-') + string_start++; + + const char *string_argv = &argv[i][string_start]; + // For SDK individual samples that don't specify -noprompt or -prompt, + // a 3 second delay will happen before exiting, giving a user time to view results + if (!STRCASECMP(string_argv, "noprompt") || !STRCASECMP(string_argv, "help")) { + bQuitInTime = false; + } + if (!STRCASECMP(string_argv, "prompt")) { + bQuitInTime = false; + } + } + + int exename_start = findExeNameStart(argv[0]); + if (bQATest) { + fprintf(stdout, "&&&& %s %s", sStatus[iStatus], &(argv[0][exename_start])); + for (int i=1; i < argc; i++) fprintf(stdout, " %s", argv[i]); + fprintf(stdout, "\n"); + } else { + fprintf(stdout, "[%s] test results...\n%s\n", &(argv[0][exename_start]), sStatus[iStatus]); + } + fflush(stdout); + + if (bQuitInTime) { + __ExitInTime(3); + } +} + +inline void shrQAFinishExit(int argc, const char **argv, int iStatus) +{ + __shrQAFinish(argc, argv, iStatus); + + exit(iStatus ? 
EXIT_SUCCESS : EXIT_FAILURE); +} + +inline void shrQAFinishExit2(bool bQAtest, int argc, const char **argv, int iStatus) +{ + __shrQAFinish2(bQAtest, argc, argv, iStatus); + + exit(iStatus ? EXIT_SUCCESS : EXIT_FAILURE); +} + #endif \ No newline at end of file diff --git a/benchmarks/new_opencl/transpose/shrUtils.h b/benchmarks/old_opencl/transpose/shrUtils.h similarity index 98% rename from benchmarks/new_opencl/transpose/shrUtils.h rename to benchmarks/old_opencl/transpose/shrUtils.h index 45ace670..0f2795d4 100644 --- a/benchmarks/new_opencl/transpose/shrUtils.h +++ b/benchmarks/old_opencl/transpose/shrUtils.h @@ -1,642 +1,642 @@ -/* -* Copyright 1993-2010 NVIDIA Corporation. All rights reserved. -* -* Please refer to the NVIDIA end user license agreement (EULA) associated -* with this source code for terms and conditions that govern your use of -* this software. Any use, reproduction, disclosure, or distribution of -* this software and related documentation outside the terms of the EULA -* is strictly prohibited. 
-* -*/ - -#ifndef SHR_UTILS_H -#define SHR_UTILS_H - -// ********************************************************************* -// Generic utilities for NVIDIA GPU Computing SDK -// ********************************************************************* - -// reminders for output window and build log -#ifdef _WIN32 - #pragma message ("Note: including windows.h") - #pragma message ("Note: including math.h") - #pragma message ("Note: including assert.h") -#endif - -// OS dependent includes -#ifdef _WIN32 - // Headers needed for Windows - #include -#else - // Headers needed for Linux - #include - #include - #include - #include - #include - #include - #include -#endif - -// Other headers needed for both Windows and Linux -#include -#include -#include -#include -#include - -// Un-comment the following #define to enable profiling code in SDK apps -//#define GPU_PROFILING - -// Beginning of GPU Architecture definitions -inline int ConvertSMVer2Cores(int major, int minor) -{ - // Defines for GPU Architecture types (using the SM version to determine the # of cores per SM - typedef struct { - int SM; // 0xMm (hexidecimal notation), M = SM Major version, and m = SM minor version - int Cores; - } sSMtoCores; - - sSMtoCores nGpuArchCoresPerSM[] = - { { 0x10, 8 }, // Tesla Generation (SM 1.0) G80 class - { 0x11, 8 }, // Tesla Generation (SM 1.1) G8x class - { 0x12, 8 }, // Tesla Generation (SM 1.2) G9x class - { 0x13, 8 }, // Tesla Generation (SM 1.3) GT200 class - { 0x20, 32 }, // Fermi Generation (SM 2.0) GF100 class - { 0x21, 48 }, // Fermi Generation (SM 2.1) GF10x class - { 0x30, 192}, // Fermi Generation (SM 3.0) GK10x class - { -1, -1 } - }; - - int index = 0; - while (nGpuArchCoresPerSM[index].SM != -1) { - if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor) ) { - return nGpuArchCoresPerSM[index].Cores; - } - index++; - } - printf("MapSMtoCores SM %d.%d is undefined (please update to the latest SDK)!\n", major, minor); - return -1; -} -// end of GPU Architecture 
definitions - - -// Defines and enum for use with logging functions -// ********************************************************************* -#define DEFAULTLOGFILE "SdkConsoleLog.txt" -#define MASTERLOGFILE "SdkMasterLog.csv" -enum LOGMODES -{ - LOGCONSOLE = 1, // bit to signal "log to console" - LOGFILE = 2, // bit to signal "log to file" - LOGBOTH = 3, // convenience union of first 2 bits to signal "log to both" - APPENDMODE = 4, // bit to set "file append" mode instead of "replace mode" on open - MASTER = 8, // bit to signal master .csv log output - ERRORMSG = 16, // bit to signal "pre-pend Error" - CLOSELOG = 32 // bit to close log file, if open, after any requested file write -}; -#define HDASHLINE "-----------------------------------------------------------\n" - -// Standardized boolean -enum shrBOOL -{ - shrFALSE = 0, - shrTRUE = 1 -}; - -// Standardized MAX, MIN and CLAMP -#define MAX(a, b) ((a > b) ? a : b) -#define MIN(a, b) ((a < b) ? a : b) -#define CLAMP(a, b, c) MIN(MAX(a, b), c) // double sided clip of input a -#define TOPCLAMP(a, b) (a < b ? a:b) // single top side clip of input a - -// Error and Exit Handling Macros... -// ********************************************************************* -// Full error handling macro with Cleanup() callback (if supplied)... -// (Companion Inline Function lower on page) -#define shrCheckErrorEX(a, b, c) __shrCheckErrorEX(a, b, c, __FILE__ , __LINE__) - -// Short version without Cleanup() callback pointer -// Both Input (a) and Reference (b) are specified as args -#define shrCheckError(a, b) shrCheckErrorEX(a, b, 0) - -// Standardized Exit Macro for leaving main()... extended version -// (Companion Inline Function lower on page) -#define shrExitEX(a, b, c) __shrExitEX(a, b, c) - -// Standardized Exit Macro for leaving main()... 
short version -// (Companion Inline Function lower on page) -#define shrEXIT(a, b) __shrExitEX(a, b, EXIT_SUCCESS) - -// Simple argument checker macro -#define ARGCHECK(a) if((a) != shrTRUE)return shrFALSE - -// Define for user-customized error handling -#define STDERROR "file %s, line %i\n\n" , __FILE__ , __LINE__ - -// Function to deallocate memory allocated within shrUtils -// ********************************************************************* -extern "C" void shrFree(void* ptr); - -// ********************************************************************* -// Helper function to log standardized information to Console, to File or to both -//! Examples: shrLogEx(LOGBOTH, 0, "Function A\n"); -//! : shrLogEx(LOGBOTH | ERRORMSG, ciErrNum, STDERROR); -//! -//! Automatically opens file and stores handle if needed and not done yet -//! Closes file and nulls handle on request -//! -//! @param 0 iLogMode: LOGCONSOLE, LOGFILE, LOGBOTH, APPENDMODE, MASTER, ERRORMSG, CLOSELOG. -//! LOGFILE and LOGBOTH may be | 'd with APPENDMODE to select file append mode instead of overwrite mode -//! LOGFILE and LOGBOTH may be | 'd with CLOSELOG to "write and close" -//! First 3 options may be | 'd with MASTER to enable independent write to master data log file -//! First 3 options may be | 'd with ERRORMSG to start line with standard error message -//! @param 2 dValue: -//! Positive val = double value for time in secs to be formatted to 6 decimals. -//! Negative val is an error code and this give error preformatting. -//! @param 3 cFormatString: String with formatting specifiers like printf or fprintf. -//! ALL printf flags, width, precision and type specifiers are supported with this exception: -//! Wide char type specifiers intended for wprintf (%S and %C) are NOT supported -//! Single byte char type specifiers (%s and %c) ARE supported -//! @param 4... variable args: like printf or fprintf. Must match format specifer type above. -//! 
@return 0 if OK, negative value on error or if error occurs or was passed in. -// ********************************************************************* -extern "C" int shrLogEx(int iLogMode, int iErrNum, const char* cFormatString, ...); - -// Short version of shrLogEx defaulting to shrLogEx(LOGBOTH, 0, -// ********************************************************************* -extern "C" int shrLog(const char* cFormatString, ...); - -// ********************************************************************* -// Delta timer function for up to 3 independent timers using host high performance counters -// Maintains state for 3 independent counters -//! Example: double dElapsedTime = shrDeltaTime(0); -//! -//! @param 0 iCounterID: Which timer to check/reset. (0, 1, 2) -//! @return delta time of specified counter since last call in seconds. Otherwise -9999.0 if error -// ********************************************************************* -extern "C" double shrDeltaT(int iCounterID); - -// Optional LogFileNameOverride function -// ********************************************************************* -extern "C" void shrSetLogFileName (const char* cOverRideName); - -// Helper function to init data arrays -// ********************************************************************* -extern "C" void shrFillArray(float* pfData, int iSize); - -// Helper function to print data arrays -// ********************************************************************* -extern "C" void shrPrintArray(float* pfData, int iSize); - -//////////////////////////////////////////////////////////////////////////// -//! Find the path for a filename -//! @return the path if succeeded, otherwise 0 -//! @param filename name of the file -//! 
@param executablePath optional absolute path of the executable -//////////////////////////////////////////////////////////////////////////// -extern "C" char* shrFindFilePath(const char* filename, const char* executablePath); - -//////////////////////////////////////////////////////////////////////////// -//! Read file \filename containing single precision floating point data -//! @return shrTRUE if reading the file succeeded, otherwise shrFALSE -//! @param filename name of the source file -//! @param data uninitialized pointer, returned initialized and pointing to -//! the data read -//! @param len number of data elements in data, -1 on error -//! @note If a NULL pointer is passed to this function and it is initialized -//! within shrUtils, then free() has to be used to deallocate the memory -//////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrReadFilef( const char* filename, float** data, unsigned int* len, - bool verbose = false); - -//////////////////////////////////////////////////////////////////////////// -//! Read file \filename containing double precision floating point data -//! @return shrTRUE if reading the file succeeded, otherwise shrFALSE -//! @param filename name of the source file -//! @param data uninitialized pointer, returned initialized and pointing to -//! the data read -//! @param len number of data elements in data, -1 on error -//! @note If a NULL pointer is passed to this function and it is -//! @note If a NULL pointer is passed to this function and it is initialized -//! within shrUtils, then free() has to be used to deallocate the memory -//////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrReadFiled( const char* filename, double** data, unsigned int* len, - bool verbose = false); - -//////////////////////////////////////////////////////////////////////////// -//! Read file \filename containing integer data -//! 
@return shrTRUE if reading the file succeeded, otherwise shrFALSE -//! @param filename name of the source file -//! @param data uninitialized pointer, returned initialized and pointing to -//! the data read -//! @param len number of data elements in data, -1 on error -//! @note If a NULL pointer is passed to this function and it is -//! @note If a NULL pointer is passed to this function and it is initialized -//! within shrUtils, then free() has to be used to deallocate the memory -//////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrReadFilei( const char* filename, int** data, unsigned int* len, bool verbose = false); - -//////////////////////////////////////////////////////////////////////////// -//! Read file \filename containing unsigned integer data -//! @return shrTRUE if reading the file succeeded, otherwise shrFALSE -//! @param filename name of the source file -//! @param data uninitialized pointer, returned initialized and pointing to -//! the data read -//! @param len number of data elements in data, -1 on error -//! @note If a NULL pointer is passed to this function and it is -//! @note If a NULL pointer is passed to this function and it is initialized -//! within shrUtils, then free() has to be used to deallocate the memory -//////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrReadFileui( const char* filename, unsigned int** data, - unsigned int* len, bool verbose = false); - -//////////////////////////////////////////////////////////////////////////// -//! Read file \filename containing char / byte data -//! @return shrTRUE if reading the file succeeded, otherwise shrFALSE -//! @param filename name of the source file -//! @param data uninitialized pointer, returned initialized and pointing to -//! the data read -//! @param len number of data elements in data, -1 on error -//! @note If a NULL pointer is passed to this function and it is -//! 
@note If a NULL pointer is passed to this function and it is initialized -//! within shrUtils, then free() has to be used to deallocate the memory -//////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrReadFileb( const char* filename, char** data, unsigned int* len, - bool verbose = false); - -//////////////////////////////////////////////////////////////////////////// -//! Read file \filename containing unsigned char / byte data -//! @return shrTRUE if reading the file succeeded, otherwise shrFALSE -//! @param filename name of the source file -//! @param data uninitialized pointer, returned initialized and pointing to -//! the data read -//! @param len number of data elements in data, -1 on error -//! @note If a NULL pointer is passed to this function and it is -//! @note If a NULL pointer is passed to this function and it is initialized -//! within shrUtils, then free() has to be used to deallocate the memory -//////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrReadFileub( const char* filename, unsigned char** data, - unsigned int* len, bool verbose = false); - -//////////////////////////////////////////////////////////////////////////// -//! Write a data file \filename containing single precision floating point -//! data -//! @return shrTRUE if writing the file succeeded, otherwise shrFALSE -//! @param filename name of the file to write -//! @param data pointer to data to write -//! @param len number of data elements in data, -1 on error -//! @param epsilon epsilon for comparison -//////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrWriteFilef( const char* filename, const float* data, unsigned int len, - const float epsilon, bool verbose = false); - -//////////////////////////////////////////////////////////////////////////// -//! Write a data file \filename containing double precision floating point -//! data -//! 
@return shrTRUE if writing the file succeeded, otherwise shrFALSE -//! @param filename name of the file to write -//! @param data pointer to data to write -//! @param len number of data elements in data, -1 on error -//! @param epsilon epsilon for comparison -//////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrWriteFiled( const char* filename, const float* data, unsigned int len, - const double epsilon, bool verbose = false); - -//////////////////////////////////////////////////////////////////////////// -//! Write a data file \filename containing integer data -//! @return shrTRUE if writing the file succeeded, otherwise shrFALSE -//! @param filename name of the file to write -//! @param data pointer to data to write -//! @param len number of data elements in data, -1 on error -//////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrWriteFilei( const char* filename, const int* data, unsigned int len, - bool verbose = false); - -//////////////////////////////////////////////////////////////////////////// -//! Write a data file \filename containing unsigned integer data -//! @return shrTRUE if writing the file succeeded, otherwise shrFALSE -//! @param filename name of the file to write -//! @param data pointer to data to write -//! @param len number of data elements in data, -1 on error -//////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrWriteFileui( const char* filename, const unsigned int* data, - unsigned int len, bool verbose = false); - -//////////////////////////////////////////////////////////////////////////// -//! Write a data file \filename containing char / byte data -//! @return shrTRUE if writing the file succeeded, otherwise shrFALSE -//! @param filename name of the file to write -//! @param data pointer to data to write -//! 
@param len number of data elements in data, -1 on error -//////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrWriteFileb( const char* filename, const char* data, unsigned int len, - bool verbose = false); - -//////////////////////////////////////////////////////////////////////////// -//! Write a data file \filename containing unsigned char / byte data -//! @return shrTRUE if writing the file succeeded, otherwise shrFALSE -//! @param filename name of the file to write -//! @param data pointer to data to write -//! @param len number of data elements in data, -1 on error -//////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrWriteFileub( const char* filename, const unsigned char* data, - unsigned int len, bool verbose = false); - -//////////////////////////////////////////////////////////////////////////// -//! Load PPM image file (with unsigned char as data element type), padding -//! 4th component -//! @return shrTRUE if reading the file succeeded, otherwise shrFALSE -//! @param file name of the image file -//! @param OutData handle to the data read -//! @param w width of the image -//! @param h height of the image -//! -//! Note: If *OutData is NULL this function allocates buffer that must be freed by caller -//////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrLoadPPM4ub(const char* file, unsigned char** OutData, - unsigned int *w, unsigned int *h); - -//////////////////////////////////////////////////////////////////////////// -//! Save PPM image file (with unsigned char as data element type, padded to -//! 4 bytes) -//! @return shrTRUE if saving the file succeeded, otherwise shrFALSE -//! @param file name of the image file -//! @param data handle to the data read -//! @param w width of the image -//! 
@param h height of the image -//////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrSavePPM4ub( const char* file, unsigned char *data, - unsigned int w, unsigned int h); - -//////////////////////////////////////////////////////////////////////////////// -//! Save PGM image file (with unsigned char as data element type) -//! @return shrTRUE if saving the file succeeded, otherwise shrFALSE -//! @param file name of the image file -//! @param data handle to the data read -//! @param w width of the image -//! @param h height of the image -//////////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrSavePGMub( const char* file, unsigned char *data, - unsigned int w, unsigned int h); - -//////////////////////////////////////////////////////////////////////////// -//! Load PGM image file (with unsigned char as data element type) -//! @return shrTRUE if saving the file succeeded, otherwise shrFALSE -//! @param file name of the image file -//! @param data handle to the data read -//! @param w width of the image -//! @param h height of the image -//! @note If a NULL pointer is passed to this function and it is initialized -//! within shrUtils, then free() has to be used to deallocate the memory -//////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrLoadPGMub( const char* file, unsigned char** data, - unsigned int *w,unsigned int *h); - -//////////////////////////////////////////////////////////////////////////// -// Command line arguments: General notes -// * All command line arguments begin with '--' followed by the token; -// token and value are seperated by '='; example --samples=50 -// * Arrays have the form --model=[one.obj,two.obj,three.obj] -// (without whitespaces) -//////////////////////////////////////////////////////////////////////////// - -//////////////////////////////////////////////////////////////////////////// -//! 
Check if command line argument \a flag-name is given -//! @return shrTRUE if command line argument \a flag_name has been given, -//! otherwise shrFALSE -//! @param argc argc as passed to main() -//! @param argv argv as passed to main() -//! @param flag_name name of command line flag -//////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrCheckCmdLineFlag( const int argc, const char** argv, - const char* flag_name); - -//////////////////////////////////////////////////////////////////////////// -//! Get the value of a command line argument of type int -//! @return shrTRUE if command line argument \a arg_name has been given and -//! is of the requested type, otherwise shrFALSE -//! @param argc argc as passed to main() -//! @param argv argv as passed to main() -//! @param arg_name name of the command line argument -//! @param val value of the command line argument -//////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrGetCmdLineArgumenti( const int argc, const char** argv, - const char* arg_name, int* val); - -//////////////////////////////////////////////////////////////////////////// -//! Get the value of a command line argument of type unsigned int -//! @return shrTRUE if command line argument \a arg_name has been given and -//! is of the requested type, otherwise shrFALSE -//! @param argc argc as passed to main() -//! @param argv argv as passed to main() -//! @param arg_name name of the command line argument -//! @param val value of the command line argument -//////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrGetCmdLineArgumentu( const int argc, const char** argv, - const char* arg_name, unsigned int* val); - -//////////////////////////////////////////////////////////////////////////// -//! Get the value of a command line argument of type float -//! @return shrTRUE if command line argument \a arg_name has been given and -//! 
is of the requested type, otherwise shrFALSE -//! @param argc argc as passed to main() -//! @param argv argv as passed to main() -//! @param arg_name name of the command line argument -//! @param val value of the command line argument -//////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrGetCmdLineArgumentf( const int argc, const char** argv, - const char* arg_name, float* val); - -//////////////////////////////////////////////////////////////////////////// -//! Get the value of a command line argument of type string -//! @return shrTRUE if command line argument \a arg_name has been given and -//! is of the requested type, otherwise shrFALSE -//! @param argc argc as passed to main() -//! @param argv argv as passed to main() -//! @param arg_name name of the command line argument -//! @param val value of the command line argument -//////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrGetCmdLineArgumentstr( const int argc, const char** argv, - const char* arg_name, char** val); - -//////////////////////////////////////////////////////////////////////////// -//! Get the value of a command line argument list those element are strings -//! @return shrTRUE if command line argument \a arg_name has been given and -//! is of the requested type, otherwise shrFALSE -//! @param argc argc as passed to main() -//! @param argv argv as passed to main() -//! @param arg_name name of the command line argument -//! @param val command line argument list -//! @param len length of the list / number of elements -//////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrGetCmdLineArgumentListstr( const int argc, const char** argv, - const char* arg_name, char** val, - unsigned int* len); - -//////////////////////////////////////////////////////////////////////////// -//! Compare two float arrays -//! 
@return shrTRUEif \a reference and \a data are identical, otherwise shrFALSE -//! @param reference handle to the reference data / gold image -//! @param data handle to the computed data -//! @param len number of elements in reference and data -//////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrComparef( const float* reference, const float* data, - const unsigned int len); - -//////////////////////////////////////////////////////////////////////////// -//! Compare two integer arrays -//! @return shrTRUEif \a reference and \a data are identical, otherwise shrFALSE -//! @param reference handle to the reference data / gold image -//! @param data handle to the computed data -//! @param len number of elements in reference and data -//////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrComparei( const int* reference, const int* data, - const unsigned int len ); - -//////////////////////////////////////////////////////////////////////////////// -//! Compare two unsigned integer arrays, with epsilon and threshold -//! @return shrTRUEif \a reference and \a data are identical, otherwise shrFALSE -//! @param reference handle to the reference data / gold image -//! @param data handle to the computed data -//! @param len number of elements in reference and data -//! @param threshold tolerance % # of comparison errors (0.15f = 15%) -//////////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrCompareuit( const unsigned int* reference, const unsigned int* data, - const unsigned int len, const float epsilon, const float threshold ); - -//////////////////////////////////////////////////////////////////////////// -//! Compare two unsigned char arrays -//! @return shrTRUEif \a reference and \a data are identical, otherwise shrFALSE -//! @param reference handle to the reference data / gold image -//! @param data handle to the computed data -//! 
@param len number of elements in reference and data -//////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrCompareub( const unsigned char* reference, const unsigned char* data, - const unsigned int len ); - -//////////////////////////////////////////////////////////////////////////////// -//! Compare two integers with a tolernance for # of byte errors -//! @return shrTRUEif \a reference and \a data are identical, otherwise shrFALSE -//! @param reference handle to the reference data / gold image -//! @param data handle to the computed data -//! @param len number of elements in reference and data -//! @param epsilon epsilon to use for the comparison -//! @param threshold tolerance % # of comparison errors (0.15f = 15%) -//////////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrCompareubt( const unsigned char* reference, const unsigned char* data, - const unsigned int len, const float epsilon, const float threshold ); - -//////////////////////////////////////////////////////////////////////////////// -//! Compare two integer arrays witha n epsilon tolerance for equality -//! @return shrTRUEif \a reference and \a data are identical, otherwise shrFALSE -//! @param reference handle to the reference data / gold image -//! @param data handle to the computed data -//! @param len number of elements in reference and data -//! @param epsilon epsilon to use for the comparison -//////////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrCompareube( const unsigned char* reference, const unsigned char* data, - const unsigned int len, const float epsilon ); - -//////////////////////////////////////////////////////////////////////////// -//! Compare two float arrays with an epsilon tolerance for equality -//! @return shrTRUEif \a reference and \a data are identical, otherwise shrFALSE -//! 
@param reference handle to the reference data / gold image -//! @param data handle to the computed data -//! @param len number of elements in reference and data -//! @param epsilon epsilon to use for the comparison -//////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrComparefe( const float* reference, const float* data, - const unsigned int len, const float epsilon ); - -//////////////////////////////////////////////////////////////////////////////// -//! Compare two float arrays with an epsilon tolerance for equality and a -//! threshold for # pixel errors -//! @return shrTRUEif \a reference and \a data are identical, otherwise shrFALSE -//! @param reference handle to the reference data / gold image -//! @param data handle to the computed data -//! @param len number of elements in reference and data -//! @param epsilon epsilon to use for the comparison -//////////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrComparefet( const float* reference, const float* data, - const unsigned int len, const float epsilon, const float threshold ); - -//////////////////////////////////////////////////////////////////////////// -//! Compare two float arrays using L2-norm with an epsilon tolerance for -//! equality -//! @return shrTRUEif \a reference and \a data are identical, otherwise shrFALSE -//! @param reference handle to the reference data / gold image -//! @param data handle to the computed data -//! @param len number of elements in reference and data -//! @param epsilon epsilon to use for the comparison -//////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrCompareL2fe( const float* reference, const float* data, - const unsigned int len, const float epsilon ); - -//////////////////////////////////////////////////////////////////////////////// -//! Compare two PPM image files with an epsilon tolerance for equality -//! 
@return shrTRUEif \a reference and \a data are identical, otherwise shrFALSE -//! @param src_file filename for the image to be compared -//! @param data filename for the reference data / gold image -//! @param epsilon epsilon to use for the comparison -//! @param threshold threshold of pixels that can still mismatch to pass (i.e. 0.15f = 15% must pass) -//! $param verboseErrors output details of image mismatch to std::err -//////////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrComparePPM( const char *src_file, const char *ref_file, const float epsilon, const float threshold); - -//////////////////////////////////////////////////////////////////////////////// -//! Compare two PGM image files with an epsilon tolerance for equality -//! @return shrTRUEif \a reference and \a data are identical, otherwise shrFALSE -//! @param src_file filename for the image to be compared -//! @param data filename for the reference data / gold image -//! @param epsilon epsilon to use for the comparison -//! @param threshold threshold of pixels that can still mismatch to pass (i.e. 0.15f = 15% must pass) -//! 
$param verboseErrors output details of image mismatch to std::err -//////////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrComparePGM( const char *src_file, const char *ref_file, const float epsilon, const float threshold); - -extern "C" unsigned char* shrLoadRawFile(const char* filename, size_t size); - -extern "C" size_t shrRoundUp(int group_size, int global_size); - -// companion inline function for error checking and exit on error WITH Cleanup Callback (if supplied) -// ********************************************************************* -inline void __shrCheckErrorEX(int iSample, int iReference, void (*pCleanup)(int), const char* cFile, const int iLine) -{ - if (iReference != iSample) - { - shrLogEx(LOGBOTH | ERRORMSG, iSample, "line %i , in file %s !!!\n\n" , iLine, cFile); - if (pCleanup != NULL) - { - pCleanup(EXIT_FAILURE); - } - else - { - shrLogEx(LOGBOTH | CLOSELOG, 0, "Exiting...\n"); - exit(EXIT_FAILURE); - } - } -} - -// Standardized Exit -// ********************************************************************* -inline void __shrExitEX(int argc, const char** argv, int iExitCode) -{ -#ifdef WIN32 - if (!shrCheckCmdLineFlag(argc, argv, "noprompt") && !shrCheckCmdLineFlag(argc, argv, "qatest")) -#else - if (shrCheckCmdLineFlag(argc, argv, "prompt") && !shrCheckCmdLineFlag(argc, argv, "qatest")) -#endif - { - shrLogEx(LOGBOTH | CLOSELOG, 0, "\nPress to Quit...\n"); - getchar(); - } - else - { - shrLogEx(LOGBOTH | CLOSELOG, 0, "%s Exiting...\n", argv[0]); - } - fflush(stderr); - exit(iExitCode); -} - +/* +* Copyright 1993-2010 NVIDIA Corporation. All rights reserved. +* +* Please refer to the NVIDIA end user license agreement (EULA) associated +* with this source code for terms and conditions that govern your use of +* this software. Any use, reproduction, disclosure, or distribution of +* this software and related documentation outside the terms of the EULA +* is strictly prohibited. 
+* +*/ + +#ifndef SHR_UTILS_H +#define SHR_UTILS_H + +// ********************************************************************* +// Generic utilities for NVIDIA GPU Computing SDK +// ********************************************************************* + +// reminders for output window and build log +#ifdef _WIN32 + #pragma message ("Note: including windows.h") + #pragma message ("Note: including math.h") + #pragma message ("Note: including assert.h") +#endif + +// OS dependent includes +#ifdef _WIN32 + // Headers needed for Windows + #include +#else + // Headers needed for Linux + #include + #include + #include + #include + #include + #include + #include +#endif + +// Other headers needed for both Windows and Linux +#include +#include +#include +#include +#include + +// Un-comment the following #define to enable profiling code in SDK apps +//#define GPU_PROFILING + +// Beginning of GPU Architecture definitions: ConvertSMVer2Cores() returns the cores-per-SM count for SM version major.minor, or -1 (after printing a diagnostic) when that version is not in its table +inline int ConvertSMVer2Cores(int major, int minor) +{ + // Defines for GPU Architecture types (using the SM version to determine the # of cores per SM) + typedef struct { + int SM; // 0xMm (hexadecimal notation), M = SM Major version, and m = SM minor version + int Cores; + } sSMtoCores; + + sSMtoCores nGpuArchCoresPerSM[] = + { { 0x10, 8 }, // Tesla Generation (SM 1.0) G80 class + { 0x11, 8 }, // Tesla Generation (SM 1.1) G8x class + { 0x12, 8 }, // Tesla Generation (SM 1.2) G9x class + { 0x13, 8 }, // Tesla Generation (SM 1.3) GT200 class + { 0x20, 32 }, // Fermi Generation (SM 2.0) GF100 class + { 0x21, 48 }, // Fermi Generation (SM 2.1) GF10x class + { 0x30, 192}, // Kepler Generation (SM 3.0) GK10x class + { -1, -1 } + }; + + int index = 0; + while (nGpuArchCoresPerSM[index].SM != -1) { + if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor) ) { + return nGpuArchCoresPerSM[index].Cores; + } + index++; + } + printf("MapSMtoCores SM %d.%d is undefined (please update to the latest SDK)!\n", major, minor); + return -1; +} +// end of GPU Architecture
definitions + + +// Defines and enum for use with logging functions +// ********************************************************************* +#define DEFAULTLOGFILE "SdkConsoleLog.txt" +#define MASTERLOGFILE "SdkMasterLog.csv" +enum LOGMODES +{ + LOGCONSOLE = 1, // bit to signal "log to console" + LOGFILE = 2, // bit to signal "log to file" + LOGBOTH = 3, // convenience union of first 2 bits to signal "log to both" + APPENDMODE = 4, // bit to set "file append" mode instead of "replace mode" on open + MASTER = 8, // bit to signal master .csv log output + ERRORMSG = 16, // bit to signal "pre-pend Error" + CLOSELOG = 32 // bit to close log file, if open, after any requested file write +}; +#define HDASHLINE "-----------------------------------------------------------\n" + +// Standardized boolean +enum shrBOOL +{ + shrFALSE = 0, + shrTRUE = 1 +}; + +// Standardized MAX, MIN and CLAMP +#define MAX(a, b) ((a > b) ? a : b) +#define MIN(a, b) ((a < b) ? a : b) +#define CLAMP(a, b, c) MIN(MAX(a, b), c) // double sided clip of input a +#define TOPCLAMP(a, b) (a < b ? a:b) // single top side clip of input a + +// Error and Exit Handling Macros... +// ********************************************************************* +// Full error handling macro with Cleanup() callback (if supplied)... +// (Companion Inline Function lower on page) +#define shrCheckErrorEX(a, b, c) __shrCheckErrorEX(a, b, c, __FILE__ , __LINE__) + +// Short version without Cleanup() callback pointer +// Both Input (a) and Reference (b) are specified as args +#define shrCheckError(a, b) shrCheckErrorEX(a, b, 0) + +// Standardized Exit Macro for leaving main()... extended version +// (Companion Inline Function lower on page) +#define shrExitEX(a, b, c) __shrExitEX(a, b, c) + +// Standardized Exit Macro for leaving main()... 
short version +// (Companion Inline Function lower on page) +#define shrEXIT(a, b) __shrExitEX(a, b, EXIT_SUCCESS) + +// Simple argument checker macro (expands to a bare if/return, so use it only as a complete statement, never directly before an else) +#define ARGCHECK(a) if((a) != shrTRUE)return shrFALSE + +// Define for user-customized error handling (expands to a format string followed by __FILE__ and __LINE__ as its arguments) +#define STDERROR "file %s, line %i\n\n" , __FILE__ , __LINE__ + +// Function to deallocate memory allocated within shrUtils +// ********************************************************************* +extern "C" void shrFree(void* ptr); + +// ********************************************************************* +// Helper function to log standardized information to Console, to File or to both +//! Examples: shrLogEx(LOGBOTH, 0, "Function A\n"); +//! : shrLogEx(LOGBOTH | ERRORMSG, ciErrNum, STDERROR); +//! +//! Automatically opens file and stores handle if needed and not done yet +//! Closes file and nulls handle on request +//! +//! @param 0 iLogMode: LOGCONSOLE, LOGFILE, LOGBOTH, APPENDMODE, MASTER, ERRORMSG, CLOSELOG. +//! LOGFILE and LOGBOTH may be | 'd with APPENDMODE to select file append mode instead of overwrite mode +//! LOGFILE and LOGBOTH may be | 'd with CLOSELOG to "write and close" +//! First 3 options may be | 'd with MASTER to enable independent write to master data log file +//! First 3 options may be | 'd with ERRORMSG to start line with standard error message +//! @param 1 iErrNum: NOTE(review): the next two lines describe a double time value, but the parameter is declared as int iErrNum - confirm against the implementation +//! Positive val = double value for time in secs to be formatted to 6 decimals. +//! Negative val is an error code and this gives error preformatting. +//! @param 2 cFormatString: String with formatting specifiers like printf or fprintf. +//! ALL printf flags, width, precision and type specifiers are supported with this exception: +//! Wide char type specifiers intended for wprintf (%S and %C) are NOT supported +//! Single byte char type specifiers (%s and %c) ARE supported +//! @param 3... variable args: like printf or fprintf. Must match format specifier type above. +//! 
@return 0 if OK, negative value on error or if error occurs or was passed in. +// ********************************************************************* +extern "C" int shrLogEx(int iLogMode, int iErrNum, const char* cFormatString, ...); + +// Short version of shrLogEx defaulting to shrLogEx(LOGBOTH, 0, ...) +// ********************************************************************* +extern "C" int shrLog(const char* cFormatString, ...); + +// ********************************************************************* +// Delta timer function for up to 3 independent timers using host high performance counters +// Maintains state for 3 independent counters +//! Example: double dElapsedTime = shrDeltaT(0); +//! +//! @param 0 iCounterID: Which timer to check/reset. (0, 1, 2) +//! @return delta time of specified counter since last call in seconds. Otherwise -9999.0 if error +// ********************************************************************* +extern "C" double shrDeltaT(int iCounterID); + +// Optional LogFileNameOverride function +// ********************************************************************* +extern "C" void shrSetLogFileName (const char* cOverRideName); + +// Helper function to init data arrays +// ********************************************************************* +extern "C" void shrFillArray(float* pfData, int iSize); + +// Helper function to print data arrays +// ********************************************************************* +extern "C" void shrPrintArray(float* pfData, int iSize); + +//////////////////////////////////////////////////////////////////////////// +//! Find the path for a filename +//! @return the path if succeeded, otherwise 0 +//! @param filename name of the file +//! 
@param executablePath optional absolute path of the executable +//////////////////////////////////////////////////////////////////////////// +extern "C" char* shrFindFilePath(const char* filename, const char* executablePath); + +//////////////////////////////////////////////////////////////////////////// +//! Read file \filename containing single precision floating point data +//! @return shrTRUE if reading the file succeeded, otherwise shrFALSE +//! @param filename name of the source file +//! @param data uninitialized pointer, returned initialized and pointing to +//! the data read +//! @param len number of data elements in data, -1 on error +//! @note If a NULL pointer is passed to this function and it is initialized +//! within shrUtils, then free() has to be used to deallocate the memory +//////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrReadFilef( const char* filename, float** data, unsigned int* len, + bool verbose = false); + +//////////////////////////////////////////////////////////////////////////// +//! Read file \filename containing double precision floating point data +//! @return shrTRUE if reading the file succeeded, otherwise shrFALSE +//! @param filename name of the source file +//! @param data uninitialized pointer, returned initialized and pointing to +//! the data read +//! @param len number of data elements in data, -1 on error +//! @note If a NULL pointer is passed to this function and it is +//! @note If a NULL pointer is passed to this function and it is initialized +//! within shrUtils, then free() has to be used to deallocate the memory +//////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrReadFiled( const char* filename, double** data, unsigned int* len, + bool verbose = false); + +//////////////////////////////////////////////////////////////////////////// +//! Read file \filename containing integer data +//! 
@return shrTRUE if reading the file succeeded, otherwise shrFALSE +//! @param filename name of the source file +//! @param data uninitialized pointer, returned initialized and pointing to +//! the data read +//! @param len number of data elements in data, -1 on error +//! @note If a NULL pointer is passed to this function and it is +//! @note If a NULL pointer is passed to this function and it is initialized +//! within shrUtils, then free() has to be used to deallocate the memory +//////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrReadFilei( const char* filename, int** data, unsigned int* len, bool verbose = false); + +//////////////////////////////////////////////////////////////////////////// +//! Read file \filename containing unsigned integer data +//! @return shrTRUE if reading the file succeeded, otherwise shrFALSE +//! @param filename name of the source file +//! @param data uninitialized pointer, returned initialized and pointing to +//! the data read +//! @param len number of data elements in data, -1 on error +//! @note If a NULL pointer is passed to this function and it is +//! @note If a NULL pointer is passed to this function and it is initialized +//! within shrUtils, then free() has to be used to deallocate the memory +//////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrReadFileui( const char* filename, unsigned int** data, + unsigned int* len, bool verbose = false); + +//////////////////////////////////////////////////////////////////////////// +//! Read file \filename containing char / byte data +//! @return shrTRUE if reading the file succeeded, otherwise shrFALSE +//! @param filename name of the source file +//! @param data uninitialized pointer, returned initialized and pointing to +//! the data read +//! @param len number of data elements in data, -1 on error +//! @note If a NULL pointer is passed to this function and it is +//! 
@note If a NULL pointer is passed to this function and it is initialized +//! within shrUtils, then free() has to be used to deallocate the memory +//////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrReadFileb( const char* filename, char** data, unsigned int* len, + bool verbose = false); + +//////////////////////////////////////////////////////////////////////////// +//! Read file \filename containing unsigned char / byte data +//! @return shrTRUE if reading the file succeeded, otherwise shrFALSE +//! @param filename name of the source file +//! @param data uninitialized pointer, returned initialized and pointing to +//! the data read +//! @param len number of data elements in data, -1 on error +//! @note If a NULL pointer is passed to this function and it is +//! @note If a NULL pointer is passed to this function and it is initialized +//! within shrUtils, then free() has to be used to deallocate the memory +//////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrReadFileub( const char* filename, unsigned char** data, + unsigned int* len, bool verbose = false); + +//////////////////////////////////////////////////////////////////////////// +//! Write a data file \filename containing single precision floating point +//! data +//! @return shrTRUE if writing the file succeeded, otherwise shrFALSE +//! @param filename name of the file to write +//! @param data pointer to data to write +//! @param len number of data elements in data, -1 on error +//! @param epsilon epsilon for comparison +//////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrWriteFilef( const char* filename, const float* data, unsigned int len, + const float epsilon, bool verbose = false); + +//////////////////////////////////////////////////////////////////////////// +//! Write a data file \filename containing double precision floating point +//! data +//! 
@return shrTRUE if writing the file succeeded, otherwise shrFALSE +//! @param filename name of the file to write +//! @param data pointer to data to write +//! @param len number of data elements in data, -1 on error +//! @param epsilon epsilon for comparison +//////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrWriteFiled( const char* filename, const float* data, unsigned int len, + const double epsilon, bool verbose = false); + +//////////////////////////////////////////////////////////////////////////// +//! Write a data file \filename containing integer data +//! @return shrTRUE if writing the file succeeded, otherwise shrFALSE +//! @param filename name of the file to write +//! @param data pointer to data to write +//! @param len number of data elements in data, -1 on error +//////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrWriteFilei( const char* filename, const int* data, unsigned int len, + bool verbose = false); + +//////////////////////////////////////////////////////////////////////////// +//! Write a data file \filename containing unsigned integer data +//! @return shrTRUE if writing the file succeeded, otherwise shrFALSE +//! @param filename name of the file to write +//! @param data pointer to data to write +//! @param len number of data elements in data, -1 on error +//////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrWriteFileui( const char* filename, const unsigned int* data, + unsigned int len, bool verbose = false); + +//////////////////////////////////////////////////////////////////////////// +//! Write a data file \filename containing char / byte data +//! @return shrTRUE if writing the file succeeded, otherwise shrFALSE +//! @param filename name of the file to write +//! @param data pointer to data to write +//! 
@param len number of data elements in data, -1 on error +//////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrWriteFileb( const char* filename, const char* data, unsigned int len, + bool verbose = false); + +//////////////////////////////////////////////////////////////////////////// +//! Write a data file \filename containing unsigned char / byte data +//! @return shrTRUE if writing the file succeeded, otherwise shrFALSE +//! @param filename name of the file to write +//! @param data pointer to data to write +//! @param len number of data elements in data, -1 on error +//////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrWriteFileub( const char* filename, const unsigned char* data, + unsigned int len, bool verbose = false); + +//////////////////////////////////////////////////////////////////////////// +//! Load PPM image file (with unsigned char as data element type), padding +//! 4th component +//! @return shrTRUE if reading the file succeeded, otherwise shrFALSE +//! @param file name of the image file +//! @param OutData handle to the data read +//! @param w width of the image +//! @param h height of the image +//! +//! Note: If *OutData is NULL this function allocates buffer that must be freed by caller +//////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrLoadPPM4ub(const char* file, unsigned char** OutData, + unsigned int *w, unsigned int *h); + +//////////////////////////////////////////////////////////////////////////// +//! Save PPM image file (with unsigned char as data element type, padded to +//! 4 bytes) +//! @return shrTRUE if saving the file succeeded, otherwise shrFALSE +//! @param file name of the image file +//! @param data handle to the data read +//! @param w width of the image +//! 
@param h height of the image +//////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrSavePPM4ub( const char* file, unsigned char *data, + unsigned int w, unsigned int h); + +//////////////////////////////////////////////////////////////////////////////// +//! Save PGM image file (with unsigned char as data element type) +//! @return shrTRUE if saving the file succeeded, otherwise shrFALSE +//! @param file name of the image file +//! @param data handle to the data read +//! @param w width of the image +//! @param h height of the image +//////////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrSavePGMub( const char* file, unsigned char *data, + unsigned int w, unsigned int h); + +//////////////////////////////////////////////////////////////////////////// +//! Load PGM image file (with unsigned char as data element type) +//! @return shrTRUE if saving the file succeeded, otherwise shrFALSE +//! @param file name of the image file +//! @param data handle to the data read +//! @param w width of the image +//! @param h height of the image +//! @note If a NULL pointer is passed to this function and it is initialized +//! within shrUtils, then free() has to be used to deallocate the memory +//////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrLoadPGMub( const char* file, unsigned char** data, + unsigned int *w,unsigned int *h); + +//////////////////////////////////////////////////////////////////////////// +// Command line arguments: General notes +// * All command line arguments begin with '--' followed by the token; +// token and value are seperated by '='; example --samples=50 +// * Arrays have the form --model=[one.obj,two.obj,three.obj] +// (without whitespaces) +//////////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////////// +//! 
Check if command line argument \a flag-name is given +//! @return shrTRUE if command line argument \a flag_name has been given, +//! otherwise shrFALSE +//! @param argc argc as passed to main() +//! @param argv argv as passed to main() +//! @param flag_name name of command line flag +//////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrCheckCmdLineFlag( const int argc, const char** argv, + const char* flag_name); + +//////////////////////////////////////////////////////////////////////////// +//! Get the value of a command line argument of type int +//! @return shrTRUE if command line argument \a arg_name has been given and +//! is of the requested type, otherwise shrFALSE +//! @param argc argc as passed to main() +//! @param argv argv as passed to main() +//! @param arg_name name of the command line argument +//! @param val value of the command line argument +//////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrGetCmdLineArgumenti( const int argc, const char** argv, + const char* arg_name, int* val); + +//////////////////////////////////////////////////////////////////////////// +//! Get the value of a command line argument of type unsigned int +//! @return shrTRUE if command line argument \a arg_name has been given and +//! is of the requested type, otherwise shrFALSE +//! @param argc argc as passed to main() +//! @param argv argv as passed to main() +//! @param arg_name name of the command line argument +//! @param val value of the command line argument +//////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrGetCmdLineArgumentu( const int argc, const char** argv, + const char* arg_name, unsigned int* val); + +//////////////////////////////////////////////////////////////////////////// +//! Get the value of a command line argument of type float +//! @return shrTRUE if command line argument \a arg_name has been given and +//! 
is of the requested type, otherwise shrFALSE +//! @param argc argc as passed to main() +//! @param argv argv as passed to main() +//! @param arg_name name of the command line argument +//! @param val value of the command line argument +//////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrGetCmdLineArgumentf( const int argc, const char** argv, + const char* arg_name, float* val); + +//////////////////////////////////////////////////////////////////////////// +//! Get the value of a command line argument of type string +//! @return shrTRUE if command line argument \a arg_name has been given and +//! is of the requested type, otherwise shrFALSE +//! @param argc argc as passed to main() +//! @param argv argv as passed to main() +//! @param arg_name name of the command line argument +//! @param val value of the command line argument +//////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrGetCmdLineArgumentstr( const int argc, const char** argv, + const char* arg_name, char** val); + +//////////////////////////////////////////////////////////////////////////// +//! Get the value of a command line argument list those element are strings +//! @return shrTRUE if command line argument \a arg_name has been given and +//! is of the requested type, otherwise shrFALSE +//! @param argc argc as passed to main() +//! @param argv argv as passed to main() +//! @param arg_name name of the command line argument +//! @param val command line argument list +//! @param len length of the list / number of elements +//////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrGetCmdLineArgumentListstr( const int argc, const char** argv, + const char* arg_name, char** val, + unsigned int* len); + +//////////////////////////////////////////////////////////////////////////// +//! Compare two float arrays +//! 
@return shrTRUE if \a reference and \a data are identical, otherwise shrFALSE +//! @param reference handle to the reference data / gold image +//! @param data handle to the computed data +//! @param len number of elements in reference and data +//////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrComparef( const float* reference, const float* data, + const unsigned int len); + +//////////////////////////////////////////////////////////////////////////// +//! Compare two integer arrays +//! @return shrTRUE if \a reference and \a data are identical, otherwise shrFALSE +//! @param reference handle to the reference data / gold image +//! @param data handle to the computed data +//! @param len number of elements in reference and data +//////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrComparei( const int* reference, const int* data, + const unsigned int len ); + +//////////////////////////////////////////////////////////////////////////////// +//! Compare two unsigned integer arrays, with epsilon and threshold +//! @return shrTRUE if \a reference and \a data are identical, otherwise shrFALSE +//! @param reference handle to the reference data / gold image +//! @param data handle to the computed data +//! @param len number of elements in reference and data +//! @param threshold tolerance % # of comparison errors (0.15f = 15%) +//////////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrCompareuit( const unsigned int* reference, const unsigned int* data, + const unsigned int len, const float epsilon, const float threshold ); + +//////////////////////////////////////////////////////////////////////////// +//! Compare two unsigned char arrays +//! @return shrTRUE if \a reference and \a data are identical, otherwise shrFALSE +//! @param reference handle to the reference data / gold image +//! @param data handle to the computed data +//! 
@param len number of elements in reference and data +//////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrCompareub( const unsigned char* reference, const unsigned char* data, + const unsigned int len ); + +//////////////////////////////////////////////////////////////////////////////// +//! Compare two unsigned char arrays with a tolerance for # of byte errors +//! @return shrTRUE if \a reference and \a data are identical, otherwise shrFALSE +//! @param reference handle to the reference data / gold image +//! @param data handle to the computed data +//! @param len number of elements in reference and data +//! @param epsilon epsilon to use for the comparison +//! @param threshold tolerance % # of comparison errors (0.15f = 15%) +//////////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrCompareubt( const unsigned char* reference, const unsigned char* data, + const unsigned int len, const float epsilon, const float threshold ); + +//////////////////////////////////////////////////////////////////////////////// +//! Compare two unsigned char arrays with an epsilon tolerance for equality +//! @return shrTRUE if \a reference and \a data are identical, otherwise shrFALSE +//! @param reference handle to the reference data / gold image +//! @param data handle to the computed data +//! @param len number of elements in reference and data +//! @param epsilon epsilon to use for the comparison +//////////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrCompareube( const unsigned char* reference, const unsigned char* data, + const unsigned int len, const float epsilon ); + +//////////////////////////////////////////////////////////////////////////// +//! Compare two float arrays with an epsilon tolerance for equality +//! @return shrTRUE if \a reference and \a data are identical, otherwise shrFALSE +//! 
@param reference handle to the reference data / gold image +//! @param data handle to the computed data +//! @param len number of elements in reference and data +//! @param epsilon epsilon to use for the comparison +//////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrComparefe( const float* reference, const float* data, + const unsigned int len, const float epsilon ); + +//////////////////////////////////////////////////////////////////////////////// +//! Compare two float arrays with an epsilon tolerance for equality and a +//! threshold for # pixel errors +//! @return shrTRUE if \a reference and \a data are identical, otherwise shrFALSE +//! @param reference handle to the reference data / gold image +//! @param data handle to the computed data +//! @param len number of elements in reference and data +//! @param epsilon epsilon to use for the comparison +//////////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrComparefet( const float* reference, const float* data, + const unsigned int len, const float epsilon, const float threshold ); + +//////////////////////////////////////////////////////////////////////////// +//! Compare two float arrays using L2-norm with an epsilon tolerance for +//! equality +//! @return shrTRUE if \a reference and \a data are identical, otherwise shrFALSE +//! @param reference handle to the reference data / gold image +//! @param data handle to the computed data +//! @param len number of elements in reference and data +//! @param epsilon epsilon to use for the comparison +//////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrCompareL2fe( const float* reference, const float* data, + const unsigned int len, const float epsilon ); + +//////////////////////////////////////////////////////////////////////////////// +//! Compare two PPM image files with an epsilon tolerance for equality +//! 
@return shrTRUE if \a reference and \a data are identical, otherwise shrFALSE +//! @param src_file filename for the image to be compared +//! @param ref_file filename for the reference data / gold image +//! @param epsilon epsilon to use for the comparison +//! @param threshold threshold of pixels that can still mismatch to pass (i.e. 0.15f = 15% must pass) +//! $param verboseErrors output details of image mismatch to std::err +//////////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrComparePPM( const char *src_file, const char *ref_file, const float epsilon, const float threshold); + +//////////////////////////////////////////////////////////////////////////////// +//! Compare two PGM image files with an epsilon tolerance for equality +//! @return shrTRUE if \a reference and \a data are identical, otherwise shrFALSE +//! @param src_file filename for the image to be compared +//! @param ref_file filename for the reference data / gold image +//! @param epsilon epsilon to use for the comparison +//! @param threshold threshold of pixels that can still mismatch to pass (i.e. 0.15f = 15% must pass) +//! 
$param verboseErrors output details of image mismatch to std::err +//////////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrComparePGM( const char *src_file, const char *ref_file, const float epsilon, const float threshold); + +extern "C" unsigned char* shrLoadRawFile(const char* filename, size_t size); + +extern "C" size_t shrRoundUp(int group_size, int global_size); + +// Error-check helper: when iSample != iReference, logs cFile/iLine through shrLogEx, then calls pCleanup(EXIT_FAILURE) if a callback is supplied (no exit() is issued here in that case); otherwise closes the log and calls exit(EXIT_FAILURE) +// ********************************************************************* +inline void __shrCheckErrorEX(int iSample, int iReference, void (*pCleanup)(int), const char* cFile, const int iLine) +{ + if (iReference != iSample) + { + shrLogEx(LOGBOTH | ERRORMSG, iSample, "line %i , in file %s !!!\n\n" , iLine, cFile); + if (pCleanup != NULL) + { + pCleanup(EXIT_FAILURE); + } + else + { + shrLogEx(LOGBOTH | CLOSELOG, 0, "Exiting...\n"); + exit(EXIT_FAILURE); + } + } +} + +// Standardized exit: waits on getchar() when prompting applies (WIN32: unless the noprompt or qatest flag is given; otherwise: only when prompt is given without qatest), else logs argv[0] with an exit message; always flushes stderr and calls exit(iExitCode) +// ********************************************************************* +inline void __shrExitEX(int argc, const char** argv, int iExitCode) +{ +#ifdef WIN32 + if (!shrCheckCmdLineFlag(argc, argv, "noprompt") && !shrCheckCmdLineFlag(argc, argv, "qatest")) +#else + if (shrCheckCmdLineFlag(argc, argv, "prompt") && !shrCheckCmdLineFlag(argc, argv, "qatest")) +#endif + { + shrLogEx(LOGBOTH | CLOSELOG, 0, "\nPress to Quit...\n"); + getchar(); + } + else + { + shrLogEx(LOGBOTH | CLOSELOG, 0, "%s Exiting...\n", argv[0]); + } + fflush(stderr); + exit(iExitCode); +} + #endif \ No newline at end of file diff --git a/benchmarks/new_opencl/transpose/transpose.cl b/benchmarks/old_opencl/transpose/transpose.cl similarity index 100% rename from benchmarks/new_opencl/transpose/transpose.cl rename to benchmarks/old_opencl/transpose/transpose.cl diff --git a/benchmarks/new_opencl/transpose/transpose_gold.cpp b/benchmarks/old_opencl/transpose/transpose_gold.cpp similarity index 100% 
rename from benchmarks/new_opencl/transpose/transpose_gold.cpp rename to benchmarks/old_opencl/transpose/transpose_gold.cpp diff --git a/benchmarks/old_opencl/vecadd/Makefile b/benchmarks/old_opencl/vecadd/Makefile new file mode 100644 index 00000000..728ffb7b --- /dev/null +++ b/benchmarks/old_opencl/vecadd/Makefile @@ -0,0 +1,68 @@ +RISCV_TOOL_PATH ?= $(wildcard ../../../../riscv-gnu-toolchain/drops) +POCL_CC_PATH ?= $(wildcard ../../../../pocl/drops_riscv_cc) +POCL_INC_PATH ?= $(wildcard ../include) +POCL_LIB_PATH ?= $(wildcard ../lib) +VX_RT_PATH ?= $(wildcard ../../../runtime) +VX_SIMX_PATH ?= $(wildcard ../../../simX/obj_dir) + +CC = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc +CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++ +DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump +HEX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy +GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb + +VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c +VX_SRCS += $(VX_RT_PATH)/startup/vx_start.S +VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s +VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c +VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s +VX_SRCS += $(VX_RT_PATH)/tests/tests.c +VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c +VX_SRCS += $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) + +VX_CFLAGS = -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld + +CXXFLAGS = -g -O0 -march=rv32im -mabi=ilp32 +CXXFLAGS += -ffreestanding # program may not begin at main() +CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections +CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions +CXXFLAGS += -I$(POCL_INC_PATH) + +VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a +QEMU_LIBS = $(VX_RT_PATH)/qemu/vx_api.c -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a + +PROJECT = vecadd + +SRCS = main.cc + +all: 
$(PROJECT).dump $(PROJECT).hex + +lib$(PROJECT).a: kernel.cl + POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl + +$(PROJECT).elf: $(SRCS) lib$(PROJECT).a + $(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) $(SRCS) $(VX_LIBS) -o $(PROJECT).elf + +$(PROJECT).qemu: $(SRCS) lib$(PROJECT).a + $(CXX) $(CXXFLAGS) $(SRCS) $(QEMU_LIBS) -o $(PROJECT).qemu + +$(PROJECT).hex: $(PROJECT).elf + $(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex + +$(PROJECT).dump: $(PROJECT).elf + $(DMP) -D $(PROJECT).elf > $(PROJECT).dump + +run: $(PROJECT).hex + POCL_DEBUG=all $(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug + +qemu: $(PROJECT).qemu + POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -d in_asm -D debug.log $(PROJECT).qemu + +gdb-s: $(PROJECT).qemu + POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -g 1234 -d in_asm -D debug.log $(PROJECT).qemu + +gdb-c: $(PROJECT).qemu + $(GDB) $(PROJECT).qemu + +clean: + rm -rf *.o *.elf *.dump *.hex *.qemu *.log *.debug \ No newline at end of file diff --git a/benchmarks/new_opencl/vecadd/README b/benchmarks/old_opencl/vecadd/README similarity index 100% rename from benchmarks/new_opencl/vecadd/README rename to benchmarks/old_opencl/vecadd/README diff --git a/benchmarks/new_opencl/vecadd/kernel.cl b/benchmarks/old_opencl/vecadd/kernel.cl similarity index 100% rename from benchmarks/new_opencl/vecadd/kernel.cl rename to benchmarks/old_opencl/vecadd/kernel.cl diff --git a/benchmarks/opencl/vecadd/libvecadd.a b/benchmarks/old_opencl/vecadd/libvecadd.a similarity index 100% rename from benchmarks/opencl/vecadd/libvecadd.a rename to benchmarks/old_opencl/vecadd/libvecadd.a diff --git a/benchmarks/new_opencl/vecadd/main.cc b/benchmarks/old_opencl/vecadd/main.cc similarity index 75% rename from benchmarks/new_opencl/vecadd/main.cc rename to benchmarks/old_opencl/vecadd/main.cc index 68c9675a..178111c1 100644 --- 
a/benchmarks/new_opencl/vecadd/main.cc +++ b/benchmarks/old_opencl/vecadd/main.cc @@ -1,12 +1,13 @@ #include #include -#include #include #include +#define MAX_KERNELS 1 +#define KERNEL_NAME "vecadd" +#define KERNEL_FILE_NAME "vecadd.pocl" #define SIZE 4 #define NUM_WORK_GROUPS 2 -#define KERNEL_NAME "vecadd" #define CL_CHECK(_expr) \ do { \ @@ -21,7 +22,7 @@ #define CL_CHECK2(_expr) \ ({ \ cl_int _err = CL_INVALID_VALUE; \ - decltype(_expr) _ret = _expr; \ + typeof(_expr) _ret = _expr; \ if (_err != CL_SUCCESS) { \ printf("OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \ cleanup(); \ @@ -41,30 +42,9 @@ cl_mem c_memobj = NULL; cl_int *A = NULL; cl_int *B = NULL; cl_int *C = NULL; -uint8_t *kernel_bin = NULL; +char *binary = NULL; -static int read_kernel_file(const char* filename, uint8_t** data, size_t* size) { - if (nullptr == filename || nullptr == data || 0 == size) - return -1; - - FILE* fp = fopen(filename, "r"); - if (NULL == fp) { - fprintf(stderr, "Failed to load kernel."); - return -1; - } - fseek(fp , 0 , SEEK_END); - long fsize = ftell(fp); - rewind(fp); - - *data = (uint8_t*)malloc(fsize); - *size = fread(*data, 1, fsize, fp); - - fclose(fp); - - return 0; -} - -static void cleanup() { +void cleanup() { if (commandQueue) clReleaseCommandQueue(commandQueue); if (kernel) clReleaseKernel(kernel); if (program) clReleaseProgram(program); @@ -72,46 +52,19 @@ static void cleanup() { if (b_memobj) clReleaseMemObject(b_memobj); if (c_memobj) clReleaseMemObject(c_memobj); if (context) clReleaseContext(context); - if (kernel_bin) free(kernel_bin); + if (binary) free(binary); if (A) free(A); if (B) free(B); if (C) free(C); } -static int find_device(char* name, cl_platform_id platform_id, cl_device_id *device_id) { - cl_device_id device_ids[64]; - cl_uint num_devices = 0; - - CL_CHECK(clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_ALL, 64, device_ids, &num_devices)); - - for (int i=0; i $(PROJECT).dump +run-rtlsim: $(PROJECT) kernel.pocl + 
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) -run: $(PROJECT).hex - POCL_DEBUG=all $(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug - -qemu: $(PROJECT).qemu - POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -strace -d in_asm -D debug.log $(PROJECT).qemu - -gdb-s: $(PROJECT).qemu - POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -g 1234 -d in_asm -D debug.log $(PROJECT).qemu - -gdb-c: $(PROJECT).qemu - $(GDB) $(PROJECT).qemu +.depend: $(SRCS) + $(CXX) $(CXXFLAGS) -MM $^ > .depend; clean: - rm -rf *.o *.elf *.dump *.hex *.qemu *.log *.debug \ No newline at end of file + rm -rf $(PROJECT) *.o *.dump .depend + +ifneq ($(MAKECMDGOALS),clean) + -include .depend +endif \ No newline at end of file diff --git a/benchmarks/new_opencl/bfs/kernel.pocl b/benchmarks/opencl/bfs/kernel.pocl similarity index 100% rename from benchmarks/new_opencl/bfs/kernel.pocl rename to benchmarks/opencl/bfs/kernel.pocl diff --git a/benchmarks/opencl/bfs/main.cc b/benchmarks/opencl/bfs/main.cc index 10a40c4c..138ec864 100755 --- a/benchmarks/opencl/bfs/main.cc +++ b/benchmarks/opencl/bfs/main.cc @@ -187,7 +187,7 @@ int main(int argc, char *argv[]) { FILE *fp; Node *h_graph_nodes; char *h_graph_mask, *h_updating_graph_mask, *h_graph_visited; - + try { char *input_f = "graph4096.txt"; printf("Reading File\n"); diff --git a/benchmarks/new_opencl/compiler/bin/poclcc b/benchmarks/opencl/compiler/bin/poclcc similarity index 100% rename from benchmarks/new_opencl/compiler/bin/poclcc rename to benchmarks/opencl/compiler/bin/poclcc diff --git a/benchmarks/new_opencl/compiler/lib/libOpenCL.so b/benchmarks/opencl/compiler/lib/libOpenCL.so similarity index 100% rename from benchmarks/new_opencl/compiler/lib/libOpenCL.so rename to benchmarks/opencl/compiler/lib/libOpenCL.so diff --git a/benchmarks/new_opencl/compiler/lib/libOpenCL.so.2 b/benchmarks/opencl/compiler/lib/libOpenCL.so.2 similarity index 100% rename from 
benchmarks/new_opencl/compiler/lib/libOpenCL.so.2 rename to benchmarks/opencl/compiler/lib/libOpenCL.so.2 diff --git a/benchmarks/new_opencl/compiler/lib/libOpenCL.so.2.5.0 b/benchmarks/opencl/compiler/lib/libOpenCL.so.2.5.0 similarity index 100% rename from benchmarks/new_opencl/compiler/lib/libOpenCL.so.2.5.0 rename to benchmarks/opencl/compiler/lib/libOpenCL.so.2.5.0 diff --git a/benchmarks/new_opencl/compiler/share/pocl/include/_builtin_renames.h b/benchmarks/opencl/compiler/share/pocl/include/_builtin_renames.h similarity index 100% rename from benchmarks/new_opencl/compiler/share/pocl/include/_builtin_renames.h rename to benchmarks/opencl/compiler/share/pocl/include/_builtin_renames.h diff --git a/benchmarks/new_opencl/compiler/share/pocl/include/_clang_opencl.h b/benchmarks/opencl/compiler/share/pocl/include/_clang_opencl.h similarity index 100% rename from benchmarks/new_opencl/compiler/share/pocl/include/_clang_opencl.h rename to benchmarks/opencl/compiler/share/pocl/include/_clang_opencl.h diff --git a/benchmarks/new_opencl/compiler/share/pocl/include/_enable_all_exts.h b/benchmarks/opencl/compiler/share/pocl/include/_enable_all_exts.h similarity index 100% rename from benchmarks/new_opencl/compiler/share/pocl/include/_enable_all_exts.h rename to benchmarks/opencl/compiler/share/pocl/include/_enable_all_exts.h diff --git a/benchmarks/new_opencl/compiler/share/pocl/include/_kernel.h b/benchmarks/opencl/compiler/share/pocl/include/_kernel.h similarity index 100% rename from benchmarks/new_opencl/compiler/share/pocl/include/_kernel.h rename to benchmarks/opencl/compiler/share/pocl/include/_kernel.h diff --git a/benchmarks/new_opencl/compiler/share/pocl/include/_kernel_c.h b/benchmarks/opencl/compiler/share/pocl/include/_kernel_c.h similarity index 100% rename from benchmarks/new_opencl/compiler/share/pocl/include/_kernel_c.h rename to benchmarks/opencl/compiler/share/pocl/include/_kernel_c.h diff --git 
a/benchmarks/new_opencl/compiler/share/pocl/include/_kernel_constants.h b/benchmarks/opencl/compiler/share/pocl/include/_kernel_constants.h similarity index 100% rename from benchmarks/new_opencl/compiler/share/pocl/include/_kernel_constants.h rename to benchmarks/opencl/compiler/share/pocl/include/_kernel_constants.h diff --git a/benchmarks/new_opencl/compiler/share/pocl/include/opencl-c-base.h b/benchmarks/opencl/compiler/share/pocl/include/opencl-c-base.h similarity index 100% rename from benchmarks/new_opencl/compiler/share/pocl/include/opencl-c-base.h rename to benchmarks/opencl/compiler/share/pocl/include/opencl-c-base.h diff --git a/benchmarks/new_opencl/compiler/share/pocl/include/opencl-c.h b/benchmarks/opencl/compiler/share/pocl/include/opencl-c.h similarity index 100% rename from benchmarks/new_opencl/compiler/share/pocl/include/opencl-c.h rename to benchmarks/opencl/compiler/share/pocl/include/opencl-c.h diff --git a/benchmarks/new_opencl/compiler/share/pocl/include/pocl.h b/benchmarks/opencl/compiler/share/pocl/include/pocl.h similarity index 100% rename from benchmarks/new_opencl/compiler/share/pocl/include/pocl.h rename to benchmarks/opencl/compiler/share/pocl/include/pocl.h diff --git a/benchmarks/new_opencl/compiler/share/pocl/include/pocl_device.h b/benchmarks/opencl/compiler/share/pocl/include/pocl_device.h similarity index 100% rename from benchmarks/new_opencl/compiler/share/pocl/include/pocl_device.h rename to benchmarks/opencl/compiler/share/pocl/include/pocl_device.h diff --git a/benchmarks/new_opencl/compiler/share/pocl/include/pocl_image_types.h b/benchmarks/opencl/compiler/share/pocl/include/pocl_image_types.h similarity index 100% rename from benchmarks/new_opencl/compiler/share/pocl/include/pocl_image_types.h rename to benchmarks/opencl/compiler/share/pocl/include/pocl_image_types.h diff --git a/benchmarks/new_opencl/compiler/share/pocl/include/pocl_spir.h b/benchmarks/opencl/compiler/share/pocl/include/pocl_spir.h similarity index 100% 
rename from benchmarks/new_opencl/compiler/share/pocl/include/pocl_spir.h rename to benchmarks/opencl/compiler/share/pocl/include/pocl_spir.h diff --git a/benchmarks/new_opencl/compiler/share/pocl/include/pocl_types.h b/benchmarks/opencl/compiler/share/pocl/include/pocl_types.h similarity index 100% rename from benchmarks/new_opencl/compiler/share/pocl/include/pocl_types.h rename to benchmarks/opencl/compiler/share/pocl/include/pocl_types.h diff --git a/benchmarks/new_opencl/compiler/share/pocl/kernel-riscv32-unknown-unknown-elf-skylake.bc b/benchmarks/opencl/compiler/share/pocl/kernel-riscv32-unknown-unknown-elf-skylake.bc similarity index 100% rename from benchmarks/new_opencl/compiler/share/pocl/kernel-riscv32-unknown-unknown-elf-skylake.bc rename to benchmarks/opencl/compiler/share/pocl/kernel-riscv32-unknown-unknown-elf-skylake.bc diff --git a/benchmarks/new_opencl/convolution/.gitignore b/benchmarks/opencl/convolution/.gitignore similarity index 100% rename from benchmarks/new_opencl/convolution/.gitignore rename to benchmarks/opencl/convolution/.gitignore diff --git a/benchmarks/opencl/convolution/Makefile b/benchmarks/opencl/convolution/Makefile index 42c8605a..d0c878f7 100644 --- a/benchmarks/opencl/convolution/Makefile +++ b/benchmarks/opencl/convolution/Makefile @@ -1,68 +1,47 @@ -RISCV_TOOL_PATH ?= $(wildcard ../../../../riscv-gnu-toolchain/drops) -POCL_CC_PATH ?= $(wildcard ../../../../pocl/drops_riscv_cc) -POCL_INC_PATH ?= $(wildcard ../include) -POCL_LIB_PATH ?= $(wildcard ../lib) -VX_RT_PATH ?= $(wildcard ../../../runtime) -VX_SIMX_PATH ?= $(wildcard ../../../simX/obj_dir) +LLVM_HOME ?= ~/dev/llvm-project/drops +TOOLCHAIN_PATH ?= ~/dev/riscv-gnu-toolchain/drops +SYSROOT ?= $(TOOLCHAIN_PATH)/riscv32-unknown-elf +POCL_CC_PATH ?= $(realpath ../compiler) +POCL_RT_PATH ?= $(realpath ../runtime) +VORTEX_DRV_PATH ?= $(realpath ../../../driver/sw) +VORTEX_RT_PATH ?= $(realpath ../../../runtime) -CC = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc -CXX = 
$(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++ -DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump -HEX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy -GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb +CXXFLAGS += -std=c++11 -O0 -g -fpermissive -Wall -Wextra -pedantic -Wfatal-errors -VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c -VX_SRCS += $(VX_RT_PATH)/startup/vx_start.S -VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s -VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c -VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s -VX_SRCS += $(VX_RT_PATH)/tests/tests.c -VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c -VX_SRCS += $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) +CXXFLAGS += -I$(POCL_RT_PATH)/include -VX_CFLAGS = -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld - -CXXFLAGS = -g -O0 -march=rv32im -mabi=ilp32 -CXXFLAGS += -ffreestanding # program may not begin at main() -CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections -CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions -CXXFLAGS += -I$(POCL_INC_PATH) - -VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a -QEMU_LIBS = $(VX_RT_PATH)/qemu/vx_api.c -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a +LDFLAGS += -L$(POCL_RT_PATH)/lib -L$(VORTEX_DRV_PATH)/simx -lOpenCL -lvortex PROJECT = convolution SRCS = main.cpp utils.cpp -all: $(PROJECT).dump $(PROJECT).hex +all: $(PROJECT) -lib$(PROJECT).a: kernel.cl - POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl +kernel.pocl: kernel.cl + TOOLCHAIN_PATH=$(TOOLCHAIN_PATH) SYSROOT=$(SYSROOT) LLVM_HOME=$(LLVM_HOME) VORTEX_RUNTIME_PATH=$(VORTEX_RT_PATH) POCL_DEBUG=all LD_LIBRARY_PATH=$(LLVM_HOME)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o kernel.pocl kernel.cl + +$(PROJECT): 
$(SRCS) + $(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@ -$(PROJECT).elf: $(SRCS) lib$(PROJECT).a - $(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) $(SRCS) $(VX_LIBS) -o $(PROJECT).elf +run-fpga: $(PROJECT) kernel.pocl + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) -$(PROJECT).qemu: $(SRCS) lib$(PROJECT).a - $(CXX) $(CXXFLAGS) $(SRCS) $(QEMU_LIBS) -o $(PROJECT).qemu +run-ase: $(PROJECT) kernel.pocl + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) -$(PROJECT).hex: $(PROJECT).elf - $(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex +run-simx: $(PROJECT) kernel.pocl + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) -$(PROJECT).dump: $(PROJECT).elf - $(DMP) -D $(PROJECT).elf > $(PROJECT).dump +run-rtlsim: $(PROJECT) kernel.pocl + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) -run: $(PROJECT).hex - POCL_DEBUG=all $(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug - -qemu: $(PROJECT).qemu - POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -d in_asm -D debug.log $(PROJECT).qemu - -gdb-s: $(PROJECT).qemu - POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -g 1234 -d in_asm -D debug.log $(PROJECT).qemu - -gdb-c: $(PROJECT).qemu - $(GDB) $(PROJECT).qemu +.depend: $(SRCS) + $(CXX) $(CXXFLAGS) -MM $^ > .depend; clean: - rm -rf *.o *.elf *.dump *.hex *.qemu *.log *.debug + rm -rf $(PROJECT) *.o *.dump .depend + +ifneq ($(MAKECMDGOALS),clean) + -include .depend +endif diff --git a/benchmarks/opencl/convolution/kernel.cl b/benchmarks/opencl/convolution/kernel.cl index e0e4da3a..ff56dc6f 100755 --- a/benchmarks/opencl/convolution/kernel.cl +++ b/benchmarks/opencl/convolution/kernel.cl @@ -1,54 +1,54 @@ -__kernel -void convolution( - __read_only image2d_t sourceImage, - __write_only image2d_t outputImage, - int rows, - int cols, - __constant float* filter, - int filterWidth, - sampler_t sampler) -{ 
- // Store each work-item’s unique row and column - int column = get_global_id(0); - int row = get_global_id(1); - - // Half the width of the filter is needed for indexing - // memory later - int halfWidth = (int)(filterWidth/2); - - // All accesses to images return data as four-element vector - // (i.e., float4), although only the 'x' component will contain - // meaningful data in this code - float4 sum = {0.0f, 0.0f, 0.0f, 0.0f}; - - // Iterator for the filter - int filterIdx = 0; - - // Each work-item iterates around its local area based on the - // size of the filter - int2 coords; // Coordinates for accessing the image - // Iterate the filter rows - for(int i = -halfWidth; i <= halfWidth; i++) { - coords.y = row + i; - - // Iterate over the filter columns - for(int j = -halfWidth; j <= halfWidth; j++) { - coords.x = column + j; - - float4 pixel; - // Read a pixel from the image. A single channel image - // stores the pixel in the 'x' coordinate of the returned - // vector. - pixel = read_imagef(sourceImage, sampler, coords); - sum.x += pixel.x * filter[filterIdx++]; - } - } - - // Copy the data to the output image if the - // work-item is in bounds - if(row < rows && column < cols) { - coords.x = column; - coords.y = row; - write_imagef(outputImage, coords, sum); - } +__kernel +void convolution( + __read_only image2d_t sourceImage, + __write_only image2d_t outputImage, + int rows, + int cols, + __constant float* filter, + int filterWidth, + sampler_t sampler) +{ + // Store each work-item’s unique row and column + int column = get_global_id(0); + int row = get_global_id(1); + + // Half the width of the filter is needed for indexing + // memory later + int halfWidth = (int)(filterWidth/2); + + // All accesses to images return data as four-element vector + // (i.e., float4), although only the 'x' component will contain + // meaningful data in this code + float4 sum = {0.0f, 0.0f, 0.0f, 0.0f}; + + // Iterator for the filter + int filterIdx = 0; + + // Each 
work-item iterates around its local area based on the + // size of the filter + int2 coords; // Coordinates for accessing the image + // Iterate the filter rows + for(int i = -halfWidth; i <= halfWidth; i++) { + coords.y = row + i; + + // Iterate over the filter columns + for(int j = -halfWidth; j <= halfWidth; j++) { + coords.x = column + j; + + float4 pixel; + // Read a pixel from the image. A single channel image + // stores the pixel in the 'x' coordinate of the returned + // vector. + pixel = read_imagef(sourceImage, sampler, coords); + sum.x += pixel.x * filter[filterIdx++]; + } + } + + // Copy the data to the output image if the + // work-item is in bounds + if(row < rows && column < cols) { + coords.x = column; + coords.y = row; + write_imagef(outputImage, coords, sum); + } } \ No newline at end of file diff --git a/benchmarks/opencl/convolution/main.cpp b/benchmarks/opencl/convolution/main.cpp index 5db2ae57..f5bf1584 100755 --- a/benchmarks/opencl/convolution/main.cpp +++ b/benchmarks/opencl/convolution/main.cpp @@ -1,261 +1,261 @@ -#include -#include -#include - -#include "utils.h" - -// This function takes a positive integer and rounds it up to -// the nearest multiple of another provided integer -unsigned int roundUp(unsigned int value, unsigned int multiple) { - - // Determine how far past the nearest multiple the value is - unsigned int remainder = value % multiple; - - // Add the difference to make the value a multiple - if(remainder != 0) { - value += (multiple-remainder); - } - - return value; -} - -// This function reads in a text file and stores it as a char pointer -char* readSource(char* kernelPath) { - - cl_int status; - FILE *fp; - char *source; - long int size; - - printf("Program file is: %s\n", kernelPath); - - fp = fopen(kernelPath, "rb"); - if(!fp) { - printf("Could not open kernel file\n"); - exit(-1); - } - status = fseek(fp, 0, SEEK_END); - if(status != 0) { - printf("Error seeking to end of file\n"); - exit(-1); - } - size = 
ftell(fp); - if(size < 0) { - printf("Error getting file position\n"); - exit(-1); - } - - rewind(fp); - - source = (char *)malloc(size + 1); - - int i; - for (i = 0; i < size+1; i++) { - source[i]='\0'; - } - - if(source == NULL) { - printf("Error allocating space for the kernel source\n"); - exit(-1); - } - - fread(source, 1, size, fp); - source[size] = '\0'; - - return source; -} - -void chk(cl_int status, const char* cmd) { - - if(status != CL_SUCCESS) { - printf("%s failed (%d)\n", cmd, status); - exit(-1); - } -} - -int main() { - - int i, j, k, l; - - // Rows and columns in the input image - int imageHeight; - int imageWidth; - - const char* inputFile = "input.bmp"; - const char* outputFile = "output.bmp"; - - // Homegrown function to read a BMP from file - float* inputImage = readImage(inputFile, &imageWidth, - &imageHeight); - - // Size of the input and output images on the host - int dataSize = imageHeight*imageWidth*sizeof(float); - - // Output image on the host - float* outputImage = NULL; - outputImage = (float*)malloc(dataSize); - float* refImage = NULL; - refImage = (float*)malloc(dataSize); - - // 45 degree motion blur - float filter[49] = - {0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - 0, 0, -1, 0, 1, 0, 0, - 0, 0, -2, 0, 2, 0, 0, - 0, 0, -1, 0, 1, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0}; - - // The convolution filter is 7x7 - int filterWidth = 7; - int filterSize = filterWidth*filterWidth; // Assume a square kernel - - // Set up the OpenCL environment - cl_int status; - - // Discovery platform - cl_platform_id platform; - status = clGetPlatformIDs(1, &platform, NULL); - chk(status, "clGetPlatformIDs"); - - // Discover device - cl_device_id device; - clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, NULL); - chk(status, "clGetDeviceIDs"); - - // Create context - cl_context_properties props[3] = {CL_CONTEXT_PLATFORM, - (cl_context_properties)(platform), 0}; - cl_context context; - context = clCreateContext(props, 1, &device, 
NULL, NULL, &status); - chk(status, "clCreateContext"); - - // Create command queue - cl_command_queue queue; - queue = clCreateCommandQueue(context, device, 0, &status); - chk(status, "clCreateCommandQueue"); - - // The image format describes how the data will be stored in memory - cl_image_format format; - format.image_channel_order = CL_R; // single channel - format.image_channel_data_type = CL_FLOAT; // float data type - - // Create space for the source image on the device - cl_mem d_inputImage = clCreateImage2D(context, 0, &format, imageWidth, - imageHeight, 0, NULL, &status); - chk(status, "clCreateImage2D"); - - // Create space for the output image on the device - cl_mem d_outputImage = clCreateImage2D(context, 0, &format, imageWidth, - imageHeight, 0, NULL, &status); - chk(status, "clCreateImage2D"); - - // Create space for the 7x7 filter on the device - cl_mem d_filter = clCreateBuffer(context, 0, filterSize*sizeof(float), - NULL, &status); - chk(status, "clCreateBuffer"); - - // Copy the source image to the device - size_t origin[3] = {0, 0, 0}; // Offset within the image to copy from - size_t region[3] = {imageWidth, imageHeight, 1}; // Elements to per dimension - status = clEnqueueWriteImage(queue, d_inputImage, CL_FALSE, origin, region, - 0, 0, inputImage, 0, NULL, NULL); - chk(status, "clEnqueueWriteImage"); - - // Copy the 7x7 filter to the device - status = clEnqueueWriteBuffer(queue, d_filter, CL_FALSE, 0, - filterSize*sizeof(float), filter, 0, NULL, NULL); - chk(status, "clEnqueueWriteBuffer"); - - // Create the image sampler - cl_sampler sampler = clCreateSampler(context, CL_FALSE, - CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &status); - chk(status, "clCreateSampler"); - - const char* source = readSource("kernel.cl"); - - // Create a program object with source and build it - cl_program program; - program = clCreateProgramWithSource(context, 1, &source, NULL, NULL); - chk(status, "clCreateProgramWithSource"); - status = clBuildProgram(program, 
1, &device, NULL, NULL, NULL); - chk(status, "clBuildProgram"); - - // Create the kernel object - cl_kernel kernel; - kernel = clCreateKernel(program, "convolution", &status); - chk(status, "clCreateKernel"); - - // Set the kernel arguments - status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_inputImage); - status |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_outputImage); - status |= clSetKernelArg(kernel, 2, sizeof(int), &imageHeight); - status |= clSetKernelArg(kernel, 3, sizeof(int), &imageWidth); - status |= clSetKernelArg(kernel, 4, sizeof(cl_mem), &d_filter); - status |= clSetKernelArg(kernel, 5, sizeof(int), &filterWidth); - status |= clSetKernelArg(kernel, 6, sizeof(cl_sampler), &sampler); - chk(status, "clSetKernelArg"); - - // Set the work item dimensions - size_t globalSize[2] = {imageWidth, imageHeight}; - status = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, globalSize, NULL, 0, - NULL, NULL); - chk(status, "clEnqueueNDRange"); - - // Read the image back to the host - status = clEnqueueReadImage(queue, d_outputImage, CL_TRUE, origin, - region, 0, 0, outputImage, 0, NULL, NULL); - chk(status, "clEnqueueReadImage"); - - // Write the output image to file - storeImage(outputImage, outputFile, imageHeight, imageWidth, inputFile); - - // Compute the reference image - for(i = 0; i < imageHeight; i++) { - for(j = 0; j < imageWidth; j++) { - refImage[i*imageWidth+j] = 0; - } - } - - // Iterate over the rows of the source image - int halfFilterWidth = filterWidth/2; - float sum; - for(i = 0; i < imageHeight; i++) { - // Iterate over the columns of the source image - for(j = 0; j < imageWidth; j++) { - sum = 0; // Reset sum for new source pixel - // Apply the filter to the neighborhood - for(k = - halfFilterWidth; k <= halfFilterWidth; k++) { - for(l = - halfFilterWidth; l <= halfFilterWidth; l++) { - if(i+k >= 0 && i+k < imageHeight && - j+l >= 0 && j+l < imageWidth) { - sum += inputImage[(i+k)*imageWidth + j+l] * - filter[(k+halfFilterWidth)*filterWidth 
+ - l+halfFilterWidth]; - } - } - } - refImage[i*imageWidth+j] = sum; - } - } - - int failed = 0; - for(i = 0; i < imageHeight; i++) { - for(j = 0; j < imageWidth; j++) { - if(abs(outputImage[i*imageWidth+j]-refImage[i*imageWidth+j]) > 0.01) { - printf("Results are INCORRECT\n"); - printf("Pixel mismatch at <%d,%d> (%f vs. %f)\n", i, j, - outputImage[i*imageWidth+j], refImage[i*imageWidth+j]); - failed = 1; - } - if(failed) break; - } - if(failed) break; - } - if(!failed) { - printf("Results are correct\n"); - } - - return 0; +#include +#include +#include + +#include "utils.h" + +// This function takes a positive integer and rounds it up to +// the nearest multiple of another provided integer +unsigned int roundUp(unsigned int value, unsigned int multiple) { + + // Determine how far past the nearest multiple the value is + unsigned int remainder = value % multiple; + + // Add the difference to make the value a multiple + if(remainder != 0) { + value += (multiple-remainder); + } + + return value; +} + +// This function reads in a text file and stores it as a char pointer +char* readSource(char* kernelPath) { + + cl_int status; + FILE *fp; + char *source; + long int size; + + printf("Program file is: %s\n", kernelPath); + + fp = fopen(kernelPath, "rb"); + if(!fp) { + printf("Could not open kernel file\n"); + exit(-1); + } + status = fseek(fp, 0, SEEK_END); + if(status != 0) { + printf("Error seeking to end of file\n"); + exit(-1); + } + size = ftell(fp); + if(size < 0) { + printf("Error getting file position\n"); + exit(-1); + } + + rewind(fp); + + source = (char *)malloc(size + 1); + + int i; + for (i = 0; i < size+1; i++) { + source[i]='\0'; + } + + if(source == NULL) { + printf("Error allocating space for the kernel source\n"); + exit(-1); + } + + fread(source, 1, size, fp); + source[size] = '\0'; + + return source; +} + +void chk(cl_int status, const char* cmd) { + + if(status != CL_SUCCESS) { + printf("%s failed (%d)\n", cmd, status); + exit(-1); + } +} + +int 
main() { + + int i, j, k, l; + + // Rows and columns in the input image + int imageHeight; + int imageWidth; + + const char* inputFile = "input.bmp"; + const char* outputFile = "output.bmp"; + + // Homegrown function to read a BMP from file + float* inputImage = readImage(inputFile, &imageWidth, + &imageHeight); + + // Size of the input and output images on the host + int dataSize = imageHeight*imageWidth*sizeof(float); + + // Output image on the host + float* outputImage = NULL; + outputImage = (float*)malloc(dataSize); + float* refImage = NULL; + refImage = (float*)malloc(dataSize); + + // 45 degree motion blur + float filter[49] = + {0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + 0, 0, -1, 0, 1, 0, 0, + 0, 0, -2, 0, 2, 0, 0, + 0, 0, -1, 0, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0}; + + // The convolution filter is 7x7 + int filterWidth = 7; + int filterSize = filterWidth*filterWidth; // Assume a square kernel + + // Set up the OpenCL environment + cl_int status; + + // Discovery platform + cl_platform_id platform; + status = clGetPlatformIDs(1, &platform, NULL); + chk(status, "clGetPlatformIDs"); + + // Discover device + cl_device_id device; + clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, NULL); + chk(status, "clGetDeviceIDs"); + + // Create context + cl_context_properties props[3] = {CL_CONTEXT_PLATFORM, + (cl_context_properties)(platform), 0}; + cl_context context; + context = clCreateContext(props, 1, &device, NULL, NULL, &status); + chk(status, "clCreateContext"); + + // Create command queue + cl_command_queue queue; + queue = clCreateCommandQueue(context, device, 0, &status); + chk(status, "clCreateCommandQueue"); + + // The image format describes how the data will be stored in memory + cl_image_format format; + format.image_channel_order = CL_R; // single channel + format.image_channel_data_type = CL_FLOAT; // float data type + + // Create space for the source image on the device + cl_mem d_inputImage = clCreateImage2D(context, 0, 
&format, imageWidth, + imageHeight, 0, NULL, &status); + chk(status, "clCreateImage2D"); + + // Create space for the output image on the device + cl_mem d_outputImage = clCreateImage2D(context, 0, &format, imageWidth, + imageHeight, 0, NULL, &status); + chk(status, "clCreateImage2D"); + + // Create space for the 7x7 filter on the device + cl_mem d_filter = clCreateBuffer(context, 0, filterSize*sizeof(float), + NULL, &status); + chk(status, "clCreateBuffer"); + + // Copy the source image to the device + size_t origin[3] = {0, 0, 0}; // Offset within the image to copy from + size_t region[3] = {imageWidth, imageHeight, 1}; // Elements to per dimension + status = clEnqueueWriteImage(queue, d_inputImage, CL_FALSE, origin, region, + 0, 0, inputImage, 0, NULL, NULL); + chk(status, "clEnqueueWriteImage"); + + // Copy the 7x7 filter to the device + status = clEnqueueWriteBuffer(queue, d_filter, CL_FALSE, 0, + filterSize*sizeof(float), filter, 0, NULL, NULL); + chk(status, "clEnqueueWriteBuffer"); + + // Create the image sampler + cl_sampler sampler = clCreateSampler(context, CL_FALSE, + CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &status); + chk(status, "clCreateSampler"); + + const char* source = readSource("kernel.cl"); + + // Create a program object with source and build it + cl_program program; + program = clCreateProgramWithSource(context, 1, &source, NULL, NULL); + chk(status, "clCreateProgramWithSource"); + status = clBuildProgram(program, 1, &device, NULL, NULL, NULL); + chk(status, "clBuildProgram"); + + // Create the kernel object + cl_kernel kernel; + kernel = clCreateKernel(program, "convolution", &status); + chk(status, "clCreateKernel"); + + // Set the kernel arguments + status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_inputImage); + status |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_outputImage); + status |= clSetKernelArg(kernel, 2, sizeof(int), &imageHeight); + status |= clSetKernelArg(kernel, 3, sizeof(int), &imageWidth); + status |= 
clSetKernelArg(kernel, 4, sizeof(cl_mem), &d_filter); + status |= clSetKernelArg(kernel, 5, sizeof(int), &filterWidth); + status |= clSetKernelArg(kernel, 6, sizeof(cl_sampler), &sampler); + chk(status, "clSetKernelArg"); + + // Set the work item dimensions + size_t globalSize[2] = {imageWidth, imageHeight}; + status = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, globalSize, NULL, 0, + NULL, NULL); + chk(status, "clEnqueueNDRange"); + + // Read the image back to the host + status = clEnqueueReadImage(queue, d_outputImage, CL_TRUE, origin, + region, 0, 0, outputImage, 0, NULL, NULL); + chk(status, "clEnqueueReadImage"); + + // Write the output image to file + storeImage(outputImage, outputFile, imageHeight, imageWidth, inputFile); + + // Compute the reference image + for(i = 0; i < imageHeight; i++) { + for(j = 0; j < imageWidth; j++) { + refImage[i*imageWidth+j] = 0; + } + } + + // Iterate over the rows of the source image + int halfFilterWidth = filterWidth/2; + float sum; + for(i = 0; i < imageHeight; i++) { + // Iterate over the columns of the source image + for(j = 0; j < imageWidth; j++) { + sum = 0; // Reset sum for new source pixel + // Apply the filter to the neighborhood + for(k = - halfFilterWidth; k <= halfFilterWidth; k++) { + for(l = - halfFilterWidth; l <= halfFilterWidth; l++) { + if(i+k >= 0 && i+k < imageHeight && + j+l >= 0 && j+l < imageWidth) { + sum += inputImage[(i+k)*imageWidth + j+l] * + filter[(k+halfFilterWidth)*filterWidth + + l+halfFilterWidth]; + } + } + } + refImage[i*imageWidth+j] = sum; + } + } + + int failed = 0; + for(i = 0; i < imageHeight; i++) { + for(j = 0; j < imageWidth; j++) { + if(abs(outputImage[i*imageWidth+j]-refImage[i*imageWidth+j]) > 0.01) { + printf("Results are INCORRECT\n"); + printf("Pixel mismatch at <%d,%d> (%f vs. 
%f)\n", i, j, + outputImage[i*imageWidth+j], refImage[i*imageWidth+j]); + failed = 1; + } + if(failed) break; + } + if(failed) break; + } + if(!failed) { + printf("Results are correct\n"); + } + + return 0; } \ No newline at end of file diff --git a/benchmarks/opencl/guassian/Makefile b/benchmarks/opencl/guassian/Makefile index 55f7620c..2584104d 100644 --- a/benchmarks/opencl/guassian/Makefile +++ b/benchmarks/opencl/guassian/Makefile @@ -1,68 +1,47 @@ -RISCV_TOOL_PATH ?= $(wildcard ../../../../riscv-gnu-toolchain/drops) -POCL_CC_PATH ?= $(wildcard ../../../../pocl/drops_riscv_cc) -POCL_INC_PATH ?= $(wildcard ../include) -POCL_LIB_PATH ?= $(wildcard ../lib) -VX_RT_PATH ?= $(wildcard ../../../runtime) -VX_SIMX_PATH ?= $(wildcard ../../../simX/obj_dir) +LLVM_HOME ?= ~/dev/llvm-project/drops +TOOLCHAIN_PATH ?= ~/dev/riscv-gnu-toolchain/drops +SYSROOT ?= $(TOOLCHAIN_PATH)/riscv32-unknown-elf +POCL_CC_PATH ?= $(realpath ../compiler) +POCL_RT_PATH ?= $(realpath ../runtime) +VORTEX_DRV_PATH ?= $(realpath ../../../driver/sw) +VORTEX_RT_PATH ?= $(realpath ../../../runtime) -CC = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc -CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++ -DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump -HEX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy -GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb +CXXFLAGS += -std=c++11 -O0 -g -fpermissive -Wall -Wextra -pedantic -Wfatal-errors -VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c -VX_SRCS += $(VX_RT_PATH)/startup/vx_start.S -VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s -VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c -VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s -VX_SRCS += $(VX_RT_PATH)/tests/tests.c -VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c -VX_SRCS += $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) +CXXFLAGS += -I$(POCL_RT_PATH)/include -VX_CFLAGS = -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld +LDFLAGS += 
-L$(POCL_RT_PATH)/lib -L$(VORTEX_DRV_PATH)/stub -lOpenCL -lvortex -CXXFLAGS = -g -O0 -march=rv32im -mabi=ilp32 -CXXFLAGS += -ffreestanding # program may not begin at main() -CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections -CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions -CXXFLAGS += -I$(POCL_INC_PATH) - -VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a -QEMU_LIBS = $(VX_RT_PATH)/qemu/vx_api.c -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a - -PROJECT = gaussian +PROJECT = guassian SRCS = main.cc clutils.cpp utils.cpp -all: $(PROJECT).dump $(PROJECT).hex +all: $(PROJECT) -lib$(PROJECT).a: kernel.cl - POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl +kernel.pocl: kernel.cl + TOOLCHAIN_PATH=$(TOOLCHAIN_PATH) SYSROOT=$(SYSROOT) LLVM_HOME=$(LLVM_HOME) VORTEX_RUNTIME_PATH=$(VORTEX_RT_PATH) POCL_DEBUG=all LD_LIBRARY_PATH=$(LLVM_HOME)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o kernel.pocl kernel.cl + +$(PROJECT): $(SRCS) + $(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@ -$(PROJECT).elf: $(SRCS) lib$(PROJECT).a - $(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) $(SRCS) $(VX_LIBS) -o $(PROJECT).elf +run-fpga: $(PROJECT) kernel.pocl + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) -$(PROJECT).qemu: $(SRCS) lib$(PROJECT).a - $(CXX) $(CXXFLAGS) $(SRCS) $(QEMU_LIBS) -o $(PROJECT).qemu +run-ase: $(PROJECT) kernel.pocl + ASE_LOG=0 LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) -$(PROJECT).hex: $(PROJECT).elf - $(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex +run-simx: $(PROJECT) kernel.pocl + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) -$(PROJECT).dump: $(PROJECT).elf - $(DMP) -D $(PROJECT).elf 
> $(PROJECT).dump +run-rtlsim: $(PROJECT) kernel.pocl + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) -run: $(PROJECT).hex - POCL_DEBUG=all $(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug - -qemu: $(PROJECT).qemu - POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -d in_asm -D debug.log $(PROJECT).qemu - -gdb-s: $(PROJECT).qemu - POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -g 1234 -d in_asm -D debug.log $(PROJECT).qemu - -gdb-c: $(PROJECT).qemu - $(GDB) $(PROJECT).qemu +.depend: $(SRCS) + $(CXX) $(CXXFLAGS) -MM $^ > .depend; clean: - rm -rf *.o *.elf *.dump *.hex *.qemu *.log *.debug + rm -rf $(PROJECT) *.o *.dump .depend + +ifneq ($(MAKECMDGOALS),clean) + -include .depend +endif \ No newline at end of file diff --git a/benchmarks/opencl/guassian/clutils.cpp b/benchmarks/opencl/guassian/clutils.cpp index 518a4a3e..32feef52 100755 --- a/benchmarks/opencl/guassian/clutils.cpp +++ b/benchmarks/opencl/guassian/clutils.cpp @@ -782,6 +782,27 @@ void cl_writeToZCBuffer(cl_mem mem, void* data, size_t size) cl_unmapBuffer(mem, ptr); } +static int read_kernel_file(const char* filename, uint8_t** data, size_t* size) { + if (nullptr == filename || nullptr == data || nullptr == size) + return -1; + + FILE* fp = fopen(filename, "rb"); + if (NULL == fp) { + fprintf(stderr, "Failed to load kernel."); + return -1; + } + fseek(fp , 0 , SEEK_END); + long fsize = ftell(fp); + rewind(fp); + // An ftell error, an empty file and a failed malloc all leave *data NULL and return -1; on success the caller frees *data. + *data = (fsize > 0) ? (uint8_t*)malloc(fsize) : NULL; + *size = (NULL != *data) ? fread(*data, 1, fsize, fp) : 0; + + fclose(fp); + + return (NULL != *data) ? 0 : -1; +} + //------------------------------------------------------- // Program and kernels //------------------------------------------------------- @@ -839,11 +860,20 @@ cl_program cl_compileProgram(char* kernelPath, char* compileoptions, bool verbos // Create the program object //cl_program clProgramReturn = clCreateProgramWithSource(context, 1, (const char **)&source, NULL, &status); - cl_program clProgramReturn = 
clCreateProgramWithBuiltInKernels(context, 1, &device, "Fan1;Fan2", &status); + //cl_program clProgramReturn = clCreateProgramWithBuiltInKernels(context, 1, &device, "Fan1;Fan2", &status); + // read kernel binary from file + uint8_t *kernel_bin = NULL; + size_t kernel_size; + cl_int binary_status = 0; + status = read_kernel_file("kernel.pocl", &kernel_bin, &kernel_size); + cl_errChk(status, "read_kernel_file", true); + cl_program clProgramReturn = clCreateProgramWithBinary( + context, 1, &device, &kernel_size, &kernel_bin, &binary_status, &status); + free(kernel_bin); cl_errChk(status, "Creating program", true); - free(source); - fclose(fp); + //free(source); + //fclose(fp); // Try to compile the program status = clBuildProgram(clProgramReturn, 0, NULL, compileoptions, NULL, NULL); diff --git a/benchmarks/new_opencl/guassian/kernel.pocl b/benchmarks/opencl/guassian/kernel.pocl similarity index 100% rename from benchmarks/new_opencl/guassian/kernel.pocl rename to benchmarks/opencl/guassian/kernel.pocl diff --git a/benchmarks/opencl/guassian/main.cc b/benchmarks/opencl/guassian/main.cc index 1b852908..45261cc6 100755 --- a/benchmarks/opencl/guassian/main.cc +++ b/benchmarks/opencl/guassian/main.cc @@ -94,10 +94,9 @@ void ForwardSub(cl_context context, float *a, float *b, float *m, int size, cl_event writeEvent, kernelEvent, readEvent; float writeTime = 0, readTime = 0, kernelTime = 0; float writeMB = 0, readMB = 0; - - gaussianElim_program = - cl_compileProgram((char *)"gaussianElim_kernels.cl", NULL); - + + gaussianElim_program = cl_compileProgram((char *)"gaussianElim_kernels.cl", NULL); + fan1_kernel = clCreateKernel(gaussianElim_program, "Fan1", &status); status = cl_errChk(status, (char *)"Error Creating Fan1 kernel", true); if (status) diff --git a/benchmarks/new_opencl/kmeans/.gitignore b/benchmarks/opencl/kmeans/.gitignore similarity index 100% rename from benchmarks/new_opencl/kmeans/.gitignore rename to benchmarks/opencl/kmeans/.gitignore diff --git 
a/benchmarks/opencl/kmeans/Makefile b/benchmarks/opencl/kmeans/Makefile index d00909ed..d08ff166 100644 --- a/benchmarks/opencl/kmeans/Makefile +++ b/benchmarks/opencl/kmeans/Makefile @@ -1,79 +1,47 @@ -RISCV_TOOL_PATH ?= $(wildcard ../../../../riscv-gnu-toolchain/drops) -POCL_CC_PATH ?= $(wildcard ../../../../pocl/drops_riscv_cc) -POCL_INC_PATH ?= $(wildcard ../include) -POCL_LIB_PATH ?= $(wildcard ../lib) -VX_RT_PATH ?= $(wildcard ../../../runtime) -VX_SIMX_PATH ?= $(wildcard ../../../simX/obj_dir) +LLVM_HOME ?= ~/dev/llvm-project/drops +TOOLCHAIN_PATH ?= ~/dev/riscv-gnu-toolchain/drops +SYSROOT ?= $(TOOLCHAIN_PATH)/riscv32-unknown-elf +POCL_CC_PATH ?= $(realpath ../compiler) +POCL_RT_PATH ?= $(realpath ../runtime) +VORTEX_DRV_PATH ?= $(realpath ../../../driver/sw) +VORTEX_RT_PATH ?= $(realpath ../../../runtime) -CC = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc -CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++ -DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump -HEX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy -GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb +CXXFLAGS += -std=c++11 -O0 -g -fpermissive -Wall -Wextra -pedantic -Wfatal-errors -VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c -VX_SRCS += $(VX_RT_PATH)/startup/vx_start.S -VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s -VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c -VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s -VX_SRCS += $(VX_RT_PATH)/tests/tests.c -VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c -VX_SRCS += $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) +CXXFLAGS += -I$(POCL_RT_PATH)/include -VX_CFLAGS = -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld - -CXXFLAGS = -g -O0 -march=rv32im -mabi=ilp32 -CXXFLAGS += -ffreestanding # program may not begin at main() -CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections -CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions -CXXFLAGS += 
-I$(POCL_INC_PATH) - -VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a -QEMU_LIBS = $(VX_RT_PATH)/qemu/vx_api.c -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a +LDFLAGS += -L$(POCL_RT_PATH)/lib -L$(VORTEX_DRV_PATH)/stub -lOpenCL -lvortex PROJECT = kmeans -SRCS = main.cc read_input.c rmse.c cluster.c kmeans_clustering.c -all: $(PROJECT).dump $(PROJECT).hex +SRCS = main.cc read_input.c rmse.c kmeans_clustering.c cluster.c getopt.c -lib$(PROJECT).a: kernel.cl - POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl +all: $(PROJECT) -kmeans_clustering.o: kmeans_clustering.c - $(CC) $(CXXFLAGS) -c kmeans_clustering.c +kernel.pocl: kernel.cl + TOOLCHAIN_PATH=$(TOOLCHAIN_PATH) SYSROOT=$(SYSROOT) LLVM_HOME=$(LLVM_HOME) VORTEX_RUNTIME_PATH=$(VORTEX_RT_PATH) POCL_DEBUG=all LD_LIBRARY_PATH=$(LLVM_HOME)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o kernel.pocl kernel.cl + +$(PROJECT): $(SRCS) + $(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@ -cluster.o: cluster.c - $(CC) $(CXXFLAGS) -c cluster.c +run-fpga: $(PROJECT) kernel.pocl + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) -read_input.o: read_input.c - $(CC) $(CXXFLAGS) -c read_input.c +run-ase: $(PROJECT) kernel.pocl + ASE_LOG=0 LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) -rmse.o: rmse.c - $(CC) $(CXXFLAGS) -c rmse.c +run-simx: $(PROJECT) kernel.pocl + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) -$(PROJECT).elf: $(SRCS) lib$(PROJECT).a - $(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) $(SRCS) $(VX_LIBS) -o $(PROJECT).elf +run-rtlsim: $(PROJECT) kernel.pocl + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) -$(PROJECT).qemu: $(SRCS) lib$(PROJECT).a - $(CXX) 
$(CXXFLAGS) $(SRCS) $(QEMU_LIBS) -o $(PROJECT).qemu - -$(PROJECT).hex: $(PROJECT).elf - $(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex - -$(PROJECT).dump: $(PROJECT).elf - $(DMP) -D $(PROJECT).elf > $(PROJECT).dump - -run: $(PROJECT).hex - POCL_DEBUG=all $(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug - -qemu: $(PROJECT).qemu - POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -strace -d in_asm -D debug.log $(PROJECT).qemu - -gdb-s: $(PROJECT).qemu - POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -g 1234 -d in_asm -D debug.log $(PROJECT).qemu - -gdb-c: $(PROJECT).qemu - $(GDB) $(PROJECT).qemu +.depend: $(SRCS) + $(CXX) $(CXXFLAGS) -MM $^ > .depend; clean: - rm -rf *.o *.elf *.dump *.hex *.qemu *.log *.debug \ No newline at end of file + rm -rf $(PROJECT) *.o *.dump .depend + +ifneq ($(MAKECMDGOALS),clean) + -include .depend +endif diff --git a/benchmarks/opencl/kmeans/getopt.c b/benchmarks/opencl/kmeans/getopt.c index fa2f3137..de98d917 100755 --- a/benchmarks/opencl/kmeans/getopt.c +++ b/benchmarks/opencl/kmeans/getopt.c @@ -1,1184 +1,1184 @@ -/* Getopt for GNU. - NOTE: getopt is now part of the C library, so if you don't know what - "Keep this file name-space clean" means, talk to drepper@gnu.org - before changing it! - Copyright (C) 1987,88,89,90,91,92,93,94,95,96,98,99,2000,2001 - Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ - -/* This tells Alpha OSF/1 not to define a getopt prototype in . - Ditto for AIX 3.2 and . */ -#ifndef _NO_PROTO -# define _NO_PROTO -#endif - -#ifdef HAVE_CONFIG_H -# include -#endif - -#if !defined __STDC__ || !__STDC__ -/* This is a separate conditional since some stdc systems - reject `defined (const)'. */ -# ifndef const -# define const -# endif -#endif - -#include - -/* Comment out all this code if we are using the GNU C Library, and are not - actually compiling the library itself. This code is part of the GNU C - Library, but also included in many other GNU distributions. Compiling - and linking in this code is a waste when using the GNU C library - (especially if it is a shared library). Rather than having every GNU - program understand `configure --with-gnu-libc' and omit the object files, - it is simpler to just do this in the source for each such file. */ - -#define GETOPT_INTERFACE_VERSION 2 -#if !defined _LIBC && defined __GLIBC__ && __GLIBC__ >= 2 -# include -# if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION -# define ELIDE_CODE -# endif -#endif - -#ifndef ELIDE_CODE - - -/* This needs to come after some library #include - to get __GNU_LIBRARY__ defined. */ -#ifdef __GNU_LIBRARY__ -/* Don't include stdlib.h for non-GNU C libraries because some of them - contain conflicting prototypes for getopt. */ -# include -# include -#endif /* GNU C library. */ - -#ifdef VMS -# include -# if HAVE_STRING_H - 0 -# include -# endif -#endif - -#ifndef _ -/* This is for other GNU distributions with internationalized messages. 
*/ -# if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC -# include -# ifndef _ -# define _(msgid) gettext (msgid) -# endif -# else -# define _(msgid) (msgid) -# endif -# if defined _LIBC && defined USE_IN_LIBIO -# include -# endif -#endif - -/* This version of `getopt' appears to the caller like standard Unix `getopt' - but it behaves differently for the user, since it allows the user - to intersperse the options with the other arguments. - - As `getopt' works, it permutes the elements of ARGV so that, - when it is done, all the options precede everything else. Thus - all application programs are extended to handle flexible argument order. - - Setting the environment variable POSIXLY_CORRECT disables permutation. - Then the behavior is completely standard. - - GNU application programs can use a third alternative mode in which - they can distinguish the relative order of options and other arguments. */ - -#include "getopt.h" - -/* For communication from `getopt' to the caller. - When `getopt' finds an option that takes an argument, - the argument value is returned here. - Also, when `ordering' is RETURN_IN_ORDER, - each non-option ARGV-element is returned here. */ - -char *optarg; - -/* Index in ARGV of the next element to be scanned. - This is used for communication to and from the caller - and for communication between successive calls to `getopt'. - - On entry to `getopt', zero means this is the first call; initialize. - - When `getopt' returns -1, this is the index of the first of the - non-option elements that the caller should itself scan. - - Otherwise, `optind' communicates from one call to the next - how much of ARGV has been scanned so far. */ - -/* 1003.2 says this must be 1 before any call. */ -int optind = 1; - -/* Formerly, initialization of getopt depended on optind==0, which - causes problems with re-calling getopt as programs generally don't - know that. 
*/ - -int __getopt_initialized; - -/* The next char to be scanned in the option-element - in which the last option character we returned was found. - This allows us to pick up the scan where we left off. - - If this is zero, or a null string, it means resume the scan - by advancing to the next ARGV-element. */ - -static char *nextchar; - -/* Callers store zero here to inhibit the error message - for unrecognized options. */ - -int opterr = 1; - -/* Set to an option character which was unrecognized. - This must be initialized on some systems to avoid linking in the - system's own getopt implementation. */ - -int optopt = '?'; - -/* Describe how to deal with options that follow non-option ARGV-elements. - - If the caller did not specify anything, - the default is REQUIRE_ORDER if the environment variable - POSIXLY_CORRECT is defined, PERMUTE otherwise. - - REQUIRE_ORDER means don't recognize them as options; - stop option processing when the first non-option is seen. - This is what Unix does. - This mode of operation is selected by either setting the environment - variable POSIXLY_CORRECT, or using `+' as the first character - of the list of option characters. - - PERMUTE is the default. We permute the contents of ARGV as we scan, - so that eventually all the non-options are at the end. This allows options - to be given in any order, even with programs that were not written to - expect this. - - RETURN_IN_ORDER is an option available to programs that were written - to expect options and other ARGV-elements in any order and that care about - the ordering of the two. We describe each non-option ARGV-element - as if it were the argument of an option with character code 1. - Using `-' as the first character of the list of option characters - selects this mode of operation. - - The special argument `--' forces an end of option-scanning regardless - of the value of `ordering'. 
In the case of RETURN_IN_ORDER, only - `--' can cause `getopt' to return -1 with `optind' != ARGC. */ - -static enum -{ - REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER -} ordering; - -/* Value of POSIXLY_CORRECT environment variable. */ -static char *posixly_correct; - -#ifdef __GNU_LIBRARY__ -/* We want to avoid inclusion of string.h with non-GNU libraries - because there are many ways it can cause trouble. - On some systems, it contains special magic macros that don't work - in GCC. */ -# include -# define my_index strchr -#else - -//# if HAVE_STRING_H || WIN32 /* Pete Wilson mod 7/28/02 */ -# include -//# else -//# include -//# endif - -/* Avoid depending on library functions or files - whose names are inconsistent. */ - -#ifndef getenv -extern char *getenv (); -#endif - -static char * -my_index (str, chr) - const char *str; - int chr; -{ - while (*str) - { - if (*str == chr) - return (char *) str; - str++; - } - return 0; -} - -/* If using GCC, we can safely declare strlen this way. - If not using GCC, it is ok not to declare it. */ -#ifdef __GNUC__ -/* Note that Motorola Delta 68k R3V7 comes with GCC but not stddef.h. - That was relevant to code that was here before. */ -# if (!defined __STDC__ || !__STDC__) && !defined strlen -/* gcc with -traditional declares the built-in strlen to return int, - and has done so at least since version 2.4.5. -- rms. */ -extern int strlen (const char *); -# endif /* not __STDC__ */ -#endif /* __GNUC__ */ - -#endif /* not __GNU_LIBRARY__ */ - -/* Handle permutation of arguments. */ - -/* Describe the part of ARGV that contains non-options that have - been skipped. `first_nonopt' is the index in ARGV of the first of them; - `last_nonopt' is the index after the last of them. */ - -static int first_nonopt; -static int last_nonopt; - -#ifdef _LIBC -/* Stored original parameters. - XXX This is no good solution. We should rather copy the args so - that we can compare them later. But we must not use malloc(3). 
*/ -extern int __libc_argc; -extern char **__libc_argv; - -/* Bash 2.0 gives us an environment variable containing flags - indicating ARGV elements that should not be considered arguments. */ - -# ifdef USE_NONOPTION_FLAGS -/* Defined in getopt_init.c */ -extern char *__getopt_nonoption_flags; - -static int nonoption_flags_max_len; -static int nonoption_flags_len; -# endif - -# ifdef USE_NONOPTION_FLAGS -# define SWAP_FLAGS(ch1, ch2) \ - if (nonoption_flags_len > 0) \ - { \ - char __tmp = __getopt_nonoption_flags[ch1]; \ - __getopt_nonoption_flags[ch1] = __getopt_nonoption_flags[ch2]; \ - __getopt_nonoption_flags[ch2] = __tmp; \ - } -# else -# define SWAP_FLAGS(ch1, ch2) -# endif -#else /* !_LIBC */ -# define SWAP_FLAGS(ch1, ch2) -#endif /* _LIBC */ - -/* Exchange two adjacent subsequences of ARGV. - One subsequence is elements [first_nonopt,last_nonopt) - which contains all the non-options that have been skipped so far. - The other is elements [last_nonopt,optind), which contains all - the options processed since those non-options were skipped. - - `first_nonopt' and `last_nonopt' are relocated so that they describe - the new indices of the non-options in ARGV after they are moved. */ - -#if defined __STDC__ && __STDC__ -static void exchange (char **); -#endif - -static void -exchange (argv) - char **argv; -{ - int bottom = first_nonopt; - int middle = last_nonopt; - int top = optind; - char *tem; - - /* Exchange the shorter segment with the far end of the longer segment. - That puts the shorter segment into the right place. - It leaves the longer segment in the right place overall, - but it consists of two parts that need to be swapped next. */ - -#if defined _LIBC && defined USE_NONOPTION_FLAGS - /* First make sure the handling of the `__getopt_nonoption_flags' - string can work normally. Our top argument must be in the range - of the string. */ - if (nonoption_flags_len > 0 && top >= nonoption_flags_max_len) - { - /* We must extend the array. 
The user plays games with us and - presents new arguments. */ - char *new_str = malloc (top + 1); - if (new_str == NULL) - nonoption_flags_len = nonoption_flags_max_len = 0; - else - { - memset (__mempcpy (new_str, __getopt_nonoption_flags, - nonoption_flags_max_len), - '\0', top + 1 - nonoption_flags_max_len); - nonoption_flags_max_len = top + 1; - __getopt_nonoption_flags = new_str; - } - } -#endif - - while (top > middle && middle > bottom) - { - if (top - middle > middle - bottom) - { - /* Bottom segment is the short one. */ - int len = middle - bottom; - register int i; - - /* Swap it with the top part of the top segment. */ - for (i = 0; i < len; i++) - { - tem = argv[bottom + i]; - argv[bottom + i] = argv[top - (middle - bottom) + i]; - argv[top - (middle - bottom) + i] = tem; - SWAP_FLAGS (bottom + i, top - (middle - bottom) + i); - } - /* Exclude the moved bottom segment from further swapping. */ - top -= len; - } - else - { - /* Top segment is the short one. */ - int len = top - middle; - register int i; - - /* Swap it with the bottom part of the bottom segment. */ - for (i = 0; i < len; i++) - { - tem = argv[bottom + i]; - argv[bottom + i] = argv[middle + i]; - argv[middle + i] = tem; - SWAP_FLAGS (bottom + i, middle + i); - } - /* Exclude the moved top segment from further swapping. */ - bottom += len; - } - } - - /* Update records for the slots the non-options now occupy. */ - - first_nonopt += (optind - last_nonopt); - last_nonopt = optind; -} - -/* Initialize the internal data when the first call is made. */ - -#if defined __STDC__ && __STDC__ -static const char *_getopt_initialize (int, char *const *, const char *); -#endif -static const char * -_getopt_initialize (argc, argv, optstring) - int argc; - char *const *argv; - const char *optstring; -{ - /* Start processing options with ARGV-element 1 (since ARGV-element 0 - is the program name); the sequence of previously skipped - non-option ARGV-elements is empty. 
*/ - - first_nonopt = last_nonopt = optind; - - nextchar = NULL; - - posixly_correct = getenv ("POSIXLY_CORRECT"); - - /* Determine how to handle the ordering of options and nonoptions. */ - - if (optstring[0] == '-') - { - ordering = RETURN_IN_ORDER; - ++optstring; - } - else if (optstring[0] == '+') - { - ordering = REQUIRE_ORDER; - ++optstring; - } - else if (posixly_correct != NULL) - ordering = REQUIRE_ORDER; - else - ordering = PERMUTE; - -#if defined _LIBC && defined USE_NONOPTION_FLAGS - if (posixly_correct == NULL - && argc == __libc_argc && argv == __libc_argv) - { - if (nonoption_flags_max_len == 0) - { - if (__getopt_nonoption_flags == NULL - || __getopt_nonoption_flags[0] == '\0') - nonoption_flags_max_len = -1; - else - { - const char *orig_str = __getopt_nonoption_flags; - int len = nonoption_flags_max_len = strlen (orig_str); - if (nonoption_flags_max_len < argc) - nonoption_flags_max_len = argc; - __getopt_nonoption_flags = - (char *) malloc (nonoption_flags_max_len); - if (__getopt_nonoption_flags == NULL) - nonoption_flags_max_len = -1; - else - memset (__mempcpy (__getopt_nonoption_flags, orig_str, len), - '\0', nonoption_flags_max_len - len); - } - } - nonoption_flags_len = nonoption_flags_max_len; - } - else - nonoption_flags_len = 0; -#endif - - return optstring; -} - -/* Scan elements of ARGV (whose length is ARGC) for option characters - given in OPTSTRING. - - If an element of ARGV starts with '-', and is not exactly "-" or "--", - then it is an option element. The characters of this element - (aside from the initial '-') are option characters. If `getopt' - is called repeatedly, it returns successively each of the option characters - from each of the option elements. - - If `getopt' finds another option character, it returns that character, - updating `optind' and `nextchar' so that the next call to `getopt' can - resume the scan with the following option character or ARGV-element. 
- - If there are no more option characters, `getopt' returns -1. - Then `optind' is the index in ARGV of the first ARGV-element - that is not an option. (The ARGV-elements have been permuted - so that those that are not options now come last.) - - OPTSTRING is a string containing the legitimate option characters. - If an option character is seen that is not listed in OPTSTRING, - return '?' after printing an error message. If you set `opterr' to - zero, the error message is suppressed but we still return '?'. - - If a char in OPTSTRING is followed by a colon, that means it wants an arg, - so the following text in the same ARGV-element, or the text of the following - ARGV-element, is returned in `optarg'. Two colons mean an option that - wants an optional arg; if there is text in the current ARGV-element, - it is returned in `optarg', otherwise `optarg' is set to zero. - - If OPTSTRING starts with `-' or `+', it requests different methods of - handling the non-option ARGV-elements. - See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above. - - Long-named options begin with `--' instead of `-'. - Their names may be abbreviated as long as the abbreviation is unique - or is an exact match for some defined option. If they have an - argument, it follows the option name in the same ARGV-element, separated - from the option name by a `=', or else the in next ARGV-element. - When `getopt' finds a long-named option, it returns 0 if that option's - `flag' field is nonzero, the value of the option's `val' field - if the `flag' field is zero. - - The elements of ARGV aren't really const, because we permute them. - But we pretend they're const in the prototype to be compatible - with other systems. - - LONGOPTS is a vector of `struct option' terminated by an - element containing a name which is zero. - - LONGIND returns the index in LONGOPT of the long-named option found. - It is only valid when a long-named option has been found by the most - recent call. 
- - If LONG_ONLY is nonzero, '-' as well as '--' can introduce - long-named options. */ - -int -_getopt_internal (argc, argv, optstring, longopts, longind, long_only) - int argc; - char *const *argv; - const char *optstring; - const struct option *longopts; - int *longind; - int long_only; -{ - int print_errors = opterr; - if (optstring[0] == ':') - print_errors = 0; - - if (argc < 1) - return -1; - - optarg = NULL; - - if (optind == 0 || !__getopt_initialized) - { - if (optind == 0) - optind = 1; /* Don't scan ARGV[0], the program name. */ - optstring = _getopt_initialize (argc, argv, optstring); - __getopt_initialized = 1; - } - - /* Test whether ARGV[optind] points to a non-option argument. - Either it does not have option syntax, or there is an environment flag - from the shell indicating it is not an option. The later information - is only used when the used in the GNU libc. */ -#if defined _LIBC && defined USE_NONOPTION_FLAGS -# define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0' \ - || (optind < nonoption_flags_len \ - && __getopt_nonoption_flags[optind] == '1')) -#else -# define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0') -#endif - - if (nextchar == NULL || *nextchar == '\0') - { - /* Advance to the next ARGV-element. */ - - /* Give FIRST_NONOPT and LAST_NONOPT rational values if OPTIND has been - moved back by the user (who may also have changed the arguments). */ - if (last_nonopt > optind) - last_nonopt = optind; - if (first_nonopt > optind) - first_nonopt = optind; - - if (ordering == PERMUTE) - { - /* If we have just processed some options following some non-options, - exchange them so that the options come first. */ - - if (first_nonopt != last_nonopt && last_nonopt != optind) - exchange ((char **) argv); - else if (last_nonopt != optind) - first_nonopt = optind; - - /* Skip any additional non-options - and extend the range of non-options previously skipped. 
*/ - - while (optind < argc && NONOPTION_P) - optind++; - last_nonopt = optind; - } - - /* The special ARGV-element `--' means premature end of options. - Skip it like a null option, - then exchange with previous non-options as if it were an option, - then skip everything else like a non-option. */ - - if (optind != argc && !strcmp (argv[optind], "--")) - { - optind++; - - if (first_nonopt != last_nonopt && last_nonopt != optind) - exchange ((char **) argv); - else if (first_nonopt == last_nonopt) - first_nonopt = optind; - last_nonopt = argc; - - optind = argc; - } - - /* If we have done all the ARGV-elements, stop the scan - and back over any non-options that we skipped and permuted. */ - - if (optind == argc) - { - /* Set the next-arg-index to point at the non-options - that we previously skipped, so the caller will digest them. */ - if (first_nonopt != last_nonopt) - optind = first_nonopt; - return -1; - } - - /* If we have come to a non-option and did not permute it, - either stop the scan or describe it to the caller and pass it by. */ - - if (NONOPTION_P) - { - if (ordering == REQUIRE_ORDER) - return -1; - optarg = argv[optind++]; - return 1; - } - - /* We have found another option-ARGV-element. - Skip the initial punctuation. */ - - nextchar = (argv[optind] + 1 - + (longopts != NULL && argv[optind][1] == '-')); - } - - /* Decode the current option-ARGV-element. */ - - /* Check whether the ARGV-element is a long option. - - If long_only and the ARGV-element has the form "-f", where f is - a valid short option, don't consider it an abbreviated form of - a long option that starts with f. Otherwise there would be no - way to give the -f short option. - - On the other hand, if there's a long option "fubar" and - the ARGV-element is "-fu", do consider that an abbreviation of - the long option, just like "--fu", and not "-f" with arg "u". - - This distinction seems to be the most useful approach. 
*/ - - if (longopts != NULL - && (argv[optind][1] == '-' - || (long_only && (argv[optind][2] || !my_index (optstring, argv[optind][1]))))) - { - char *nameend; - const struct option *p; - const struct option *pfound = NULL; - int exact = 0; - int ambig = 0; - int indfound = -1; - int option_index; - - for (nameend = nextchar; *nameend && *nameend != '='; nameend++) - /* Do nothing. */ ; - - /* Test all long options for either exact match - or abbreviated matches. */ - for (p = longopts, option_index = 0; p->name; p++, option_index++) - if (!strncmp (p->name, nextchar, nameend - nextchar)) - { - if ((unsigned int) (nameend - nextchar) - == (unsigned int) strlen (p->name)) - { - /* Exact match found. */ - pfound = p; - indfound = option_index; - exact = 1; - break; - } - else if (pfound == NULL) - { - /* First nonexact match found. */ - pfound = p; - indfound = option_index; - } - else if (long_only - || pfound->has_arg != p->has_arg - || pfound->flag != p->flag - || pfound->val != p->val) - /* Second or later nonexact match found. */ - ambig = 1; - } - - if (ambig && !exact) - { - if (print_errors) - { -#if defined _LIBC && defined USE_IN_LIBIO - char *buf; - - __asprintf (&buf, _("%s: option `%s' is ambiguous\n"), - argv[0], argv[optind]); - - if (_IO_fwide (stderr, 0) > 0) - __fwprintf (stderr, L"%s", buf); - else - fputs (buf, stderr); - - free (buf); -#else - fprintf (stderr, _("%s: option `%s' is ambiguous\n"), - argv[0], argv[optind]); -#endif - } - nextchar += strlen (nextchar); - optind++; - optopt = 0; - return '?'; - } - - if (pfound != NULL) - { - option_index = indfound; - optind++; - if (*nameend) - { - /* Don't test has_arg with >, because some C compilers don't - allow it to be used on enums. 
*/ - if (pfound->has_arg) - optarg = nameend + 1; - else - { - if (print_errors) - { -#if defined _LIBC && defined USE_IN_LIBIO - char *buf; -#endif - - if (argv[optind - 1][1] == '-') - { - /* --option */ -#if defined _LIBC && defined USE_IN_LIBIO - __asprintf (&buf, _("\ -%s: option `--%s' doesn't allow an argument\n"), - argv[0], pfound->name); -#else - fprintf (stderr, _("\ -%s: option `--%s' doesn't allow an argument\n"), - argv[0], pfound->name); -#endif - } - else - { - /* +option or -option */ -#if defined _LIBC && defined USE_IN_LIBIO - __asprintf (&buf, _("\ -%s: option `%c%s' doesn't allow an argument\n"), - argv[0], argv[optind - 1][0], - pfound->name); -#else - fprintf (stderr, _("\ -%s: option `%c%s' doesn't allow an argument\n"), - argv[0], argv[optind - 1][0], pfound->name); -#endif - } - -#if defined _LIBC && defined USE_IN_LIBIO - if (_IO_fwide (stderr, 0) > 0) - __fwprintf (stderr, L"%s", buf); - else - fputs (buf, stderr); - - free (buf); -#endif - } - - nextchar += strlen (nextchar); - - optopt = pfound->val; - return '?'; - } - } - else if (pfound->has_arg == 1) - { - if (optind < argc) - optarg = argv[optind++]; - else - { - if (print_errors) - { -#if defined _LIBC && defined USE_IN_LIBIO - char *buf; - - __asprintf (&buf, - _("%s: option `%s' requires an argument\n"), - argv[0], argv[optind - 1]); - - if (_IO_fwide (stderr, 0) > 0) - __fwprintf (stderr, L"%s", buf); - else - fputs (buf, stderr); - - free (buf); -#else - fprintf (stderr, - _("%s: option `%s' requires an argument\n"), - argv[0], argv[optind - 1]); -#endif - } - nextchar += strlen (nextchar); - optopt = pfound->val; - return optstring[0] == ':' ? ':' : '?'; - } - } - nextchar += strlen (nextchar); - if (longind != NULL) - *longind = option_index; - if (pfound->flag) - { - *(pfound->flag) = pfound->val; - return 0; - } - return pfound->val; - } - - /* Can't find it as a long option. 
If this is not getopt_long_only, - or the option starts with '--' or is not a valid short - option, then it's an error. - Otherwise interpret it as a short option. */ - if (!long_only || argv[optind][1] == '-' - || my_index (optstring, *nextchar) == NULL) - { - if (print_errors) - { -#if defined _LIBC && defined USE_IN_LIBIO - char *buf; -#endif - - if (argv[optind][1] == '-') - { - /* --option */ -#if defined _LIBC && defined USE_IN_LIBIO - __asprintf (&buf, _("%s: unrecognized option `--%s'\n"), - argv[0], nextchar); -#else - fprintf (stderr, _("%s: unrecognized option `--%s'\n"), - argv[0], nextchar); -#endif - } - else - { - /* +option or -option */ -#if defined _LIBC && defined USE_IN_LIBIO - __asprintf (&buf, _("%s: unrecognized option `%c%s'\n"), - argv[0], argv[optind][0], nextchar); -#else - fprintf (stderr, _("%s: unrecognized option `%c%s'\n"), - argv[0], argv[optind][0], nextchar); -#endif - } - -#if defined _LIBC && defined USE_IN_LIBIO - if (_IO_fwide (stderr, 0) > 0) - __fwprintf (stderr, L"%s", buf); - else - fputs (buf, stderr); - - free (buf); -#endif - } - nextchar = (char *) ""; - optind++; - optopt = 0; - return '?'; - } - } - - /* Look at and handle the next short option-character. */ - - { - char c = *nextchar++; - char *temp = my_index (optstring, c); - - /* Increment `optind' when we start to process its last character. */ - if (*nextchar == '\0') - ++optind; - - if (temp == NULL || c == ':') - { - if (print_errors) - { -#if defined _LIBC && defined USE_IN_LIBIO - char *buf; -#endif - - if (posixly_correct) - { - /* 1003.2 specifies the format of this message. 
*/ -#if defined _LIBC && defined USE_IN_LIBIO - __asprintf (&buf, _("%s: illegal option -- %c\n"), - argv[0], c); -#else - fprintf (stderr, _("%s: illegal option -- %c\n"), argv[0], c); -#endif - } - else - { -#if defined _LIBC && defined USE_IN_LIBIO - __asprintf (&buf, _("%s: invalid option -- %c\n"), - argv[0], c); -#else - fprintf (stderr, _("%s: invalid option -- %c\n"), argv[0], c); -#endif - } - -#if defined _LIBC && defined USE_IN_LIBIO - if (_IO_fwide (stderr, 0) > 0) - __fwprintf (stderr, L"%s", buf); - else - fputs (buf, stderr); - - free (buf); -#endif - } - optopt = c; - return '?'; - } - /* Convenience. Treat POSIX -W foo same as long option --foo */ - if (temp[0] == 'W' && temp[1] == ';') - { - char *nameend; - const struct option *p; - const struct option *pfound = NULL; - int exact = 0; - int ambig = 0; - int indfound = 0; - int option_index; - - /* This is an option that requires an argument. */ - if (*nextchar != '\0') - { - optarg = nextchar; - /* If we end this ARGV-element by taking the rest as an arg, - we must advance to the next element now. */ - optind++; - } - else if (optind == argc) - { - if (print_errors) - { - /* 1003.2 specifies the format of this message. */ -#if defined _LIBC && defined USE_IN_LIBIO - char *buf; - - __asprintf (&buf, _("%s: option requires an argument -- %c\n"), - argv[0], c); - - if (_IO_fwide (stderr, 0) > 0) - __fwprintf (stderr, L"%s", buf); - else - fputs (buf, stderr); - - free (buf); -#else - fprintf (stderr, _("%s: option requires an argument -- %c\n"), - argv[0], c); -#endif - } - optopt = c; - if (optstring[0] == ':') - c = ':'; - else - c = '?'; - return c; - } - else - /* We already incremented `optind' once; - increment it again when taking next ARGV-elt as argument. */ - optarg = argv[optind++]; - - /* optarg is now the argument, see if it's in the - table of longopts. */ - - for (nextchar = nameend = optarg; *nameend && *nameend != '='; nameend++) - /* Do nothing. 
*/ ; - - /* Test all long options for either exact match - or abbreviated matches. */ - for (p = longopts, option_index = 0; p->name; p++, option_index++) - if (!strncmp (p->name, nextchar, nameend - nextchar)) - { - if ((unsigned int) (nameend - nextchar) == strlen (p->name)) - { - /* Exact match found. */ - pfound = p; - indfound = option_index; - exact = 1; - break; - } - else if (pfound == NULL) - { - /* First nonexact match found. */ - pfound = p; - indfound = option_index; - } - else - /* Second or later nonexact match found. */ - ambig = 1; - } - if (ambig && !exact) - { - if (print_errors) - { -#if defined _LIBC && defined USE_IN_LIBIO - char *buf; - - __asprintf (&buf, _("%s: option `-W %s' is ambiguous\n"), - argv[0], argv[optind]); - - if (_IO_fwide (stderr, 0) > 0) - __fwprintf (stderr, L"%s", buf); - else - fputs (buf, stderr); - - free (buf); -#else - fprintf (stderr, _("%s: option `-W %s' is ambiguous\n"), - argv[0], argv[optind]); -#endif - } - nextchar += strlen (nextchar); - optind++; - return '?'; - } - if (pfound != NULL) - { - option_index = indfound; - if (*nameend) - { - /* Don't test has_arg with >, because some C compilers don't - allow it to be used on enums. 
*/ - if (pfound->has_arg) - optarg = nameend + 1; - else - { - if (print_errors) - { -#if defined _LIBC && defined USE_IN_LIBIO - char *buf; - - __asprintf (&buf, _("\ -%s: option `-W %s' doesn't allow an argument\n"), - argv[0], pfound->name); - - if (_IO_fwide (stderr, 0) > 0) - __fwprintf (stderr, L"%s", buf); - else - fputs (buf, stderr); - - free (buf); -#else - fprintf (stderr, _("\ -%s: option `-W %s' doesn't allow an argument\n"), - argv[0], pfound->name); -#endif - } - - nextchar += strlen (nextchar); - return '?'; - } - } - else if (pfound->has_arg == 1) - { - if (optind < argc) - optarg = argv[optind++]; - else - { - if (print_errors) - { -#if defined _LIBC && defined USE_IN_LIBIO - char *buf; - - __asprintf (&buf, _("\ -%s: option `%s' requires an argument\n"), - argv[0], argv[optind - 1]); - - if (_IO_fwide (stderr, 0) > 0) - __fwprintf (stderr, L"%s", buf); - else - fputs (buf, stderr); - - free (buf); -#else - fprintf (stderr, - _("%s: option `%s' requires an argument\n"), - argv[0], argv[optind - 1]); -#endif - } - nextchar += strlen (nextchar); - return optstring[0] == ':' ? ':' : '?'; - } - } - nextchar += strlen (nextchar); - if (longind != NULL) - *longind = option_index; - if (pfound->flag) - { - *(pfound->flag) = pfound->val; - return 0; - } - return pfound->val; - } - nextchar = NULL; - return 'W'; /* Let the application handle it. */ - } - if (temp[1] == ':') - { - if (temp[2] == ':') - { - /* This is an option that accepts an argument optionally. */ - if (*nextchar != '\0') - { - optarg = nextchar; - optind++; - } - else - optarg = NULL; - nextchar = NULL; - } - else - { - /* This is an option that requires an argument. */ - if (*nextchar != '\0') - { - optarg = nextchar; - /* If we end this ARGV-element by taking the rest as an arg, - we must advance to the next element now. */ - optind++; - } - else if (optind == argc) - { - if (print_errors) - { - /* 1003.2 specifies the format of this message. 
*/ -#if defined _LIBC && defined USE_IN_LIBIO - char *buf; - - __asprintf (&buf, - _("%s: option requires an argument -- %c\n"), - argv[0], c); - - if (_IO_fwide (stderr, 0) > 0) - __fwprintf (stderr, L"%s", buf); - else - fputs (buf, stderr); - - free (buf); -#else - fprintf (stderr, - _("%s: option requires an argument -- %c\n"), - argv[0], c); -#endif - } - optopt = c; - if (optstring[0] == ':') - c = ':'; - else - c = '?'; - } - else - /* We already incremented `optind' once; - increment it again when taking next ARGV-elt as argument. */ - optarg = argv[optind++]; - nextchar = NULL; - } - } - return c; - } -} - -int -getopt (argc, argv, optstring) - int argc; - char *const *argv; - const char *optstring; -{ - return _getopt_internal (argc, argv, optstring, - (const struct option *) 0, - (int *) 0, - 0); -} - -#endif /* Not ELIDE_CODE. */ - - -/* Compile with -DTEST to make an executable for use in testing +/* Getopt for GNU. + NOTE: getopt is now part of the C library, so if you don't know what + "Keep this file name-space clean" means, talk to drepper@gnu.org + before changing it! + Copyright (C) 1987,88,89,90,91,92,93,94,95,96,98,99,2000,2001 + Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. 
*/ + +/* This tells Alpha OSF/1 not to define a getopt prototype in . + Ditto for AIX 3.2 and . */ +#ifndef _NO_PROTO +# define _NO_PROTO +#endif + +#ifdef HAVE_CONFIG_H +# include +#endif + +#if !defined __STDC__ || !__STDC__ +/* This is a separate conditional since some stdc systems + reject `defined (const)'. */ +# ifndef const +# define const +# endif +#endif + +#include + +/* Comment out all this code if we are using the GNU C Library, and are not + actually compiling the library itself. This code is part of the GNU C + Library, but also included in many other GNU distributions. Compiling + and linking in this code is a waste when using the GNU C library + (especially if it is a shared library). Rather than having every GNU + program understand `configure --with-gnu-libc' and omit the object files, + it is simpler to just do this in the source for each such file. */ + +#define GETOPT_INTERFACE_VERSION 2 +#if !defined _LIBC && defined __GLIBC__ && __GLIBC__ >= 2 +# include +# if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION +# define ELIDE_CODE +# endif +#endif + +#ifndef ELIDE_CODE + + +/* This needs to come after some library #include + to get __GNU_LIBRARY__ defined. */ +#ifdef __GNU_LIBRARY__ +/* Don't include stdlib.h for non-GNU C libraries because some of them + contain conflicting prototypes for getopt. */ +# include +# include +#endif /* GNU C library. */ + +#ifdef VMS +# include +# if HAVE_STRING_H - 0 +# include +# endif +#endif + +#ifndef _ +/* This is for other GNU distributions with internationalized messages. 
*/ +# if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC +# include +# ifndef _ +# define _(msgid) gettext (msgid) +# endif +# else +# define _(msgid) (msgid) +# endif +# if defined _LIBC && defined USE_IN_LIBIO +# include +# endif +#endif + +/* This version of `getopt' appears to the caller like standard Unix `getopt' + but it behaves differently for the user, since it allows the user + to intersperse the options with the other arguments. + + As `getopt' works, it permutes the elements of ARGV so that, + when it is done, all the options precede everything else. Thus + all application programs are extended to handle flexible argument order. + + Setting the environment variable POSIXLY_CORRECT disables permutation. + Then the behavior is completely standard. + + GNU application programs can use a third alternative mode in which + they can distinguish the relative order of options and other arguments. */ + +#include "getopt.h" + +/* For communication from `getopt' to the caller. + When `getopt' finds an option that takes an argument, + the argument value is returned here. + Also, when `ordering' is RETURN_IN_ORDER, + each non-option ARGV-element is returned here. */ + +char *optarg; + +/* Index in ARGV of the next element to be scanned. + This is used for communication to and from the caller + and for communication between successive calls to `getopt'. + + On entry to `getopt', zero means this is the first call; initialize. + + When `getopt' returns -1, this is the index of the first of the + non-option elements that the caller should itself scan. + + Otherwise, `optind' communicates from one call to the next + how much of ARGV has been scanned so far. */ + +/* 1003.2 says this must be 1 before any call. */ +int optind = 1; + +/* Formerly, initialization of getopt depended on optind==0, which + causes problems with re-calling getopt as programs generally don't + know that. 
*/ + +int __getopt_initialized; + +/* The next char to be scanned in the option-element + in which the last option character we returned was found. + This allows us to pick up the scan where we left off. + + If this is zero, or a null string, it means resume the scan + by advancing to the next ARGV-element. */ + +static char *nextchar; + +/* Callers store zero here to inhibit the error message + for unrecognized options. */ + +int opterr = 1; + +/* Set to an option character which was unrecognized. + This must be initialized on some systems to avoid linking in the + system's own getopt implementation. */ + +int optopt = '?'; + +/* Describe how to deal with options that follow non-option ARGV-elements. + + If the caller did not specify anything, + the default is REQUIRE_ORDER if the environment variable + POSIXLY_CORRECT is defined, PERMUTE otherwise. + + REQUIRE_ORDER means don't recognize them as options; + stop option processing when the first non-option is seen. + This is what Unix does. + This mode of operation is selected by either setting the environment + variable POSIXLY_CORRECT, or using `+' as the first character + of the list of option characters. + + PERMUTE is the default. We permute the contents of ARGV as we scan, + so that eventually all the non-options are at the end. This allows options + to be given in any order, even with programs that were not written to + expect this. + + RETURN_IN_ORDER is an option available to programs that were written + to expect options and other ARGV-elements in any order and that care about + the ordering of the two. We describe each non-option ARGV-element + as if it were the argument of an option with character code 1. + Using `-' as the first character of the list of option characters + selects this mode of operation. + + The special argument `--' forces an end of option-scanning regardless + of the value of `ordering'. 
In the case of RETURN_IN_ORDER, only + `--' can cause `getopt' to return -1 with `optind' != ARGC. */ + +static enum +{ + REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER +} ordering; + +/* Value of POSIXLY_CORRECT environment variable. */ +static char *posixly_correct; + +#ifdef __GNU_LIBRARY__ +/* We want to avoid inclusion of string.h with non-GNU libraries + because there are many ways it can cause trouble. + On some systems, it contains special magic macros that don't work + in GCC. */ +# include +# define my_index strchr +#else + +//# if HAVE_STRING_H || WIN32 /* Pete Wilson mod 7/28/02 */ +# include +//# else +//# include +//# endif + +/* Avoid depending on library functions or files + whose names are inconsistent. */ + +#ifndef getenv +extern char *getenv (); +#endif + +static char * +my_index (str, chr) + const char *str; + int chr; +{ + while (*str) + { + if (*str == chr) + return (char *) str; + str++; + } + return 0; +} + +/* If using GCC, we can safely declare strlen this way. + If not using GCC, it is ok not to declare it. */ +#ifdef __GNUC__ +/* Note that Motorola Delta 68k R3V7 comes with GCC but not stddef.h. + That was relevant to code that was here before. */ +# if (!defined __STDC__ || !__STDC__) && !defined strlen +/* gcc with -traditional declares the built-in strlen to return int, + and has done so at least since version 2.4.5. -- rms. */ +extern int strlen (const char *); +# endif /* not __STDC__ */ +#endif /* __GNUC__ */ + +#endif /* not __GNU_LIBRARY__ */ + +/* Handle permutation of arguments. */ + +/* Describe the part of ARGV that contains non-options that have + been skipped. `first_nonopt' is the index in ARGV of the first of them; + `last_nonopt' is the index after the last of them. */ + +static int first_nonopt; +static int last_nonopt; + +#ifdef _LIBC +/* Stored original parameters. + XXX This is no good solution. We should rather copy the args so + that we can compare them later. But we must not use malloc(3). 
*/ +extern int __libc_argc; +extern char **__libc_argv; + +/* Bash 2.0 gives us an environment variable containing flags + indicating ARGV elements that should not be considered arguments. */ + +# ifdef USE_NONOPTION_FLAGS +/* Defined in getopt_init.c */ +extern char *__getopt_nonoption_flags; + +static int nonoption_flags_max_len; +static int nonoption_flags_len; +# endif + +# ifdef USE_NONOPTION_FLAGS +# define SWAP_FLAGS(ch1, ch2) \ + if (nonoption_flags_len > 0) \ + { \ + char __tmp = __getopt_nonoption_flags[ch1]; \ + __getopt_nonoption_flags[ch1] = __getopt_nonoption_flags[ch2]; \ + __getopt_nonoption_flags[ch2] = __tmp; \ + } +# else +# define SWAP_FLAGS(ch1, ch2) +# endif +#else /* !_LIBC */ +# define SWAP_FLAGS(ch1, ch2) +#endif /* _LIBC */ + +/* Exchange two adjacent subsequences of ARGV. + One subsequence is elements [first_nonopt,last_nonopt) + which contains all the non-options that have been skipped so far. + The other is elements [last_nonopt,optind), which contains all + the options processed since those non-options were skipped. + + `first_nonopt' and `last_nonopt' are relocated so that they describe + the new indices of the non-options in ARGV after they are moved. */ + +#if defined __STDC__ && __STDC__ +static void exchange (char **); +#endif + +static void +exchange (argv) + char **argv; +{ + int bottom = first_nonopt; + int middle = last_nonopt; + int top = optind; + char *tem; + + /* Exchange the shorter segment with the far end of the longer segment. + That puts the shorter segment into the right place. + It leaves the longer segment in the right place overall, + but it consists of two parts that need to be swapped next. */ + +#if defined _LIBC && defined USE_NONOPTION_FLAGS + /* First make sure the handling of the `__getopt_nonoption_flags' + string can work normally. Our top argument must be in the range + of the string. */ + if (nonoption_flags_len > 0 && top >= nonoption_flags_max_len) + { + /* We must extend the array. 
The user plays games with us and + presents new arguments. */ + char *new_str = malloc (top + 1); + if (new_str == NULL) + nonoption_flags_len = nonoption_flags_max_len = 0; + else + { + memset (__mempcpy (new_str, __getopt_nonoption_flags, + nonoption_flags_max_len), + '\0', top + 1 - nonoption_flags_max_len); + nonoption_flags_max_len = top + 1; + __getopt_nonoption_flags = new_str; + } + } +#endif + + while (top > middle && middle > bottom) + { + if (top - middle > middle - bottom) + { + /* Bottom segment is the short one. */ + int len = middle - bottom; + register int i; + + /* Swap it with the top part of the top segment. */ + for (i = 0; i < len; i++) + { + tem = argv[bottom + i]; + argv[bottom + i] = argv[top - (middle - bottom) + i]; + argv[top - (middle - bottom) + i] = tem; + SWAP_FLAGS (bottom + i, top - (middle - bottom) + i); + } + /* Exclude the moved bottom segment from further swapping. */ + top -= len; + } + else + { + /* Top segment is the short one. */ + int len = top - middle; + register int i; + + /* Swap it with the bottom part of the bottom segment. */ + for (i = 0; i < len; i++) + { + tem = argv[bottom + i]; + argv[bottom + i] = argv[middle + i]; + argv[middle + i] = tem; + SWAP_FLAGS (bottom + i, middle + i); + } + /* Exclude the moved top segment from further swapping. */ + bottom += len; + } + } + + /* Update records for the slots the non-options now occupy. */ + + first_nonopt += (optind - last_nonopt); + last_nonopt = optind; +} + +/* Initialize the internal data when the first call is made. */ + +#if defined __STDC__ && __STDC__ +static const char *_getopt_initialize (int, char *const *, const char *); +#endif +static const char * +_getopt_initialize (argc, argv, optstring) + int argc; + char *const *argv; + const char *optstring; +{ + /* Start processing options with ARGV-element 1 (since ARGV-element 0 + is the program name); the sequence of previously skipped + non-option ARGV-elements is empty. 
*/ + + first_nonopt = last_nonopt = optind; + + nextchar = NULL; + + posixly_correct = getenv ("POSIXLY_CORRECT"); + + /* Determine how to handle the ordering of options and nonoptions. */ + + if (optstring[0] == '-') + { + ordering = RETURN_IN_ORDER; + ++optstring; + } + else if (optstring[0] == '+') + { + ordering = REQUIRE_ORDER; + ++optstring; + } + else if (posixly_correct != NULL) + ordering = REQUIRE_ORDER; + else + ordering = PERMUTE; + +#if defined _LIBC && defined USE_NONOPTION_FLAGS + if (posixly_correct == NULL + && argc == __libc_argc && argv == __libc_argv) + { + if (nonoption_flags_max_len == 0) + { + if (__getopt_nonoption_flags == NULL + || __getopt_nonoption_flags[0] == '\0') + nonoption_flags_max_len = -1; + else + { + const char *orig_str = __getopt_nonoption_flags; + int len = nonoption_flags_max_len = strlen (orig_str); + if (nonoption_flags_max_len < argc) + nonoption_flags_max_len = argc; + __getopt_nonoption_flags = + (char *) malloc (nonoption_flags_max_len); + if (__getopt_nonoption_flags == NULL) + nonoption_flags_max_len = -1; + else + memset (__mempcpy (__getopt_nonoption_flags, orig_str, len), + '\0', nonoption_flags_max_len - len); + } + } + nonoption_flags_len = nonoption_flags_max_len; + } + else + nonoption_flags_len = 0; +#endif + + return optstring; +} + +/* Scan elements of ARGV (whose length is ARGC) for option characters + given in OPTSTRING. + + If an element of ARGV starts with '-', and is not exactly "-" or "--", + then it is an option element. The characters of this element + (aside from the initial '-') are option characters. If `getopt' + is called repeatedly, it returns successively each of the option characters + from each of the option elements. + + If `getopt' finds another option character, it returns that character, + updating `optind' and `nextchar' so that the next call to `getopt' can + resume the scan with the following option character or ARGV-element. 
+ + If there are no more option characters, `getopt' returns -1. + Then `optind' is the index in ARGV of the first ARGV-element + that is not an option. (The ARGV-elements have been permuted + so that those that are not options now come last.) + + OPTSTRING is a string containing the legitimate option characters. + If an option character is seen that is not listed in OPTSTRING, + return '?' after printing an error message. If you set `opterr' to + zero, the error message is suppressed but we still return '?'. + + If a char in OPTSTRING is followed by a colon, that means it wants an arg, + so the following text in the same ARGV-element, or the text of the following + ARGV-element, is returned in `optarg'. Two colons mean an option that + wants an optional arg; if there is text in the current ARGV-element, + it is returned in `optarg', otherwise `optarg' is set to zero. + + If OPTSTRING starts with `-' or `+', it requests different methods of + handling the non-option ARGV-elements. + See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above. + + Long-named options begin with `--' instead of `-'. + Their names may be abbreviated as long as the abbreviation is unique + or is an exact match for some defined option. If they have an + argument, it follows the option name in the same ARGV-element, separated + from the option name by a `=', or else the in next ARGV-element. + When `getopt' finds a long-named option, it returns 0 if that option's + `flag' field is nonzero, the value of the option's `val' field + if the `flag' field is zero. + + The elements of ARGV aren't really const, because we permute them. + But we pretend they're const in the prototype to be compatible + with other systems. + + LONGOPTS is a vector of `struct option' terminated by an + element containing a name which is zero. + + LONGIND returns the index in LONGOPT of the long-named option found. + It is only valid when a long-named option has been found by the most + recent call. 
+ + If LONG_ONLY is nonzero, '-' as well as '--' can introduce + long-named options. */ + +int +_getopt_internal (argc, argv, optstring, longopts, longind, long_only) + int argc; + char *const *argv; + const char *optstring; + const struct option *longopts; + int *longind; + int long_only; +{ + int print_errors = opterr; + if (optstring[0] == ':') + print_errors = 0; + + if (argc < 1) + return -1; + + optarg = NULL; + + if (optind == 0 || !__getopt_initialized) + { + if (optind == 0) + optind = 1; /* Don't scan ARGV[0], the program name. */ + optstring = _getopt_initialize (argc, argv, optstring); + __getopt_initialized = 1; + } + + /* Test whether ARGV[optind] points to a non-option argument. + Either it does not have option syntax, or there is an environment flag + from the shell indicating it is not an option. The later information + is only used when the used in the GNU libc. */ +#if defined _LIBC && defined USE_NONOPTION_FLAGS +# define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0' \ + || (optind < nonoption_flags_len \ + && __getopt_nonoption_flags[optind] == '1')) +#else +# define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0') +#endif + + if (nextchar == NULL || *nextchar == '\0') + { + /* Advance to the next ARGV-element. */ + + /* Give FIRST_NONOPT and LAST_NONOPT rational values if OPTIND has been + moved back by the user (who may also have changed the arguments). */ + if (last_nonopt > optind) + last_nonopt = optind; + if (first_nonopt > optind) + first_nonopt = optind; + + if (ordering == PERMUTE) + { + /* If we have just processed some options following some non-options, + exchange them so that the options come first. */ + + if (first_nonopt != last_nonopt && last_nonopt != optind) + exchange ((char **) argv); + else if (last_nonopt != optind) + first_nonopt = optind; + + /* Skip any additional non-options + and extend the range of non-options previously skipped. 
*/ + + while (optind < argc && NONOPTION_P) + optind++; + last_nonopt = optind; + } + + /* The special ARGV-element `--' means premature end of options. + Skip it like a null option, + then exchange with previous non-options as if it were an option, + then skip everything else like a non-option. */ + + if (optind != argc && !strcmp (argv[optind], "--")) + { + optind++; + + if (first_nonopt != last_nonopt && last_nonopt != optind) + exchange ((char **) argv); + else if (first_nonopt == last_nonopt) + first_nonopt = optind; + last_nonopt = argc; + + optind = argc; + } + + /* If we have done all the ARGV-elements, stop the scan + and back over any non-options that we skipped and permuted. */ + + if (optind == argc) + { + /* Set the next-arg-index to point at the non-options + that we previously skipped, so the caller will digest them. */ + if (first_nonopt != last_nonopt) + optind = first_nonopt; + return -1; + } + + /* If we have come to a non-option and did not permute it, + either stop the scan or describe it to the caller and pass it by. */ + + if (NONOPTION_P) + { + if (ordering == REQUIRE_ORDER) + return -1; + optarg = argv[optind++]; + return 1; + } + + /* We have found another option-ARGV-element. + Skip the initial punctuation. */ + + nextchar = (argv[optind] + 1 + + (longopts != NULL && argv[optind][1] == '-')); + } + + /* Decode the current option-ARGV-element. */ + + /* Check whether the ARGV-element is a long option. + + If long_only and the ARGV-element has the form "-f", where f is + a valid short option, don't consider it an abbreviated form of + a long option that starts with f. Otherwise there would be no + way to give the -f short option. + + On the other hand, if there's a long option "fubar" and + the ARGV-element is "-fu", do consider that an abbreviation of + the long option, just like "--fu", and not "-f" with arg "u". + + This distinction seems to be the most useful approach. 
*/ + + if (longopts != NULL + && (argv[optind][1] == '-' + || (long_only && (argv[optind][2] || !my_index (optstring, argv[optind][1]))))) + { + char *nameend; + const struct option *p; + const struct option *pfound = NULL; + int exact = 0; + int ambig = 0; + int indfound = -1; + int option_index; + + for (nameend = nextchar; *nameend && *nameend != '='; nameend++) + /* Do nothing. */ ; + + /* Test all long options for either exact match + or abbreviated matches. */ + for (p = longopts, option_index = 0; p->name; p++, option_index++) + if (!strncmp (p->name, nextchar, nameend - nextchar)) + { + if ((unsigned int) (nameend - nextchar) + == (unsigned int) strlen (p->name)) + { + /* Exact match found. */ + pfound = p; + indfound = option_index; + exact = 1; + break; + } + else if (pfound == NULL) + { + /* First nonexact match found. */ + pfound = p; + indfound = option_index; + } + else if (long_only + || pfound->has_arg != p->has_arg + || pfound->flag != p->flag + || pfound->val != p->val) + /* Second or later nonexact match found. */ + ambig = 1; + } + + if (ambig && !exact) + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, _("%s: option `%s' is ambiguous\n"), + argv[0], argv[optind]); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, _("%s: option `%s' is ambiguous\n"), + argv[0], argv[optind]); +#endif + } + nextchar += strlen (nextchar); + optind++; + optopt = 0; + return '?'; + } + + if (pfound != NULL) + { + option_index = indfound; + optind++; + if (*nameend) + { + /* Don't test has_arg with >, because some C compilers don't + allow it to be used on enums. 
*/ + if (pfound->has_arg) + optarg = nameend + 1; + else + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; +#endif + + if (argv[optind - 1][1] == '-') + { + /* --option */ +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("\ +%s: option `--%s' doesn't allow an argument\n"), + argv[0], pfound->name); +#else + fprintf (stderr, _("\ +%s: option `--%s' doesn't allow an argument\n"), + argv[0], pfound->name); +#endif + } + else + { + /* +option or -option */ +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("\ +%s: option `%c%s' doesn't allow an argument\n"), + argv[0], argv[optind - 1][0], + pfound->name); +#else + fprintf (stderr, _("\ +%s: option `%c%s' doesn't allow an argument\n"), + argv[0], argv[optind - 1][0], pfound->name); +#endif + } + +#if defined _LIBC && defined USE_IN_LIBIO + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#endif + } + + nextchar += strlen (nextchar); + + optopt = pfound->val; + return '?'; + } + } + else if (pfound->has_arg == 1) + { + if (optind < argc) + optarg = argv[optind++]; + else + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, + _("%s: option `%s' requires an argument\n"), + argv[0], argv[optind - 1]); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, + _("%s: option `%s' requires an argument\n"), + argv[0], argv[optind - 1]); +#endif + } + nextchar += strlen (nextchar); + optopt = pfound->val; + return optstring[0] == ':' ? ':' : '?'; + } + } + nextchar += strlen (nextchar); + if (longind != NULL) + *longind = option_index; + if (pfound->flag) + { + *(pfound->flag) = pfound->val; + return 0; + } + return pfound->val; + } + + /* Can't find it as a long option. 
If this is not getopt_long_only, + or the option starts with '--' or is not a valid short + option, then it's an error. + Otherwise interpret it as a short option. */ + if (!long_only || argv[optind][1] == '-' + || my_index (optstring, *nextchar) == NULL) + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; +#endif + + if (argv[optind][1] == '-') + { + /* --option */ +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("%s: unrecognized option `--%s'\n"), + argv[0], nextchar); +#else + fprintf (stderr, _("%s: unrecognized option `--%s'\n"), + argv[0], nextchar); +#endif + } + else + { + /* +option or -option */ +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("%s: unrecognized option `%c%s'\n"), + argv[0], argv[optind][0], nextchar); +#else + fprintf (stderr, _("%s: unrecognized option `%c%s'\n"), + argv[0], argv[optind][0], nextchar); +#endif + } + +#if defined _LIBC && defined USE_IN_LIBIO + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#endif + } + nextchar = (char *) ""; + optind++; + optopt = 0; + return '?'; + } + } + + /* Look at and handle the next short option-character. */ + + { + char c = *nextchar++; + char *temp = my_index (optstring, c); + + /* Increment `optind' when we start to process its last character. */ + if (*nextchar == '\0') + ++optind; + + if (temp == NULL || c == ':') + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; +#endif + + if (posixly_correct) + { + /* 1003.2 specifies the format of this message. 
*/ +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("%s: illegal option -- %c\n"), + argv[0], c); +#else + fprintf (stderr, _("%s: illegal option -- %c\n"), argv[0], c); +#endif + } + else + { +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("%s: invalid option -- %c\n"), + argv[0], c); +#else + fprintf (stderr, _("%s: invalid option -- %c\n"), argv[0], c); +#endif + } + +#if defined _LIBC && defined USE_IN_LIBIO + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#endif + } + optopt = c; + return '?'; + } + /* Convenience. Treat POSIX -W foo same as long option --foo */ + if (temp[0] == 'W' && temp[1] == ';') + { + char *nameend; + const struct option *p; + const struct option *pfound = NULL; + int exact = 0; + int ambig = 0; + int indfound = 0; + int option_index; + + /* This is an option that requires an argument. */ + if (*nextchar != '\0') + { + optarg = nextchar; + /* If we end this ARGV-element by taking the rest as an arg, + we must advance to the next element now. */ + optind++; + } + else if (optind == argc) + { + if (print_errors) + { + /* 1003.2 specifies the format of this message. */ +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, _("%s: option requires an argument -- %c\n"), + argv[0], c); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, _("%s: option requires an argument -- %c\n"), + argv[0], c); +#endif + } + optopt = c; + if (optstring[0] == ':') + c = ':'; + else + c = '?'; + return c; + } + else + /* We already incremented `optind' once; + increment it again when taking next ARGV-elt as argument. */ + optarg = argv[optind++]; + + /* optarg is now the argument, see if it's in the + table of longopts. */ + + for (nextchar = nameend = optarg; *nameend && *nameend != '='; nameend++) + /* Do nothing. 
*/ ; + + /* Test all long options for either exact match + or abbreviated matches. */ + for (p = longopts, option_index = 0; p->name; p++, option_index++) + if (!strncmp (p->name, nextchar, nameend - nextchar)) + { + if ((unsigned int) (nameend - nextchar) == strlen (p->name)) + { + /* Exact match found. */ + pfound = p; + indfound = option_index; + exact = 1; + break; + } + else if (pfound == NULL) + { + /* First nonexact match found. */ + pfound = p; + indfound = option_index; + } + else + /* Second or later nonexact match found. */ + ambig = 1; + } + if (ambig && !exact) + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, _("%s: option `-W %s' is ambiguous\n"), + argv[0], argv[optind]); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, _("%s: option `-W %s' is ambiguous\n"), + argv[0], argv[optind]); +#endif + } + nextchar += strlen (nextchar); + optind++; + return '?'; + } + if (pfound != NULL) + { + option_index = indfound; + if (*nameend) + { + /* Don't test has_arg with >, because some C compilers don't + allow it to be used on enums. 
*/ + if (pfound->has_arg) + optarg = nameend + 1; + else + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, _("\ +%s: option `-W %s' doesn't allow an argument\n"), + argv[0], pfound->name); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, _("\ +%s: option `-W %s' doesn't allow an argument\n"), + argv[0], pfound->name); +#endif + } + + nextchar += strlen (nextchar); + return '?'; + } + } + else if (pfound->has_arg == 1) + { + if (optind < argc) + optarg = argv[optind++]; + else + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, _("\ +%s: option `%s' requires an argument\n"), + argv[0], argv[optind - 1]); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, + _("%s: option `%s' requires an argument\n"), + argv[0], argv[optind - 1]); +#endif + } + nextchar += strlen (nextchar); + return optstring[0] == ':' ? ':' : '?'; + } + } + nextchar += strlen (nextchar); + if (longind != NULL) + *longind = option_index; + if (pfound->flag) + { + *(pfound->flag) = pfound->val; + return 0; + } + return pfound->val; + } + nextchar = NULL; + return 'W'; /* Let the application handle it. */ + } + if (temp[1] == ':') + { + if (temp[2] == ':') + { + /* This is an option that accepts an argument optionally. */ + if (*nextchar != '\0') + { + optarg = nextchar; + optind++; + } + else + optarg = NULL; + nextchar = NULL; + } + else + { + /* This is an option that requires an argument. */ + if (*nextchar != '\0') + { + optarg = nextchar; + /* If we end this ARGV-element by taking the rest as an arg, + we must advance to the next element now. */ + optind++; + } + else if (optind == argc) + { + if (print_errors) + { + /* 1003.2 specifies the format of this message. 
*/ +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, + _("%s: option requires an argument -- %c\n"), + argv[0], c); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, + _("%s: option requires an argument -- %c\n"), + argv[0], c); +#endif + } + optopt = c; + if (optstring[0] == ':') + c = ':'; + else + c = '?'; + } + else + /* We already incremented `optind' once; + increment it again when taking next ARGV-elt as argument. */ + optarg = argv[optind++]; + nextchar = NULL; + } + } + return c; + } +} + +int +getopt (argc, argv, optstring) + int argc; + char *const *argv; + const char *optstring; +{ + return _getopt_internal (argc, argv, optstring, + (const struct option *) 0, + (int *) 0, + 0); +} + +#endif /* Not ELIDE_CODE. */ + + +/* Compile with -DTEST to make an executable for use in testing the above definition of `getopt'. */ \ No newline at end of file diff --git a/benchmarks/opencl/kmeans/getopt.h b/benchmarks/opencl/kmeans/getopt.h index bae04bf7..2a2e7577 100755 --- a/benchmarks/opencl/kmeans/getopt.h +++ b/benchmarks/opencl/kmeans/getopt.h @@ -1,191 +1,191 @@ - - -/* getopt.h */ -/* Declarations for getopt. - Copyright (C) 1989-1994, 1996-1999, 2001 Free Software - Foundation, Inc. This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute - it and/or modify it under the terms of the GNU Lesser - General Public License as published by the Free Software - Foundation; either version 2.1 of the License, or - (at your option) any later version. - - The GNU C Library is distributed in the hope that it will - be useful, but WITHOUT ANY WARRANTY; without even the - implied warranty of MERCHANTABILITY or FITNESS FOR A - PARTICULAR PURPOSE. See the GNU Lesser General Public - License for more details. 
- - You should have received a copy of the GNU Lesser General - Public License along with the GNU C Library; if not, write - to the Free Software Foundation, Inc., 59 Temple Place, - Suite 330, Boston, MA 02111-1307 USA. */ - - - - - -#ifndef _GETOPT_H - -#ifndef __need_getopt -# define _GETOPT_H 1 -#endif - -/* If __GNU_LIBRARY__ is not already defined, either we are being used - standalone, or this is the first header included in the source file. - If we are being used with glibc, we need to include , but - that does not exist if we are standalone. So: if __GNU_LIBRARY__ is - not defined, include , which will pull in for us - if it's from glibc. (Why ctype.h? It's guaranteed to exist and it - doesn't flood the namespace with stuff the way some other headers do.) */ -#if !defined __GNU_LIBRARY__ -# include -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -/* For communication from `getopt' to the caller. - When `getopt' finds an option that takes an argument, - the argument value is returned here. - Also, when `ordering' is RETURN_IN_ORDER, - each non-option ARGV-element is returned here. */ - -extern char *optarg; - -/* Index in ARGV of the next element to be scanned. - This is used for communication to and from the caller - and for communication between successive calls to `getopt'. - - On entry to `getopt', zero means this is the first call; initialize. - - When `getopt' returns -1, this is the index of the first of the - non-option elements that the caller should itself scan. - - Otherwise, `optind' communicates from one call to the next - how much of ARGV has been scanned so far. */ - -extern int optind; - -/* Callers store zero here to inhibit the error message `getopt' prints - for unrecognized options. */ - -extern int opterr; - -/* Set to an option character which was unrecognized. */ - -extern int optopt; - -#ifndef __need_getopt -/* Describe the long-named options requested by the application. 
- The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector - of `struct option' terminated by an element containing a name which is - zero. - - The field `has_arg' is: - no_argument (or 0) if the option does not take an argument, - required_argument (or 1) if the option requires an argument, - optional_argument (or 2) if the option takes an optional argument. - - If the field `flag' is not NULL, it points to a variable that is set - to the value given in the field `val' when the option is found, but - left unchanged if the option is not found. - - To have a long-named option do something other than set an `int' to - a compiled-in constant, such as set a value from `optarg', set the - option's `flag' field to zero and its `val' field to a nonzero - value (the equivalent single-letter option character, if there is - one). For long options that have a zero `flag' field, `getopt' - returns the contents of the `val' field. */ - -struct option -{ -# if (defined __STDC__ && __STDC__) || defined __cplusplus - const char *name; -# else - char *name; -# endif - /* has_arg can't be an enum because some compilers complain about - type mismatches in all the code that assumes it is an int. */ - int has_arg; - int *flag; - int val; -}; - -/* Names for the values of the `has_arg' field of `struct option'. */ - -# define no_argument 0 -# define required_argument 1 -# define optional_argument 2 -#endif /* need getopt */ - - -/* Get definitions and prototypes for functions to process the - arguments in ARGV (ARGC of them, minus the program name) for - options given in OPTS. - - Return the option character from OPTS just read. Return -1 when - there are no more options. For unrecognized options, or options - missing arguments, `optopt' is set to the option letter, and '?' is - returned. 
- - The OPTS string is a list of characters which are recognized option - letters, optionally followed by colons, specifying that that letter - takes an argument, to be placed in `optarg'. - - If a letter in OPTS is followed by two colons, its argument is - optional. This behavior is specific to the GNU `getopt'. - - The argument `--' causes premature termination of argument - scanning, explicitly telling `getopt' that there are no more - options. - - If OPTS begins with `--', then non-option arguments are treated as - arguments to the option '\0'. This behavior is specific to the GNU - `getopt'. */ - -#if (defined __STDC__ && __STDC__) || defined __cplusplus -# ifdef __GNU_LIBRARY__ -/* Many other libraries have conflicting prototypes for getopt, with - differences in the consts, in stdlib.h. To avoid compilation - errors, only prototype getopt for the GNU C library. */ -extern int getopt (int ___argc, char *const *___argv, const char *__shortopts); -# else /* not __GNU_LIBRARY__ */ -extern int getopt (); -# endif /* __GNU_LIBRARY__ */ - -# ifndef __need_getopt -extern int getopt_long (int ___argc, char *const *___argv, - const char *__shortopts, - const struct option *__longopts, int *__longind); -extern int getopt_long_only (int ___argc, char *const *___argv, - const char *__shortopts, - const struct option *__longopts, int *__longind); - -/* Internal only. Users should not call this directly. */ -extern int _getopt_internal (int ___argc, char *const *___argv, - const char *__shortopts, - const struct option *__longopts, int *__longind, - int __long_only); -# endif -#else /* not __STDC__ */ -extern int getopt (); -# ifndef __need_getopt -extern int getopt_long (); -extern int getopt_long_only (); - -extern int _getopt_internal (); -# endif -#endif /* __STDC__ */ - -#ifdef __cplusplus -} -#endif - -/* Make sure we later can get all the definitions and declarations. 
*/ -#undef __need_getopt - -#endif /* getopt.h */ - + + +/* getopt.h */ +/* Declarations for getopt. + Copyright (C) 1989-1994, 1996-1999, 2001 Free Software + Foundation, Inc. This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute + it and/or modify it under the terms of the GNU Lesser + General Public License as published by the Free Software + Foundation; either version 2.1 of the License, or + (at your option) any later version. + + The GNU C Library is distributed in the hope that it will + be useful, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A + PARTICULAR PURPOSE. See the GNU Lesser General Public + License for more details. + + You should have received a copy of the GNU Lesser General + Public License along with the GNU C Library; if not, write + to the Free Software Foundation, Inc., 59 Temple Place, + Suite 330, Boston, MA 02111-1307 USA. */ + + + + + +#ifndef _GETOPT_H + +#ifndef __need_getopt +# define _GETOPT_H 1 +#endif + +/* If __GNU_LIBRARY__ is not already defined, either we are being used + standalone, or this is the first header included in the source file. + If we are being used with glibc, we need to include , but + that does not exist if we are standalone. So: if __GNU_LIBRARY__ is + not defined, include , which will pull in for us + if it's from glibc. (Why ctype.h? It's guaranteed to exist and it + doesn't flood the namespace with stuff the way some other headers do.) */ +#if !defined __GNU_LIBRARY__ +# include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* For communication from `getopt' to the caller. + When `getopt' finds an option that takes an argument, + the argument value is returned here. + Also, when `ordering' is RETURN_IN_ORDER, + each non-option ARGV-element is returned here. */ + +extern char *optarg; + +/* Index in ARGV of the next element to be scanned. 
+ This is used for communication to and from the caller + and for communication between successive calls to `getopt'. + + On entry to `getopt', zero means this is the first call; initialize. + + When `getopt' returns -1, this is the index of the first of the + non-option elements that the caller should itself scan. + + Otherwise, `optind' communicates from one call to the next + how much of ARGV has been scanned so far. */ + +extern int optind; + +/* Callers store zero here to inhibit the error message `getopt' prints + for unrecognized options. */ + +extern int opterr; + +/* Set to an option character which was unrecognized. */ + +extern int optopt; + +#ifndef __need_getopt +/* Describe the long-named options requested by the application. + The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector + of `struct option' terminated by an element containing a name which is + zero. + + The field `has_arg' is: + no_argument (or 0) if the option does not take an argument, + required_argument (or 1) if the option requires an argument, + optional_argument (or 2) if the option takes an optional argument. + + If the field `flag' is not NULL, it points to a variable that is set + to the value given in the field `val' when the option is found, but + left unchanged if the option is not found. + + To have a long-named option do something other than set an `int' to + a compiled-in constant, such as set a value from `optarg', set the + option's `flag' field to zero and its `val' field to a nonzero + value (the equivalent single-letter option character, if there is + one). For long options that have a zero `flag' field, `getopt' + returns the contents of the `val' field. */ + +struct option +{ +# if (defined __STDC__ && __STDC__) || defined __cplusplus + const char *name; +# else + char *name; +# endif + /* has_arg can't be an enum because some compilers complain about + type mismatches in all the code that assumes it is an int. 
*/ + int has_arg; + int *flag; + int val; +}; + +/* Names for the values of the `has_arg' field of `struct option'. */ + +# define no_argument 0 +# define required_argument 1 +# define optional_argument 2 +#endif /* need getopt */ + + +/* Get definitions and prototypes for functions to process the + arguments in ARGV (ARGC of them, minus the program name) for + options given in OPTS. + + Return the option character from OPTS just read. Return -1 when + there are no more options. For unrecognized options, or options + missing arguments, `optopt' is set to the option letter, and '?' is + returned. + + The OPTS string is a list of characters which are recognized option + letters, optionally followed by colons, specifying that that letter + takes an argument, to be placed in `optarg'. + + If a letter in OPTS is followed by two colons, its argument is + optional. This behavior is specific to the GNU `getopt'. + + The argument `--' causes premature termination of argument + scanning, explicitly telling `getopt' that there are no more + options. + + If OPTS begins with `--', then non-option arguments are treated as + arguments to the option '\0'. This behavior is specific to the GNU + `getopt'. */ + +#if (defined __STDC__ && __STDC__) || defined __cplusplus +# ifdef __GNU_LIBRARY__ +/* Many other libraries have conflicting prototypes for getopt, with + differences in the consts, in stdlib.h. To avoid compilation + errors, only prototype getopt for the GNU C library. */ +extern int getopt (int ___argc, char *const *___argv, const char *__shortopts); +# else /* not __GNU_LIBRARY__ */ +extern int getopt (); +# endif /* __GNU_LIBRARY__ */ + +# ifndef __need_getopt +extern int getopt_long (int ___argc, char *const *___argv, + const char *__shortopts, + const struct option *__longopts, int *__longind); +extern int getopt_long_only (int ___argc, char *const *___argv, + const char *__shortopts, + const struct option *__longopts, int *__longind); + +/* Internal only. 
Users should not call this directly. */ +extern int _getopt_internal (int ___argc, char *const *___argv, + const char *__shortopts, + const struct option *__longopts, int *__longind, + int __long_only); +# endif +#else /* not __STDC__ */ +extern int getopt (); +# ifndef __need_getopt +extern int getopt_long (); +extern int getopt_long_only (); + +extern int _getopt_internal (); +# endif +#endif /* __STDC__ */ + +#ifdef __cplusplus +} +#endif + +/* Make sure we later can get all the definitions and declarations. */ +#undef __need_getopt + +#endif /* getopt.h */ + diff --git a/benchmarks/opencl/kmeans/kernel.cl b/benchmarks/opencl/kmeans/kernel.cl index 11ca065e..81089878 100755 --- a/benchmarks/opencl/kmeans/kernel.cl +++ b/benchmarks/opencl/kmeans/kernel.cl @@ -1,61 +1,61 @@ -#ifndef FLT_MAX -#define FLT_MAX 3.40282347e+38 -#endif - -__kernel void -kmeans_kernel_c(__global float *feature, - __global float *clusters, - __global int *membership, - int npoints, - int nclusters, - int nfeatures, - int offset, - int size - ) -{ - unsigned int point_id = get_global_id(0); - int index = 0; - //const unsigned int point_id = get_global_id(0); - if (point_id < npoints) - { - float min_dist=FLT_MAX; - for (int i=0; i < nclusters; i++) { - - float dist = 0; - float ans = 0; - for (int l=0; l -#include -#include -#include -#include "kmeans.h" - -#define RANDOM_MAX 2147483647 - -extern double wtime(void); - -/*----< kmeans_clustering() >---------------------------------------------*/ -float** kmeans_clustering(float **feature, /* in: [npoints][nfeatures] */ - int nfeatures, - int npoints, - int nclusters, - float threshold, - int *membership) /* out: [npoints] */ -{ - int i, j, n = 0; /* counters */ - int loop=0, temp; - int *new_centers_len; /* [nclusters]: no. 
of points in each cluster */ - float delta; /* if the point moved */ - float **clusters; /* out: [nclusters][nfeatures] */ - float **new_centers; /* [nclusters][nfeatures] */ - - int *initial; /* used to hold the index of points not yet selected - prevents the "birthday problem" of dual selection (?) - considered holding initial cluster indices, but changed due to - possible, though unlikely, infinite loops */ - int initial_points; - int c = 0; - - /* nclusters should never be > npoints - that would guarantee a cluster without points */ - if (nclusters > npoints) - nclusters = npoints; - - /* allocate space for and initialize returning variable clusters[] */ - clusters = (float**) malloc(nclusters * sizeof(float*)); - clusters[0] = (float*) malloc(nclusters * nfeatures * sizeof(float)); - for (i=1; i= 0; i++) { - //n = (int)rand() % initial_points; - - for (j=0; j 0) - clusters[i][j] = new_centers[i][j] / new_centers_len[i]; /* take average i.e. sum/n */ - new_centers[i][j] = 0.0; /* set back to 0 */ - } - new_centers_len[i] = 0; /* set back to 0 */ - } - c++; - } while ((delta > threshold) && (loop++ < 500)); /* makes sure loop terminates */ - printf("iterated %d times\n", c); - free(new_centers[0]); - free(new_centers); - free(new_centers_len); - - return clusters; -} - +/*****************************************************************************/ +/*IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. */ +/*By downloading, copying, installing or using the software you agree */ +/*to this license. If you do not agree to this license, do not download, */ +/*install, copy or use the software. */ +/* */ +/* */ +/*Copyright (c) 2005 Northwestern University */ +/*All rights reserved. 
*/ + +/*Redistribution of the software in source and binary forms, */ +/*with or without modification, is permitted provided that the */ +/*following conditions are met: */ +/* */ +/*1 Redistributions of source code must retain the above copyright */ +/* notice, this list of conditions and the following disclaimer. */ +/* */ +/*2 Redistributions in binary form must reproduce the above copyright */ +/* notice, this list of conditions and the following disclaimer in the */ +/* documentation and/or other materials provided with the distribution.*/ +/* */ +/*3 Neither the name of Northwestern University nor the names of its */ +/* contributors may be used to endorse or promote products derived */ +/* from this software without specific prior written permission. */ +/* */ +/*THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS */ +/*IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ +/*TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT AND */ +/*FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL */ +/*NORTHWESTERN UNIVERSITY OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, */ +/*INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/*(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR */ +/*SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */ +/*HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, */ +/*STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN */ +/*ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/*POSSIBILITY OF SUCH DAMAGE. 
*/ +/******************************************************************************/ + +/*************************************************************************/ +/** File: kmeans_clustering.c **/ +/** Description: Implementation of regular k-means clustering **/ +/** algorithm **/ +/** Author: Wei-keng Liao **/ +/** ECE Department, Northwestern University **/ +/** email: wkliao@ece.northwestern.edu **/ +/** **/ +/** Edited by: Jay Pisharath **/ +/** Northwestern University. **/ +/** **/ +/** ================================================================ **/ +/** **/ +/** Edited by: Shuai Che, David Tarjan, Sang-Ha Lee **/ +/** University of Virginia **/ +/** **/ +/** Description: No longer supports fuzzy c-means clustering; **/ +/** only regular k-means clustering. **/ +/** No longer performs "validity" function to analyze **/ +/** compactness and separation crietria; instead **/ +/** calculate root mean squared error. **/ +/** **/ +/*************************************************************************/ + +#include +#include +#include +#include +#include "kmeans.h" + +#define RANDOM_MAX 2147483647 + +extern double wtime(void); + +/*----< kmeans_clustering() >---------------------------------------------*/ +float** kmeans_clustering(float **feature, /* in: [npoints][nfeatures] */ + int nfeatures, + int npoints, + int nclusters, + float threshold, + int *membership) /* out: [npoints] */ +{ + int i, j, n = 0; /* counters */ + int loop=0, temp; + int *new_centers_len; /* [nclusters]: no. of points in each cluster */ + float delta; /* if the point moved */ + float **clusters; /* out: [nclusters][nfeatures] */ + float **new_centers; /* [nclusters][nfeatures] */ + + int *initial; /* used to hold the index of points not yet selected + prevents the "birthday problem" of dual selection (?) 
+ considered holding initial cluster indices, but changed due to + possible, though unlikely, infinite loops */ + int initial_points; + int c = 0; + + /* nclusters should never be > npoints + that would guarantee a cluster without points */ + if (nclusters > npoints) + nclusters = npoints; + + /* allocate space for and initialize returning variable clusters[] */ + clusters = (float**) malloc(nclusters * sizeof(float*)); + clusters[0] = (float*) malloc(nclusters * nfeatures * sizeof(float)); + for (i=1; i= 0; i++) { + //n = (int)rand() % initial_points; + + for (j=0; j 0) + clusters[i][j] = new_centers[i][j] / new_centers_len[i]; /* take average i.e. sum/n */ + new_centers[i][j] = 0.0; /* set back to 0 */ + } + new_centers_len[i] = 0; /* set back to 0 */ + } + c++; + } while ((delta > threshold) && (loop++ < 500)); /* makes sure loop terminates */ + printf("iterated %d times\n", c); + free(new_centers[0]); + free(new_centers); + free(new_centers_len); + + return clusters; +} + diff --git a/benchmarks/opencl/kmeans/main.cc b/benchmarks/opencl/kmeans/main.cc index f458ab4b..e6e97e3b 100755 --- a/benchmarks/opencl/kmeans/main.cc +++ b/benchmarks/opencl/kmeans/main.cc @@ -1,394 +1,382 @@ -#include "kmeans.h" -#include -#include -#include -#include -#include -#include - -#ifdef WIN -#include -#else -#include -#include -double gettime() { - struct timeval t; - gettimeofday(&t, NULL); - return t.tv_sec + t.tv_usec * 1e-6; -} -#endif - -#ifdef NV -#include -#else -#include -#endif - -#ifndef FLT_MAX -#define FLT_MAX 3.40282347e+38 -#endif - -#ifdef RD_WG_SIZE_0_0 -#define BLOCK_SIZE RD_WG_SIZE_0_0 -#elif defined(RD_WG_SIZE_0) -#define BLOCK_SIZE RD_WG_SIZE_0 -#elif defined(RD_WG_SIZE) -#define BLOCK_SIZE RD_WG_SIZE -#else -#define BLOCK_SIZE 256 -#endif - -#ifdef RD_WG_SIZE_1_0 -#define BLOCK_SIZE2 RD_WG_SIZE_1_0 -#elif defined(RD_WG_SIZE_1) -#define BLOCK_SIZE2 RD_WG_SIZE_1 -#elif defined(RD_WG_SIZE) -#define BLOCK_SIZE2 RD_WG_SIZE -#else -#define BLOCK_SIZE2 256 -#endif - 
-// local variables -static cl_context context; -static cl_command_queue cmd_queue; -static cl_device_type device_type; -static cl_device_id *device_list; -static cl_int num_devices; - - -static int read_kernel_file(const char* filename, uint8_t** data, size_t* size) { - if (nullptr == filename || nullptr == data || 0 == size) - return -1; - - FILE* fp = fopen(filename, "r"); - if (NULL == fp) { - fprintf(stderr, "Failed to load kernel."); - return -1; - } - fseek(fp , 0 , SEEK_END); - long fsize = ftell(fp); - rewind(fp); - - *data = (uint8_t*)malloc(fsize); - *size = fread(*data, 1, fsize, fp); - - fclose(fp); - - return 0; -} - -static int initialize(int use_gpu) { - cl_int result; - size_t size; - - /*// create OpenCL context - cl_platform_id platform_id; - if (clGetPlatformIDs(1, &platform_id, NULL) != CL_SUCCESS) { - printf("ERROR: clGetPlatformIDs(1,*,0) failed\n"); - return -1; - } - cl_context_properties ctxprop[] = {CL_CONTEXT_PLATFORM, - (cl_context_properties)platform_id, 0}; - device_type = use_gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU; - context = clCreateContextFromType(ctxprop, device_type, NULL, NULL, NULL); - if (!context) { - printf("ERROR: clCreateContextFromType(%s) failed\n", - use_gpu ? 
"GPU" : "CPU"); - return -1; - } - - // get the list of GPUs - result = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &size); - num_devices = (int)(size / sizeof(cl_device_id)); - - if (result != CL_SUCCESS || num_devices < 1) { - printf("ERROR: clGetContextInfo() failed\n"); - return -1; - } - device_list = new cl_device_id[num_devices]; - if (!device_list) { - printf("ERROR: new cl_device_id[] failed\n"); - return -1; - } - result = - clGetContextInfo(context, CL_CONTEXT_DEVICES, size, device_list, NULL); - if (result != CL_SUCCESS) { - printf("ERROR: clGetContextInfo() failed\n"); - return -1; - }*/ - - cl_platform_id platform_id; - num_devices = 1; - device_list = new cl_device_id[num_devices]; - - result = clGetPlatformIDs(1, &platform_id, NULL); - result = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, device_list, NULL); - context = clCreateContext(NULL, 1, device_list, NULL, NULL, &result); - - // create command queue for the first device - cmd_queue = clCreateCommandQueue(context, device_list[0], 0, NULL); - if (!cmd_queue) { - printf("ERROR: clCreateCommandQueue() failed\n"); - return -1; - } - - return 0; -} - -static int shutdown() { - // release resources - if (cmd_queue) - clReleaseCommandQueue(cmd_queue); - if (context) - clReleaseContext(context); - if (device_list) - delete device_list; - - // reset all variables - cmd_queue = 0; - context = 0; - device_list = 0; - num_devices = 0; - device_type = 0; - - return 0; -} - -cl_mem d_feature; -cl_mem d_feature_swap; -cl_mem d_cluster; -cl_mem d_membership; - -cl_kernel kernel; -cl_kernel kernel_s; -cl_kernel kernel2; - -int *membership_OCL; -int *membership_d; -float *feature_d; -float *clusters_d; -float *center_d; - -uint8_t* kernel_bin = NULL; -size_t kernel_size = 0; -cl_int binary_status = 0; - - -int allocate(int n_points, int n_features, int n_clusters, float **feature) { - /*int sourcesize = 1024 * 1024; - char *source = (char *)calloc(sourcesize, sizeof(char)); - if (!source) { 
- printf("ERROR: calloc(%d) failed\n", sourcesize); - return -1; - } - - // read the kernel core source - char *tempchar = "./kmeans.cl"; - FILE *fp = fopen(tempchar, "rb"); - if (!fp) { - printf("ERROR: unable to open '%s'\n", tempchar); - return -1; - } - fread(source + strlen(source), sourcesize, 1, fp); - fclose(fp);*/ - - // OpenCL initialization - int use_gpu = 1; - if (initialize(use_gpu)) - return -1; - - // Load Kernel - if (read_kernel_file("kernel.pocl", &kernel_bin, &kernel_size)) { - return -1; - } - - // compile kernel - cl_int err = 0; - //const char *slist[2] = {source, 0}; - //cl_program prog = clCreateProgramWithSource(context, 1, slist, NULL, &err); - cl_program prog = clCreateProgramWithBinary( - context, 1, device_list, &kernel_size, &kernel_bin, &binary_status, &err); - // cl_program prog = clCreateProgramWithBuiltInKernels(context, 1, device_list, "kmeans_kernel_c;kmeans_swap", &err); - if (err != CL_SUCCESS) { - printf("ERROR: clCreateProgramWithSource() => %d\n", err); - return -1; - } - err = clBuildProgram(prog, 0, NULL, NULL, NULL, NULL); - { // show warnings/errors - // static char log[65536]; memset(log, 0, sizeof(log)); - // cl_device_id device_id = 0; - // err = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(device_id), - //&device_id, NULL); - // clGetProgramBuildInfo(prog, device_id, CL_PROGRAM_BUILD_LOG, - // sizeof(log)-1, log, NULL); - // if(err || strstr(log,"warning:") || strstr(log, "error:")) - // printf("<<<<\n%s\n>>>>\n", log); - } - if (err != CL_SUCCESS) { - printf("ERROR: clBuildProgram() => %d\n", err); - return -1; - } - - char *kernel_kmeans_c = "kmeans_kernel_c"; - char *kernel_swap = "kmeans_swap"; - - kernel_s = clCreateKernel(prog, kernel_kmeans_c, &err); - if (err != CL_SUCCESS) { - printf("ERROR: clCreateKernel() 0 => %d\n", err); - return -1; - } - kernel2 = clCreateKernel(prog, kernel_swap, &err); - if (err != CL_SUCCESS) { - printf("ERROR: clCreateKernel() 0 => %d\n", err); - return -1; - } - - 
clReleaseProgram(prog); - - d_feature = clCreateBuffer(context, CL_MEM_READ_WRITE, - n_points * n_features * sizeof(float), NULL, &err); - if (err != CL_SUCCESS) { - printf("ERROR: clCreateBuffer d_feature (size:%d) => %d\n", - n_points * n_features, err); - return -1; - } - d_feature_swap = - clCreateBuffer(context, CL_MEM_READ_WRITE, - n_points * n_features * sizeof(float), NULL, &err); - if (err != CL_SUCCESS) { - printf("ERROR: clCreateBuffer d_feature_swap (size:%d) => %d\n", - n_points * n_features, err); - return -1; - } - d_cluster = - clCreateBuffer(context, CL_MEM_READ_WRITE, - n_clusters * n_features * sizeof(float), NULL, &err); - if (err != CL_SUCCESS) { - printf("ERROR: clCreateBuffer d_cluster (size:%d) => %d\n", - n_clusters * n_features, err); - return -1; - } - d_membership = clCreateBuffer(context, CL_MEM_READ_WRITE, - n_points * sizeof(int), NULL, &err); - if (err != CL_SUCCESS) { - printf("ERROR: clCreateBuffer d_membership (size:%d) => %d\n", n_points, - err); - return -1; - } - - // write buffers - err = clEnqueueWriteBuffer(cmd_queue, d_feature, 1, 0, - n_points * n_features * sizeof(float), feature[0], - 0, 0, 0); - if (err != CL_SUCCESS) { - printf("ERROR: clEnqueueWriteBuffer d_feature (size:%d) => %d\n", - n_points * n_features, err); - return -1; - } - - clSetKernelArg(kernel2, 0, sizeof(void *), (void *)&d_feature); - clSetKernelArg(kernel2, 1, sizeof(void *), (void *)&d_feature_swap); - clSetKernelArg(kernel2, 2, sizeof(cl_int), (void *)&n_points); - clSetKernelArg(kernel2, 3, sizeof(cl_int), (void *)&n_features); - - size_t global_work[3] = {n_points, 1, 1}; - /// Ke Wang adjustable local group size 2013/08/07 10:37:33 - size_t local_work_size = BLOCK_SIZE; // work group size is defined by - // RD_WG_SIZE_0 or RD_WG_SIZE_0_0 - // 2014/06/10 17:00:51 - if (global_work[0] % local_work_size != 0) - global_work[0] = (global_work[0] / local_work_size + 1) * local_work_size; - - err = clEnqueueNDRangeKernel(cmd_queue, kernel2, 1, NULL, 
global_work, - &local_work_size, 0, 0, 0); - if (err != CL_SUCCESS) { - printf("ERROR: clEnqueueNDRangeKernel()=>%d failed\n", err); - return -1; - } - - membership_OCL = (int *)malloc(n_points * sizeof(int)); -} - -void deallocateMemory() { - clReleaseMemObject(d_feature); - clReleaseMemObject(d_feature_swap); - clReleaseMemObject(d_cluster); - clReleaseMemObject(d_membership); - if (kernel_bin) free(kernel_bin); - free(membership_OCL); -} - -int main(int argc, char **argv) { - printf("WG size of kernel_swap = %d, WG size of kernel_kmeans = %d \n", - BLOCK_SIZE, BLOCK_SIZE2); - setup(argc, argv); - shutdown(); -} - -int kmeansOCL(float **feature, /* in: [npoints][nfeatures] */ - int n_features, int n_points, int n_clusters, int *membership, - float **clusters, int *new_centers_len, float **new_centers) { - - int delta = 0; - int i, j, k; - cl_int err = 0; - - size_t global_work[3] = {n_points, 1, 1}; - - /// Ke Wang adjustable local group size 2013/08/07 10:37:33 - size_t local_work_size = BLOCK_SIZE2; // work group size is defined by - // RD_WG_SIZE_1 or RD_WG_SIZE_1_0 - // 2014/06/10 17:00:41 - if (global_work[0] % local_work_size != 0) - global_work[0] = (global_work[0] / local_work_size + 1) * local_work_size; - - err = clEnqueueWriteBuffer(cmd_queue, d_cluster, 1, 0, - n_clusters * n_features * sizeof(float), - clusters[0], 0, 0, 0); - if (err != CL_SUCCESS) { - printf("ERROR: clEnqueueWriteBuffer d_cluster (size:%d) => %d\n", n_points, - err); - return -1; - } - - int size = 0; - int offset = 0; - - clSetKernelArg(kernel_s, 0, sizeof(void *), (void *)&d_feature_swap); - clSetKernelArg(kernel_s, 1, sizeof(void *), (void *)&d_cluster); - clSetKernelArg(kernel_s, 2, sizeof(void *), (void *)&d_membership); - clSetKernelArg(kernel_s, 3, sizeof(cl_int), (void *)&n_points); - clSetKernelArg(kernel_s, 4, sizeof(cl_int), (void *)&n_clusters); - clSetKernelArg(kernel_s, 5, sizeof(cl_int), (void *)&n_features); - clSetKernelArg(kernel_s, 6, sizeof(cl_int), (void 
*)&offset); - clSetKernelArg(kernel_s, 7, sizeof(cl_int), (void *)&size); - - err = clEnqueueNDRangeKernel(cmd_queue, kernel_s, 1, NULL, global_work, - &local_work_size, 0, 0, 0); - if (err != CL_SUCCESS) { - printf("ERROR: clEnqueueNDRangeKernel()=>%d failed\n", err); - return -1; - } - clFinish(cmd_queue); - err = clEnqueueReadBuffer(cmd_queue, d_membership, 1, 0, - n_points * sizeof(int), membership_OCL, 0, 0, 0); - if (err != CL_SUCCESS) { - printf("ERROR: Memcopy Out\n"); - return -1; - } - - delta = 0; - for (i = 0; i < n_points; i++) { - int cluster_id = membership_OCL[i]; - new_centers_len[cluster_id]++; - if (membership_OCL[i] != membership[i]) { - delta++; - membership[i] = membership_OCL[i]; - } - for (j = 0; j < n_features; j++) { - new_centers[cluster_id][j] += feature[i][j]; - } - } - - return delta; -} +#include "kmeans.h" +#include +#include +#include +#include +#include +#include + +#ifdef WIN +#include +#else +#include +#include +double gettime() { + struct timeval t; + gettimeofday(&t, NULL); + return t.tv_sec + t.tv_usec * 1e-6; +} +#endif + +#ifdef NV +#include +#else +#include +#endif + +#ifndef FLT_MAX +#define FLT_MAX 3.40282347e+38 +#endif + +#ifdef RD_WG_SIZE_0_0 +#define BLOCK_SIZE RD_WG_SIZE_0_0 +#elif defined(RD_WG_SIZE_0) +#define BLOCK_SIZE RD_WG_SIZE_0 +#elif defined(RD_WG_SIZE) +#define BLOCK_SIZE RD_WG_SIZE +#else +#define BLOCK_SIZE 256 +#endif + +#ifdef RD_WG_SIZE_1_0 +#define BLOCK_SIZE2 RD_WG_SIZE_1_0 +#elif defined(RD_WG_SIZE_1) +#define BLOCK_SIZE2 RD_WG_SIZE_1 +#elif defined(RD_WG_SIZE) +#define BLOCK_SIZE2 RD_WG_SIZE +#else +#define BLOCK_SIZE2 256 +#endif + +// local variables +static cl_context context; +static cl_command_queue cmd_queue; +static cl_device_type device_type; +static cl_device_id *device_list; +static cl_int num_devices; + +static int initialize(int use_gpu) { + cl_int result; + size_t size; + + /*// create OpenCL context + cl_platform_id platform_id; + if (clGetPlatformIDs(1, &platform_id, NULL) != 
CL_SUCCESS) { + printf("ERROR: clGetPlatformIDs(1,*,0) failed\n"); + return -1; + } + cl_context_properties ctxprop[] = {CL_CONTEXT_PLATFORM, + (cl_context_properties)platform_id, 0}; + device_type = use_gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU; + context = clCreateContextFromType(ctxprop, device_type, NULL, NULL, NULL); + if (!context) { + printf("ERROR: clCreateContextFromType(%s) failed\n", + use_gpu ? "GPU" : "CPU"); + return -1; + } + + // get the list of GPUs + result = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &size); + num_devices = (int)(size / sizeof(cl_device_id)); + + if (result != CL_SUCCESS || num_devices < 1) { + printf("ERROR: clGetContextInfo() failed\n"); + return -1; + } + device_list = new cl_device_id[num_devices]; + if (!device_list) { + printf("ERROR: new cl_device_id[] failed\n"); + return -1; + } + result = + clGetContextInfo(context, CL_CONTEXT_DEVICES, size, device_list, NULL); + if (result != CL_SUCCESS) { + printf("ERROR: clGetContextInfo() failed\n"); + return -1; + }*/ + + cl_platform_id platform_id; + num_devices = 1; + device_list = new cl_device_id[num_devices]; + + result = clGetPlatformIDs(1, &platform_id, NULL); + result = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, device_list, NULL); + context = clCreateContext(NULL, 1, device_list, NULL, NULL, &result); + + // create command queue for the first device + cmd_queue = clCreateCommandQueue(context, device_list[0], 0, NULL); + if (!cmd_queue) { + printf("ERROR: clCreateCommandQueue() failed\n"); + return -1; + } + + return 0; +} + +static int shutdown() { + // release resources + if (cmd_queue) + clReleaseCommandQueue(cmd_queue); + if (context) + clReleaseContext(context); + if (device_list) + delete device_list; + + // reset all variables + cmd_queue = 0; + context = 0; + device_list = 0; + num_devices = 0; + device_type = 0; + + return 0; +} + +cl_mem d_feature; +cl_mem d_feature_swap; +cl_mem d_cluster; +cl_mem d_membership; + +cl_kernel kernel; 
+cl_kernel kernel_s; +cl_kernel kernel2; + +int *membership_OCL; +int *membership_d; +float *feature_d; +float *clusters_d; +float *center_d; + + +static int read_kernel_file(const char* filename, uint8_t** data, size_t* size) { + if (nullptr == filename || nullptr == data || 0 == size) + return -1; + + FILE* fp = fopen(filename, "r"); + if (NULL == fp) { + fprintf(stderr, "Failed to load kernel."); + return -1; + } + fseek(fp , 0 , SEEK_END); + long fsize = ftell(fp); + rewind(fp); + + *data = (uint8_t*)malloc(fsize); + *size = fread(*data, 1, fsize, fp); + + fclose(fp); + + return 0; +} + + +int allocate(int n_points, int n_features, int n_clusters, float **feature) { + /*int sourcesize = 1024 * 1024; + char *source = (char *)calloc(sourcesize, sizeof(char)); + if (!source) { + printf("ERROR: calloc(%d) failed\n", sourcesize); + return -1; + } + + // read the kernel core source + char *tempchar = "./kmeans.cl"; + FILE *fp = fopen(tempchar, "rb"); + if (!fp) { + printf("ERROR: unable to open '%s'\n", tempchar); + return -1; + } + fread(source + strlen(source), sourcesize, 1, fp); + fclose(fp);*/ + + // OpenCL initialization + int use_gpu = 1; + if (initialize(use_gpu)) + return -1; + + // compile kernel + cl_int err = 0; + //const char *slist[2] = {source, 0}; + //cl_program prog = clCreateProgramWithSource(context, 1, slist, NULL, &err); + cl_program prog = clCreateProgramWithBuiltInKernels(context, 1, device_list, "kmeans_kernel_c;kmeans_swap", &err); + if (err != CL_SUCCESS) { + printf("ERROR: clCreateProgramWithSource() => %d\n", err); + return -1; + } + err = clBuildProgram(prog, 0, NULL, NULL, NULL, NULL); + { // show warnings/errors + // static char log[65536]; memset(log, 0, sizeof(log)); + // cl_device_id device_id = 0; + // err = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(device_id), + //&device_id, NULL); + // clGetProgramBuildInfo(prog, device_id, CL_PROGRAM_BUILD_LOG, + // sizeof(log)-1, log, NULL); + // if(err || strstr(log,"warning:") || 
strstr(log, "error:")) + // printf("<<<<\n%s\n>>>>\n", log); + } + if (err != CL_SUCCESS) { + printf("ERROR: clBuildProgram() => %d\n", err); + return -1; + } + + char *kernel_kmeans_c = "kmeans_kernel_c"; + char *kernel_swap = "kmeans_swap"; + + kernel_s = clCreateKernel(prog, kernel_kmeans_c, &err); + if (err != CL_SUCCESS) { + printf("ERROR: clCreateKernel() 0 => %d\n", err); + return -1; + } + kernel2 = clCreateKernel(prog, kernel_swap, &err); + if (err != CL_SUCCESS) { + printf("ERROR: clCreateKernel() 0 => %d\n", err); + return -1; + } + + clReleaseProgram(prog); + + d_feature = clCreateBuffer(context, CL_MEM_READ_WRITE, + n_points * n_features * sizeof(float), NULL, &err); + if (err != CL_SUCCESS) { + printf("ERROR: clCreateBuffer d_feature (size:%d) => %d\n", + n_points * n_features, err); + return -1; + } + d_feature_swap = + clCreateBuffer(context, CL_MEM_READ_WRITE, + n_points * n_features * sizeof(float), NULL, &err); + if (err != CL_SUCCESS) { + printf("ERROR: clCreateBuffer d_feature_swap (size:%d) => %d\n", + n_points * n_features, err); + return -1; + } + d_cluster = + clCreateBuffer(context, CL_MEM_READ_WRITE, + n_clusters * n_features * sizeof(float), NULL, &err); + if (err != CL_SUCCESS) { + printf("ERROR: clCreateBuffer d_cluster (size:%d) => %d\n", + n_clusters * n_features, err); + return -1; + } + d_membership = clCreateBuffer(context, CL_MEM_READ_WRITE, + n_points * sizeof(int), NULL, &err); + if (err != CL_SUCCESS) { + printf("ERROR: clCreateBuffer d_membership (size:%d) => %d\n", n_points, + err); + return -1; + } + + // write buffers + err = clEnqueueWriteBuffer(cmd_queue, d_feature, 1, 0, + n_points * n_features * sizeof(float), feature[0], + 0, 0, 0); + if (err != CL_SUCCESS) { + printf("ERROR: clEnqueueWriteBuffer d_feature (size:%d) => %d\n", + n_points * n_features, err); + return -1; + } + + clSetKernelArg(kernel2, 0, sizeof(void *), (void *)&d_feature); + clSetKernelArg(kernel2, 1, sizeof(void *), (void *)&d_feature_swap); + 
clSetKernelArg(kernel2, 2, sizeof(cl_int), (void *)&n_points); + clSetKernelArg(kernel2, 3, sizeof(cl_int), (void *)&n_features); + + size_t global_work[3] = {n_points, 1, 1}; + /// Ke Wang adjustable local group size 2013/08/07 10:37:33 + size_t local_work_size = BLOCK_SIZE; // work group size is defined by + // RD_WG_SIZE_0 or RD_WG_SIZE_0_0 + // 2014/06/10 17:00:51 + if (global_work[0] % local_work_size != 0) + global_work[0] = (global_work[0] / local_work_size + 1) * local_work_size; + + err = clEnqueueNDRangeKernel(cmd_queue, kernel2, 1, NULL, global_work, + &local_work_size, 0, 0, 0); + if (err != CL_SUCCESS) { + printf("ERROR: clEnqueueNDRangeKernel()=>%d failed\n", err); + return -1; + } + + membership_OCL = (int *)malloc(n_points * sizeof(int)); +} + +void deallocateMemory() { + clReleaseMemObject(d_feature); + clReleaseMemObject(d_feature_swap); + clReleaseMemObject(d_cluster); + clReleaseMemObject(d_membership); + free(membership_OCL); +} + +int main(int argc, char **argv) { + printf("WG size of kernel_swap = %d, WG size of kernel_kmeans = %d \n", + BLOCK_SIZE, BLOCK_SIZE2); + setup(argc, argv); + shutdown(); +} + +int kmeansOCL(float **feature, /* in: [npoints][nfeatures] */ + int n_features, int n_points, int n_clusters, int *membership, + float **clusters, int *new_centers_len, float **new_centers) { + + int delta = 0; + int i, j, k; + cl_int err = 0; + + size_t global_work[3] = {n_points, 1, 1}; + + /// Ke Wang adjustable local group size 2013/08/07 10:37:33 + size_t local_work_size = BLOCK_SIZE2; // work group size is defined by + // RD_WG_SIZE_1 or RD_WG_SIZE_1_0 + // 2014/06/10 17:00:41 + if (global_work[0] % local_work_size != 0) + global_work[0] = (global_work[0] / local_work_size + 1) * local_work_size; + + err = clEnqueueWriteBuffer(cmd_queue, d_cluster, 1, 0, + n_clusters * n_features * sizeof(float), + clusters[0], 0, 0, 0); + if (err != CL_SUCCESS) { + printf("ERROR: clEnqueueWriteBuffer d_cluster (size:%d) => %d\n", n_points, + err); + 
return -1; + } + + int size = 0; + int offset = 0; + + clSetKernelArg(kernel_s, 0, sizeof(void *), (void *)&d_feature_swap); + clSetKernelArg(kernel_s, 1, sizeof(void *), (void *)&d_cluster); + clSetKernelArg(kernel_s, 2, sizeof(void *), (void *)&d_membership); + clSetKernelArg(kernel_s, 3, sizeof(cl_int), (void *)&n_points); + clSetKernelArg(kernel_s, 4, sizeof(cl_int), (void *)&n_clusters); + clSetKernelArg(kernel_s, 5, sizeof(cl_int), (void *)&n_features); + clSetKernelArg(kernel_s, 6, sizeof(cl_int), (void *)&offset); + clSetKernelArg(kernel_s, 7, sizeof(cl_int), (void *)&size); + + err = clEnqueueNDRangeKernel(cmd_queue, kernel_s, 1, NULL, global_work, + &local_work_size, 0, 0, 0); + if (err != CL_SUCCESS) { + printf("ERROR: clEnqueueNDRangeKernel()=>%d failed\n", err); + return -1; + } + clFinish(cmd_queue); + err = clEnqueueReadBuffer(cmd_queue, d_membership, 1, 0, + n_points * sizeof(int), membership_OCL, 0, 0, 0); + if (err != CL_SUCCESS) { + printf("ERROR: Memcopy Out\n"); + return -1; + } + + delta = 0; + for (i = 0; i < n_points; i++) { + int cluster_id = membership_OCL[i]; + new_centers_len[cluster_id]++; + if (membership_OCL[i] != membership[i]) { + delta++; + membership[i] = membership_OCL[i]; + } + for (j = 0; j < n_features; j++) { + new_centers[cluster_id][j] += feature[i][j]; + } + } + + return delta; +} diff --git a/benchmarks/new_opencl/lib/libOpenCL.so b/benchmarks/opencl/lib/libOpenCL.so similarity index 100% rename from benchmarks/new_opencl/lib/libOpenCL.so rename to benchmarks/opencl/lib/libOpenCL.so diff --git a/benchmarks/new_opencl/lib/libOpenCL.so.2 b/benchmarks/opencl/lib/libOpenCL.so.2 similarity index 100% rename from benchmarks/new_opencl/lib/libOpenCL.so.2 rename to benchmarks/opencl/lib/libOpenCL.so.2 diff --git a/benchmarks/new_opencl/lib/libOpenCL.so.2.5.0 b/benchmarks/opencl/lib/libOpenCL.so.2.5.0 similarity index 100% rename from benchmarks/new_opencl/lib/libOpenCL.so.2.5.0 rename to benchmarks/opencl/lib/libOpenCL.so.2.5.0 
diff --git a/benchmarks/opencl/nearn/Makefile b/benchmarks/opencl/nearn/Makefile index 1af0e492..3fbd20e0 100644 --- a/benchmarks/opencl/nearn/Makefile +++ b/benchmarks/opencl/nearn/Makefile @@ -1,68 +1,47 @@ -RISCV_TOOL_PATH ?= $(wildcard ../../../../riscv-gnu-toolchain/drops) -POCL_CC_PATH ?= $(wildcard ../../../../pocl/drops_riscv_cc) -POCL_INC_PATH ?= $(wildcard ../include) -POCL_LIB_PATH ?= $(wildcard ../lib) -VX_RT_PATH ?= $(wildcard ../../../runtime) -VX_SIMX_PATH ?= $(wildcard ../../../simX/obj_dir) +LLVM_HOME ?= ~/dev/llvm-project/drops +TOOLCHAIN_PATH ?= ~/dev/riscv-gnu-toolchain/drops +SYSROOT ?= $(TOOLCHAIN_PATH)/riscv32-unknown-elf +POCL_CC_PATH ?= $(realpath ../compiler) +POCL_RT_PATH ?= $(realpath ../runtime) +VORTEX_DRV_PATH ?= $(realpath ../../../driver/sw) +VORTEX_RT_PATH ?= $(realpath ../../../runtime) -CC = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc -CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++ -DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump -HEX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy -GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb +CXXFLAGS += -std=c++11 -O0 -g -fpermissive -Wall -Wextra -pedantic -Wfatal-errors -VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c -VX_SRCS += $(VX_RT_PATH)/startup/vx_start.S -VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s -VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c -VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s -VX_SRCS += $(VX_RT_PATH)/tests/tests.c -VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c -VX_SRCS += $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) +CXXFLAGS += -I$(POCL_RT_PATH)/include -VX_CFLAGS = -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld - -CXXFLAGS = -g -O0 -march=rv32im -mabi=ilp32 -CXXFLAGS += -ffreestanding # program may not begin at main() -CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections -CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions 
-CXXFLAGS += -I$(POCL_INC_PATH) - -VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a -QEMU_LIBS = $(VX_RT_PATH)/qemu/vx_api.c -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a +LDFLAGS += -L$(POCL_RT_PATH)/lib -L$(VORTEX_DRV_PATH)/stub -lOpenCL -lvortex PROJECT = nearn SRCS = main.cc clutils.cpp utils.cpp -all: $(PROJECT).dump $(PROJECT).hex +all: $(PROJECT) -lib$(PROJECT).a: kernel.cl - POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl +kernel.pocl: kernel.cl + TOOLCHAIN_PATH=$(TOOLCHAIN_PATH) SYSROOT=$(SYSROOT) LLVM_HOME=$(LLVM_HOME) VORTEX_RUNTIME_PATH=$(VORTEX_RT_PATH) POCL_DEBUG=all LD_LIBRARY_PATH=$(LLVM_HOME)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o kernel.pocl kernel.cl + +$(PROJECT): $(SRCS) + $(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@ -$(PROJECT).elf: $(SRCS) lib$(PROJECT).a - $(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) $(SRCS) $(VX_LIBS) -o $(PROJECT).elf +run-fpga: $(PROJECT) kernel.pocl + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) -$(PROJECT).qemu: $(SRCS) lib$(PROJECT).a - $(CXX) $(CXXFLAGS) $(SRCS) $(QEMU_LIBS) -o $(PROJECT).qemu +run-ase: $(PROJECT) kernel.pocl + ASE_LOG=0 LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) -$(PROJECT).hex: $(PROJECT).elf - $(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex +run-simx: $(PROJECT) kernel.pocl + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) -$(PROJECT).dump: $(PROJECT).elf - $(DMP) -D $(PROJECT).elf > $(PROJECT).dump +run-rtlsim: $(PROJECT) kernel.pocl + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) -run: $(PROJECT).hex - POCL_DEBUG=all $(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug - -qemu: 
$(PROJECT).qemu - POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -d in_asm -D debug.log $(PROJECT).qemu - -gdb-s: $(PROJECT).qemu - POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -g 1234 -d in_asm -D debug.log $(PROJECT).qemu - -gdb-c: $(PROJECT).qemu - $(GDB) $(PROJECT).qemu +.depend: $(SRCS) + $(CXX) $(CXXFLAGS) -MM $^ > .depend; clean: - rm -rf *.o *.elf *.dump *.hex *.qemu *.log *.debug + rm -rf $(PROJECT) *.o *.dump .depend + +ifneq ($(MAKECMDGOALS),clean) + -include .depend +endif \ No newline at end of file diff --git a/benchmarks/opencl/nearn/clutils.cpp b/benchmarks/opencl/nearn/clutils.cpp index cd0dbb2f..6bc42304 100755 --- a/benchmarks/opencl/nearn/clutils.cpp +++ b/benchmarks/opencl/nearn/clutils.cpp @@ -88,7 +88,6 @@ static cl_command_queue commandQueueNoProf = NULL; //! Global status of events static bool eventsEnabled = false; - //------------------------------------------------------- // Initialization and Cleanup //------------------------------------------------------- @@ -239,6 +238,28 @@ static bool eventsEnabled = false; return context; }*/ +static int read_kernel_file(const char* filename, uint8_t** data, size_t* size) { + if (nullptr == filename || nullptr == data || 0 == size) + return -1; + + FILE* fp = fopen(filename, "r"); + if (NULL == fp) { + fprintf(stderr, "Failed to load kernel."); + return -1; + } + fseek(fp , 0 , SEEK_END); + long fsize = ftell(fp); + rewind(fp); + + *data = (uint8_t*)malloc(fsize); + *size = fread(*data, 1, fsize, fp); + + fclose(fp); + + return 0; +} + + cl_context cl_init_context(int platform, int dev,int quiet) { int printInfo=1; if (platform >= 0 && dev >= 0) printInfo = 0; @@ -837,13 +858,22 @@ cl_program cl_compileProgram(char* kernelPath, char* compileoptions, bool verbos fread(source, 1, size, fp); source[size] = '\0';*/ + // read kernel binary from file + uint8_t *kernel_bin = NULL; + size_t kernel_size; + cl_int binary_status = 0; + int err = read_kernel_file("kernel.pocl", &kernel_bin, 
&kernel_size); + cl_errChk(err, "read_kernel_file", true); + // Create the program object - //cl_program clProgramReturn = clCreateProgramWithSource(context, 1, (const char **)&source, NULL, &status); - cl_program clProgramReturn = clCreateProgramWithBuiltInKernels(context, 1, &device, "NearestNeighbor", &status); + //cl_program clProgramReturn = clCreateProgramWithSource(context, 1, (const char **)&source, NULL, &status); + cl_program clProgramReturn = clCreateProgramWithBinary( + context, 1, devices, &kernel_size, &kernel_bin, &binary_status, &status); + free(kernel_bin); cl_errChk(status, "Creating program", true); - free(source); - fclose(fp); + //free(source); + //fclose(fp); // Try to compile the program status = clBuildProgram(clProgramReturn, 0, NULL, compileoptions, NULL, NULL); diff --git a/benchmarks/new_opencl/nearn/kernel.pocl b/benchmarks/opencl/nearn/kernel.pocl similarity index 100% rename from benchmarks/new_opencl/nearn/kernel.pocl rename to benchmarks/opencl/nearn/kernel.pocl diff --git a/benchmarks/new_opencl/results.txt b/benchmarks/opencl/results.txt similarity index 100% rename from benchmarks/new_opencl/results.txt rename to benchmarks/opencl/results.txt diff --git a/benchmarks/new_opencl/runtime/include/CL/cl.h b/benchmarks/opencl/runtime/include/CL/cl.h similarity index 100% rename from benchmarks/new_opencl/runtime/include/CL/cl.h rename to benchmarks/opencl/runtime/include/CL/cl.h diff --git a/benchmarks/new_opencl/runtime/include/CL/cl.hpp b/benchmarks/opencl/runtime/include/CL/cl.hpp similarity index 100% rename from benchmarks/new_opencl/runtime/include/CL/cl.hpp rename to benchmarks/opencl/runtime/include/CL/cl.hpp diff --git a/benchmarks/new_opencl/runtime/include/CL/cl2.hpp b/benchmarks/opencl/runtime/include/CL/cl2.hpp similarity index 100% rename from benchmarks/new_opencl/runtime/include/CL/cl2.hpp rename to benchmarks/opencl/runtime/include/CL/cl2.hpp diff --git a/benchmarks/new_opencl/runtime/include/CL/cl_d3d10.h 
b/benchmarks/opencl/runtime/include/CL/cl_d3d10.h similarity index 100% rename from benchmarks/new_opencl/runtime/include/CL/cl_d3d10.h rename to benchmarks/opencl/runtime/include/CL/cl_d3d10.h diff --git a/benchmarks/new_opencl/runtime/include/CL/cl_d3d11.h b/benchmarks/opencl/runtime/include/CL/cl_d3d11.h similarity index 100% rename from benchmarks/new_opencl/runtime/include/CL/cl_d3d11.h rename to benchmarks/opencl/runtime/include/CL/cl_d3d11.h diff --git a/benchmarks/new_opencl/runtime/include/CL/cl_dx9_media_sharing.h b/benchmarks/opencl/runtime/include/CL/cl_dx9_media_sharing.h similarity index 100% rename from benchmarks/new_opencl/runtime/include/CL/cl_dx9_media_sharing.h rename to benchmarks/opencl/runtime/include/CL/cl_dx9_media_sharing.h diff --git a/benchmarks/new_opencl/runtime/include/CL/cl_dx9_media_sharing_intel.h b/benchmarks/opencl/runtime/include/CL/cl_dx9_media_sharing_intel.h similarity index 100% rename from benchmarks/new_opencl/runtime/include/CL/cl_dx9_media_sharing_intel.h rename to benchmarks/opencl/runtime/include/CL/cl_dx9_media_sharing_intel.h diff --git a/benchmarks/new_opencl/runtime/include/CL/cl_egl.h b/benchmarks/opencl/runtime/include/CL/cl_egl.h similarity index 100% rename from benchmarks/new_opencl/runtime/include/CL/cl_egl.h rename to benchmarks/opencl/runtime/include/CL/cl_egl.h diff --git a/benchmarks/new_opencl/runtime/include/CL/cl_ext.h b/benchmarks/opencl/runtime/include/CL/cl_ext.h similarity index 100% rename from benchmarks/new_opencl/runtime/include/CL/cl_ext.h rename to benchmarks/opencl/runtime/include/CL/cl_ext.h diff --git a/benchmarks/new_opencl/runtime/include/CL/cl_ext_intel.h b/benchmarks/opencl/runtime/include/CL/cl_ext_intel.h similarity index 100% rename from benchmarks/new_opencl/runtime/include/CL/cl_ext_intel.h rename to benchmarks/opencl/runtime/include/CL/cl_ext_intel.h diff --git a/benchmarks/new_opencl/runtime/include/CL/cl_gl.h b/benchmarks/opencl/runtime/include/CL/cl_gl.h similarity index 100% 
rename from benchmarks/new_opencl/runtime/include/CL/cl_gl.h rename to benchmarks/opencl/runtime/include/CL/cl_gl.h diff --git a/benchmarks/new_opencl/runtime/include/CL/cl_gl_ext.h b/benchmarks/opencl/runtime/include/CL/cl_gl_ext.h similarity index 100% rename from benchmarks/new_opencl/runtime/include/CL/cl_gl_ext.h rename to benchmarks/opencl/runtime/include/CL/cl_gl_ext.h diff --git a/benchmarks/new_opencl/runtime/include/CL/cl_platform.h b/benchmarks/opencl/runtime/include/CL/cl_platform.h similarity index 100% rename from benchmarks/new_opencl/runtime/include/CL/cl_platform.h rename to benchmarks/opencl/runtime/include/CL/cl_platform.h diff --git a/benchmarks/new_opencl/runtime/include/CL/cl_va_api_media_sharing_intel.h b/benchmarks/opencl/runtime/include/CL/cl_va_api_media_sharing_intel.h similarity index 100% rename from benchmarks/new_opencl/runtime/include/CL/cl_va_api_media_sharing_intel.h rename to benchmarks/opencl/runtime/include/CL/cl_va_api_media_sharing_intel.h diff --git a/benchmarks/new_opencl/runtime/include/CL/cl_version.h b/benchmarks/opencl/runtime/include/CL/cl_version.h similarity index 100% rename from benchmarks/new_opencl/runtime/include/CL/cl_version.h rename to benchmarks/opencl/runtime/include/CL/cl_version.h diff --git a/benchmarks/new_opencl/runtime/include/CL/opencl.h b/benchmarks/opencl/runtime/include/CL/opencl.h similarity index 100% rename from benchmarks/new_opencl/runtime/include/CL/opencl.h rename to benchmarks/opencl/runtime/include/CL/opencl.h diff --git a/benchmarks/new_opencl/runtime/lib/libOpenCL.so b/benchmarks/opencl/runtime/lib/libOpenCL.so similarity index 100% rename from benchmarks/new_opencl/runtime/lib/libOpenCL.so rename to benchmarks/opencl/runtime/lib/libOpenCL.so diff --git a/benchmarks/new_opencl/runtime/lib/libOpenCL.so.2 b/benchmarks/opencl/runtime/lib/libOpenCL.so.2 similarity index 100% rename from benchmarks/new_opencl/runtime/lib/libOpenCL.so.2 rename to benchmarks/opencl/runtime/lib/libOpenCL.so.2 
diff --git a/benchmarks/new_opencl/runtime/lib/libOpenCL.so.2.5.0 b/benchmarks/opencl/runtime/lib/libOpenCL.so.2.5.0 similarity index 100% rename from benchmarks/new_opencl/runtime/lib/libOpenCL.so.2.5.0 rename to benchmarks/opencl/runtime/lib/libOpenCL.so.2.5.0 diff --git a/benchmarks/opencl/saxpy/Makefile b/benchmarks/opencl/saxpy/Makefile index 6f6ae366..55773533 100644 --- a/benchmarks/opencl/saxpy/Makefile +++ b/benchmarks/opencl/saxpy/Makefile @@ -1,68 +1,47 @@ -RISCV_TOOL_PATH ?= $(wildcard ../../../../riscv-gnu-toolchain/drops) -POCL_CC_PATH ?= $(wildcard ../../../../pocl/drops_riscv_cc) -POCL_INC_PATH ?= $(wildcard ../include) -POCL_LIB_PATH ?= $(wildcard ../lib) -VX_RT_PATH ?= $(wildcard ../../../runtime) -VX_SIMX_PATH ?= $(wildcard ../../../simX/obj_dir) +LLVM_HOME ?= ~/dev/llvm-project/drops +TOOLCHAIN_PATH ?= ~/dev/riscv-gnu-toolchain/drops +SYSROOT ?= $(TOOLCHAIN_PATH)/riscv32-unknown-elf +POCL_CC_PATH ?= $(realpath ../compiler) +POCL_RT_PATH ?= $(realpath ../runtime) +VORTEX_DRV_PATH ?= $(realpath ../../../driver/sw) +VORTEX_RT_PATH ?= $(realpath ../../../runtime) -CC = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc -CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++ -DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump -HEX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy -GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb +CXXFLAGS += -std=c++11 -O0 -g -fpermissive -Wall -Wextra -pedantic -Wfatal-errors -VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c -VX_SRCS += $(VX_RT_PATH)/startup/vx_start.S -VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s -VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c -VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s -VX_SRCS += $(VX_RT_PATH)/tests/tests.c -VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c -VX_SRCS += $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) +CXXFLAGS += -I$(POCL_RT_PATH)/include -VX_CFLAGS = -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld - -CXXFLAGS = -g 
-O0 -march=rv32im -mabi=ilp32 -CXXFLAGS += -ffreestanding # program may not begin at main() -CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections -CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions -CXXFLAGS += -I$(POCL_INC_PATH) - -VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a -QEMU_LIBS = $(VX_RT_PATH)/qemu/vx_api.c -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a +LDFLAGS += -L$(POCL_RT_PATH)/lib -L$(VORTEX_DRV_PATH)/stub -lOpenCL -lvortex PROJECT = saxpy - SRCS = main.cc +SRCS = main.cc -all: $(PROJECT).dump $(PROJECT).hex +all: $(PROJECT) -lib$(PROJECT).a: kernel.cl - POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl +kernel.pocl: kernel.cl + TOOLCHAIN_PATH=$(TOOLCHAIN_PATH) SYSROOT=$(SYSROOT) LLVM_HOME=$(LLVM_HOME) VORTEX_RUNTIME_PATH=$(VORTEX_RT_PATH) POCL_DEBUG=all LD_LIBRARY_PATH=$(LLVM_HOME)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o kernel.pocl kernel.cl + +$(PROJECT): $(SRCS) + $(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@ -$(PROJECT).elf: $(SRCS) lib$(PROJECT).a - $(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) $(SRCS) $(VX_LIBS) -o $(PROJECT).elf +run-fpga: $(PROJECT) kernel.pocl + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) -$(PROJECT).qemu: $(SRCS) lib$(PROJECT).a - $(CXX) $(CXXFLAGS) $(SRCS) $(QEMU_LIBS) -o $(PROJECT).qemu +run-ase: $(PROJECT) kernel.pocl + ASE_LOG=0 LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) -$(PROJECT).hex: $(PROJECT).elf - $(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex +run-simx: $(PROJECT) kernel.pocl + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) -$(PROJECT).dump: $(PROJECT).elf - $(DMP) -D $(PROJECT).elf > $(PROJECT).dump +run-rtlsim: 
$(PROJECT) kernel.pocl + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) -run: $(PROJECT).hex - POCL_DEBUG=all $(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug - -qemu: $(PROJECT).qemu - POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -d in_asm -D debug.log $(PROJECT).qemu - -gdb-s: $(PROJECT).qemu - POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -g 1234 -d in_asm -D debug.log $(PROJECT).qemu - -gdb-c: $(PROJECT).qemu - $(GDB) $(PROJECT).qemu +.depend: $(SRCS) + $(CXX) $(CXXFLAGS) -MM $^ > .depend; clean: - rm -rf *.o *.elf *.dump *.hex *.qemu *.log *.debug \ No newline at end of file + rm -rf $(PROJECT) *.o *.dump .depend + +ifneq ($(MAKECMDGOALS),clean) + -include .depend +endif \ No newline at end of file diff --git a/benchmarks/new_opencl/saxpy/kernel.pocl b/benchmarks/opencl/saxpy/kernel.pocl similarity index 100% rename from benchmarks/new_opencl/saxpy/kernel.pocl rename to benchmarks/opencl/saxpy/kernel.pocl diff --git a/benchmarks/opencl/saxpy/main.cc b/benchmarks/opencl/saxpy/main.cc index 9cf5d774..dd952d46 100644 --- a/benchmarks/opencl/saxpy/main.cc +++ b/benchmarks/opencl/saxpy/main.cc @@ -45,7 +45,7 @@ #define CL_CHECK_ERR(_expr) \ ({ \ cl_int _err = CL_INVALID_VALUE; \ - typeof(_expr) _ret = _expr; \ + decltype(_expr) _ret = _expr; \ if (_err != CL_SUCCESS) { \ fprintf(stderr, "OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \ abort(); \ @@ -58,6 +58,29 @@ void pfn_notify(const char *errinfo, const void *private_info, size_t cb, fprintf(stderr, "OpenCL Error (via pfn_notify): %s\n", errinfo); } +static int read_kernel_file(const char* filename, uint8_t** data, size_t* size) { + if (nullptr == filename || nullptr == data || 0 == size) + return -1; + + FILE* fp = fopen(filename, "r"); + if (NULL == fp) { + fprintf(stderr, "Failed to load kernel."); + return -1; + } + fseek(fp , 0 , SEEK_END); + long fsize = ftell(fp); + rewind(fp); + + *data = 
(uint8_t*)malloc(fsize); + *size = fread(*data, 1, fsize, fp); + + fclose(fp); + + return 0; +} + +uint8_t *kernel_bin = NULL; + /// // Cleanup any created OpenCL resources // @@ -78,6 +101,8 @@ void Cleanup(cl_context context, cl_command_queue commandQueue, if (context != 0) clReleaseContext(context); + + if (kernel_bin) free(kernel_bin); } int main(int argc, char **argv) { @@ -85,9 +110,14 @@ int main(int argc, char **argv) { cl_platform_id platform_id; cl_device_id device_id; - size_t binary_size; + size_t kernel_size; + cl_int binary_status = 0; int i; + // read kernel binary from file + if (0 != read_kernel_file("kernel.pocl", &kernel_bin, &kernel_size)) + return -1; + // Getting platform and device information CL_CHECK(clGetPlatformIDs(1, &platform_id, NULL)); CL_CHECK(clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, NULL)); @@ -105,8 +135,8 @@ int main(int argc, char **argv) { // If that is not available, then create the program from source // and store the binary for future use. std::cout << "Attempting to create program from binary..." 
<< std::endl; - cl_program program = - clCreateProgramWithBuiltInKernels(context, 1, &device_id, "saxpy", NULL); + cl_program program = CL_CHECK_ERR(clCreateProgramWithBinary( + context, 1, &device_id, &kernel_size, &kernel_bin, &binary_status, &_err)); if (program == NULL) { std::cerr << "Failed to write program binary" << std::endl; Cleanup(context, queue, program, kernel, memObjects); @@ -153,7 +183,7 @@ int main(int argc, char **argv) { } cl_event kernel_completion; - size_t global_work_size[1] = {NUM_DATA}; + size_t global_work_size[] = {NUM_DATA/2,NUM_DATA/2}; printf("attempting to enqueue kernel\n"); fflush(stdout); CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size, diff --git a/benchmarks/opencl/sfilter/Makefile b/benchmarks/opencl/sfilter/Makefile index edb4aab3..d1d0a41d 100644 --- a/benchmarks/opencl/sfilter/Makefile +++ b/benchmarks/opencl/sfilter/Makefile @@ -1,68 +1,47 @@ -RISCV_TOOL_PATH ?= $(wildcard ../../../../riscv-gnu-toolchain/drops) -POCL_CC_PATH ?= $(wildcard ../../../../pocl/drops_riscv_cc) -POCL_INC_PATH ?= $(wildcard ../include) -POCL_LIB_PATH ?= $(wildcard ../lib) -VX_RT_PATH ?= $(wildcard ../../../runtime) -VX_SIMX_PATH ?= $(wildcard ../../../simX/obj_dir) +LLVM_HOME ?= ~/dev/llvm-project/drops +TOOLCHAIN_PATH ?= ~/dev/riscv-gnu-toolchain/drops +SYSROOT ?= $(TOOLCHAIN_PATH)/riscv32-unknown-elf +POCL_CC_PATH ?= $(realpath ../compiler) +POCL_RT_PATH ?= $(realpath ../runtime) +VORTEX_DRV_PATH ?= $(realpath ../../../driver/sw) +VORTEX_RT_PATH ?= $(realpath ../../../runtime) -CC = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc -CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++ -DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump -HEX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy -GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb +CXXFLAGS += -std=c++11 -O0 -g -fpermissive -Wall -Wextra -pedantic -Wfatal-errors -VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c -VX_SRCS += $(VX_RT_PATH)/startup/vx_start.S 
-VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s -VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c -VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s -VX_SRCS += $(VX_RT_PATH)/tests/tests.c -VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c -VX_SRCS += $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) +CXXFLAGS += -I$(POCL_RT_PATH)/include -VX_CFLAGS = -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld - -CXXFLAGS = -g -O0 -march=rv32im -mabi=ilp32 -CXXFLAGS += -ffreestanding # program may not begin at main() -CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections -CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions -CXXFLAGS += -I$(POCL_INC_PATH) - -VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a -QEMU_LIBS = $(VX_RT_PATH)/qemu/vx_api.c -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a +LDFLAGS += -L$(POCL_RT_PATH)/lib -L$(VORTEX_DRV_PATH)/stub -lOpenCL -lvortex PROJECT = sfilter - SRCS = main.cc +SRCS = main.cc -all: $(PROJECT).dump $(PROJECT).hex +all: $(PROJECT) -lib$(PROJECT).a: kernel.cl - POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl +kernel.pocl: kernel.cl + TOOLCHAIN_PATH=$(TOOLCHAIN_PATH) SYSROOT=$(SYSROOT) LLVM_HOME=$(LLVM_HOME) VORTEX_RUNTIME_PATH=$(VORTEX_RT_PATH) POCL_DEBUG=all LD_LIBRARY_PATH=$(LLVM_HOME)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o kernel.pocl kernel.cl + +$(PROJECT): $(SRCS) + $(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@ -$(PROJECT).elf: $(SRCS) lib$(PROJECT).a - $(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) $(SRCS) $(VX_LIBS) -o $(PROJECT).elf +run-fpga: $(PROJECT) kernel.pocl + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) -$(PROJECT).qemu: $(SRCS) lib$(PROJECT).a - $(CXX) $(CXXFLAGS) $(SRCS) $(QEMU_LIBS) 
-o $(PROJECT).qemu +run-ase: $(PROJECT) kernel.pocl + ASE_LOG=0 LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) -$(PROJECT).hex: $(PROJECT).elf - $(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex +run-simx: $(PROJECT) kernel.pocl + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) -$(PROJECT).dump: $(PROJECT).elf - $(DMP) -D $(PROJECT).elf > $(PROJECT).dump +run-rtlsim: $(PROJECT) kernel.pocl + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) -run: $(PROJECT).hex - POCL_DEBUG=all $(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug - -qemu: $(PROJECT).qemu - POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -d in_asm -D debug.log $(PROJECT).qemu - -gdb-s: $(PROJECT).qemu - POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -g 1234 -d in_asm -D debug.log $(PROJECT).qemu - -gdb-c: $(PROJECT).qemu - $(GDB) $(PROJECT).qemu +.depend: $(SRCS) + $(CXX) $(CXXFLAGS) -MM $^ > .depend; clean: - rm -rf *.o *.elf *.dump *.hex *.qemu *.log *.debug \ No newline at end of file + rm -rf $(PROJECT) *.o *.dump .depend + +ifneq ($(MAKECMDGOALS),clean) + -include .depend +endif \ No newline at end of file diff --git a/benchmarks/new_opencl/sfilter/kernel.pocl b/benchmarks/opencl/sfilter/kernel.pocl similarity index 100% rename from benchmarks/new_opencl/sfilter/kernel.pocl rename to benchmarks/opencl/sfilter/kernel.pocl diff --git a/benchmarks/opencl/sfilter/main.cc b/benchmarks/opencl/sfilter/main.cc index d29beff0..637d591a 100644 --- a/benchmarks/opencl/sfilter/main.cc +++ b/benchmarks/opencl/sfilter/main.cc @@ -48,7 +48,7 @@ #define CL_CHECK_ERR(_expr) \ ({ \ cl_int _err = CL_INVALID_VALUE; \ - typeof(_expr) _ret = _expr; \ + decltype(_expr) _ret = _expr; \ if (_err != CL_SUCCESS) { \ fprintf(stderr, "OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \ abort(); \ @@ -60,6 +60,30 @@ void pfn_notify(const char 
*errinfo, const void *private_info, size_t cb, void *user_data) { fprintf(stderr, "OpenCL Error (via pfn_notify): %s\n", errinfo); } + +static int read_kernel_file(const char* filename, uint8_t** data, size_t* size) { + if (nullptr == filename || nullptr == data || 0 == size) + return -1; + + FILE* fp = fopen(filename, "r"); + if (NULL == fp) { + fprintf(stderr, "Failed to load kernel."); + return -1; + } + fseek(fp , 0 , SEEK_END); + long fsize = ftell(fp); + rewind(fp); + + *data = (uint8_t*)malloc(fsize); + *size = fread(*data, 1, fsize, fp); + + fclose(fp); + + return 0; +} + +uint8_t *kernel_bin = NULL; + // inlcude pocl float to half conversions typedef union { int32_t i; @@ -151,6 +175,8 @@ void Cleanup(cl_context context, cl_command_queue commandQueue, if (context != 0) clReleaseContext(context); + + if (kernel_bin) free(kernel_bin); } int main(int argc, char **argv) { @@ -158,9 +184,14 @@ int main(int argc, char **argv) { cl_platform_id platform_id; cl_device_id device_id; - size_t binary_size; + size_t kernel_size; + cl_int binary_status = 0; int i; + // read kernel binary from file + if (0 != read_kernel_file("kernel.pocl", &kernel_bin, &kernel_size)) + return -1; + // Getting platform and device information CL_CHECK(clGetPlatformIDs(1, &platform_id, NULL)); CL_CHECK(clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, NULL)); @@ -178,7 +209,8 @@ int main(int argc, char **argv) { // If that is not available, then create the program from source // and store the binary for future use. std::cout << "Attempting to create program from binary..." 
<< std::endl; - cl_program program = clCreateProgramWithBuiltInKernels(context, 1, &device_id, "sfilter", NULL); + cl_program program = CL_CHECK_ERR(clCreateProgramWithBinary( + context, 1, &device_id, &kernel_size, &kernel_bin, &binary_status, &_err)); if (program == NULL) { std::cerr << "Failed to write program binary" << std::endl; Cleanup(context, queue, program, kernel, memObjects); diff --git a/benchmarks/opencl/sgemm/Makefile b/benchmarks/opencl/sgemm/Makefile index ce7a3d1f..b2817ae4 100644 --- a/benchmarks/opencl/sgemm/Makefile +++ b/benchmarks/opencl/sgemm/Makefile @@ -1,68 +1,47 @@ -RISCV_TOOL_PATH ?= $(wildcard ../../../../riscv-gnu-toolchain/drops) -POCL_CC_PATH ?= $(wildcard ../../../../pocl/drops_riscv_cc) -POCL_INC_PATH ?= $(wildcard ../include) -POCL_LIB_PATH ?= $(wildcard ../lib) -VX_RT_PATH ?= $(wildcard ../../../runtime) -VX_SIMX_PATH ?= $(wildcard ../../../simX/obj_dir) +LLVM_HOME ?= ~/dev/llvm-project/drops +TOOLCHAIN_PATH ?= ~/dev/riscv-gnu-toolchain/drops +SYSROOT ?= $(TOOLCHAIN_PATH)/riscv32-unknown-elf +POCL_CC_PATH ?= $(realpath ../compiler) +POCL_RT_PATH ?= $(realpath ../runtime) +VORTEX_DRV_PATH ?= $(realpath ../../../driver/sw) +VORTEX_RT_PATH ?= $(realpath ../../../runtime) -CC = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc -CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++ -DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump -HEX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy -GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb +CXXFLAGS += -std=c++11 -O0 -g -fpermissive -Wall -Wextra -pedantic -Wfatal-errors -VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c -VX_SRCS += $(VX_RT_PATH)/startup/vx_start.S -VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s -VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c -VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s -VX_SRCS += $(VX_RT_PATH)/tests/tests.c -VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c -VX_SRCS += $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) 
+CXXFLAGS += -I$(POCL_RT_PATH)/include -VX_CFLAGS = -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld - -CXXFLAGS = -g -O0 -march=rv32im -mabi=ilp32 -CXXFLAGS += -ffreestanding # program may not begin at main() -CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections -CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions -CXXFLAGS += -I$(POCL_INC_PATH) - -VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a -QEMU_LIBS = $(VX_RT_PATH)/qemu/vx_api.c -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a +LDFLAGS += -L$(POCL_RT_PATH)/lib -L$(VORTEX_DRV_PATH)/stub -lOpenCL -lvortex PROJECT = sgemm SRCS = main.cc -all: $(PROJECT).dump $(PROJECT).hex +all: $(PROJECT) -lib$(PROJECT).a: kernel.cl - POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl +kernel.pocl: kernel.cl + TOOLCHAIN_PATH=$(TOOLCHAIN_PATH) SYSROOT=$(SYSROOT) LLVM_HOME=$(LLVM_HOME) VORTEX_RUNTIME_PATH=$(VORTEX_RT_PATH) POCL_DEBUG=all LD_LIBRARY_PATH=$(LLVM_HOME)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o kernel.pocl kernel.cl + +$(PROJECT): $(SRCS) + $(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@ -$(PROJECT).elf: $(SRCS) lib$(PROJECT).a - $(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) $(SRCS) $(VX_LIBS) -o $(PROJECT).elf +run-fpga: $(PROJECT) kernel.pocl + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) -$(PROJECT).qemu: $(SRCS) lib$(PROJECT).a - $(CXX) $(CXXFLAGS) $(SRCS) $(QEMU_LIBS) -o $(PROJECT).qemu +run-ase: $(PROJECT) kernel.pocl + ASE_LOG=0 LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) -$(PROJECT).hex: $(PROJECT).elf - $(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex +run-simx: $(PROJECT) kernel.pocl + 
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) -$(PROJECT).dump: $(PROJECT).elf - $(DMP) -D $(PROJECT).elf > $(PROJECT).dump +run-rtlsim: $(PROJECT) kernel.pocl + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) -run: $(PROJECT).hex - POCL_DEBUG=all $(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug - -qemu: $(PROJECT).qemu - POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -d in_asm -D debug.log $(PROJECT).qemu - -gdb-s: $(PROJECT).qemu - POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -g 1234 -d in_asm -D debug.log $(PROJECT).qemu - -gdb-c: $(PROJECT).qemu - $(GDB) $(PROJECT).qemu +.depend: $(SRCS) + $(CXX) $(CXXFLAGS) -MM $^ > .depend; clean: - rm -rf *.o *.elf *.dump *.hex *.qemu *.log *.debug + rm -rf $(PROJECT) *.o *.dump .depend + +ifneq ($(MAKECMDGOALS),clean) + -include .depend +endif \ No newline at end of file diff --git a/benchmarks/new_opencl/sgemm/kernel.pocl b/benchmarks/opencl/sgemm/kernel.pocl similarity index 100% rename from benchmarks/new_opencl/sgemm/kernel.pocl rename to benchmarks/opencl/sgemm/kernel.pocl diff --git a/benchmarks/opencl/sgemm/main.cc b/benchmarks/opencl/sgemm/main.cc index 64e605a0..2b72d1e5 100644 --- a/benchmarks/opencl/sgemm/main.cc +++ b/benchmarks/opencl/sgemm/main.cc @@ -46,7 +46,7 @@ #define CL_CHECK_ERR(_expr) \ ({ \ cl_int _err = CL_INVALID_VALUE; \ - typeof(_expr) _ret = _expr; \ + decltype(_expr) _ret = _expr; \ if (_err != CL_SUCCESS) { \ fprintf(stderr, "OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \ abort(); \ @@ -59,6 +59,29 @@ void pfn_notify(const char *errinfo, const void *private_info, size_t cb, fprintf(stderr, "OpenCL Error (via pfn_notify): %s\n", errinfo); } +static int read_kernel_file(const char* filename, uint8_t** data, size_t* size) { + if (nullptr == filename || nullptr == data || 0 == size) + return -1; + + FILE* fp = fopen(filename, "r"); + if (NULL == fp) { 
+ fprintf(stderr, "Failed to load kernel."); + return -1; + } + fseek(fp , 0 , SEEK_END); + long fsize = ftell(fp); + rewind(fp); + + *data = (uint8_t*)malloc(fsize); + *size = fread(*data, 1, fsize, fp); + + fclose(fp); + + return 0; +} + +uint8_t *kernel_bin = NULL; + /// // Cleanup any created OpenCL resources // @@ -79,6 +102,8 @@ void Cleanup(cl_context context, cl_command_queue commandQueue, if (context != 0) clReleaseContext(context); + + if (kernel_bin) free(kernel_bin); } int main(int argc, char **argv) { @@ -86,9 +111,14 @@ int main(int argc, char **argv) { cl_platform_id platform_id; cl_device_id device_id; - size_t binary_size; + size_t kernel_size; + cl_int binary_status = 0; int i; + // read kernel binary from file + if (0 != read_kernel_file("kernel.pocl", &kernel_bin, &kernel_size)) + return -1; + // Getting platform and device information CL_CHECK(clGetPlatformIDs(1, &platform_id, NULL)); CL_CHECK(clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, NULL)); @@ -110,8 +140,8 @@ int main(int argc, char **argv) { std::cout << "Attempting to create program from binary..." 
<< std::endl; // cl_program program = CreateProgramFromBinary(context, device_id, // "kernel.cl.bin"); - cl_program program = - clCreateProgramWithBuiltInKernels(context, 1, &device_id, "sgemm", NULL); + cl_program program = CL_CHECK_ERR(clCreateProgramWithBinary( + context, 1, &device_id, &kernel_size, &kernel_bin, &binary_status, &_err)); if (program == NULL) { std::cerr << "Failed to write program binary" << std::endl; Cleanup(context, queue, program, kernel, memObjects); @@ -146,7 +176,7 @@ int main(int argc, char **argv) { memObjects[1] = input_bufferB; memObjects[2] = output_buffer; - size_t width = NUM_DATA; + int width = NUM_DATA; printf("attempting to create kernel\n"); fflush(stdout); diff --git a/benchmarks/new_opencl/sgemm/sgemm b/benchmarks/opencl/sgemm/sgemm similarity index 100% rename from benchmarks/new_opencl/sgemm/sgemm rename to benchmarks/opencl/sgemm/sgemm diff --git a/benchmarks/new_opencl/transpose/.gitignore b/benchmarks/opencl/transpose/.gitignore similarity index 100% rename from benchmarks/new_opencl/transpose/.gitignore rename to benchmarks/opencl/transpose/.gitignore diff --git a/benchmarks/opencl/transpose/Makefile b/benchmarks/opencl/transpose/Makefile index 3e0e68b0..c122c176 100644 --- a/benchmarks/opencl/transpose/Makefile +++ b/benchmarks/opencl/transpose/Makefile @@ -1,66 +1,47 @@ -RISCV_TOOL_PATH ?= $(wildcard ../../../../riscv-gnu-toolchain/drops) -POCL_CC_PATH ?= $(wildcard ../../../../pocl/drops_riscv_cc) -POCL_INC_PATH ?= $(wildcard ../include) -POCL_LIB_PATH ?= $(wildcard ../lib) -VX_RT_PATH ?= $(wildcard ../../../runtime) -VX_SIMX_PATH ?= $(wildcard ../../../simX/obj_dir) +LLVM_HOME ?= ~/dev/llvm-project/drops +TOOLCHAIN_PATH ?= ~/dev/riscv-gnu-toolchain/drops +SYSROOT ?= $(TOOLCHAIN_PATH)/riscv32-unknown-elf +POCL_CC_PATH ?= $(realpath ../compiler) +POCL_RT_PATH ?= $(realpath ../runtime) +VORTEX_DRV_PATH ?= $(realpath ../../../driver/sw) +VORTEX_RT_PATH ?= $(realpath ../../../runtime) -CC = 
$(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc -CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++ -DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump -HEX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy -GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb +CXXFLAGS += -std=c++11 -O0 -g -fpermissive -Wall -Wextra -pedantic -Wfatal-errors -VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c -VX_SRCS += $(VX_RT_PATH)/startup/vx_start.S -VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s -VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c -VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s -VX_SRCS += $(VX_RT_PATH)/tests/tests.c -VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c -VX_SRCS += $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) +CXXFLAGS += -I$(POCLRT_PATH)/include -VX_CFLAGS = -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld +LDFLAGS += -L$(POCLRT_PATH)/lib -L$(DRIVER_PATH)/simx -lOpenCL -lvortex -CXXFLAGS = -g -O0 -march=rv32im -mabi=ilp32 -CXXFLAGS += -ffreestanding # program may not begin at main() -CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections -CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions -CXXFLAGS += -I$(POCL_INC_PATH) -I. 
+PROJECT = transpose -VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a -QEMU_LIBS = $(VX_RT_PATH)/qemu/vx_api.c -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a +SRCS = main.cc transpose_gold.cpp -PROJECT=transpose +all: $(PROJECT) -all: $(PROJECT).dump $(PROJECT).hex +kernel.pocl: kernel.cl + TOOLCHAIN_PATH=$(TOOLCHAIN_PATH) SYSROOT=$(SYSROOT) LLVM_HOME=$(LLVM_HOME) VORTEX_RUNTIME_PATH=$(VORTEX_RT_PATH) POCL_DEBUG=all LD_LIBRARY_PATH=$(LLVM_HOME)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o kernel.pocl kernel.cl + +$(PROJECT): $(SRCS) + $(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@ -lib$(PROJECT).a: transpose.cl - POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl +run-fpga: $(PROJECT) kernel.pocl + LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) -$(PROJECT).elf: main.cc lib$(PROJECT).a - $(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) main.cc $(VX_LIBS) -o $(PROJECT).elf +run-ase: $(PROJECT) kernel.pocl + LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) -$(PROJECT).qemu: main.cc lib$(PROJECT).a - $(CXX) $(CXXFLAGS) main.cc transpose_gold.cpp $(QEMU_LIBS) -o $(PROJECT).qemu +run-simx: $(PROJECT) kernel.pocl + LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) -$(PROJECT).hex: $(PROJECT).elf - $(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex +run-rtlsim: $(PROJECT) kernel.pocl + LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) -$(PROJECT).dump: $(PROJECT).elf - $(DMP) -D $(PROJECT).elf > $(PROJECT).dump - -run: $(PROJECT).hex - POCL_DEBUG=all $(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug - -qemu: $(PROJECT).qemu - POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -d in_asm -D debug.log 
$(PROJECT).qemu - -gdb-s: $(PROJECT).qemu - POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -g 1234 -d in_asm -D debug.log $(PROJECT).qemu - -gdb-c: $(PROJECT).qemu - $(GDB) $(PROJECT).qemu +.depend: $(SRCS) + $(CXX) $(CXXFLAGS) -MM $^ > .depend; clean: - rm -rf *.elf *.dump *.hex + rm -rf $(PROJECT) *.o *.dump .depend + +ifneq ($(MAKECMDGOALS),clean) + -include .depend +endif diff --git a/benchmarks/opencl/transpose/main.cc b/benchmarks/opencl/transpose/main.cc index 26122ab4..f72cb851 100644 --- a/benchmarks/opencl/transpose/main.cc +++ b/benchmarks/opencl/transpose/main.cc @@ -20,8 +20,8 @@ */ // standard utility and system includes -#include -#include +#include "oclUtils.h" +#include "shrQATest.h" #define BLOCK_DIM 16 @@ -152,7 +152,28 @@ double transposeGPU(const char* kernelName, bool useLocalMem, cl_uint ciDeviceC return time; } +uint8_t *kernel_bin = NULL; +static int read_kernel_file(const char* filename, uint8_t** data, size_t* size) { + if (nullptr == filename || nullptr == data || 0 == size) + return -1; + + FILE* fp = fopen(filename, "r"); + if (NULL == fp) { + fprintf(stderr, "Failed to load kernel."); + return -1; + } + fseek(fp , 0 , SEEK_END); + long fsize = ftell(fp); + rewind(fp); + + *data = (uint8_t*)malloc(fsize); + *size = fread(*data, 1, fsize, fp); + + fclose(fp); + + return 0; +} //! 
Run a simple test for CUDA // ********************************************************************* int runTest( const int argc, const char** argv) @@ -286,10 +307,11 @@ int runTest( const int argc, const char** argv) //oclCheckError(source_path != NULL, shrTRUE); char *source = oclLoadProgSource(source_path, "", &program_length); //oclCheckError(source != NULL, shrTRUE); - + size_t kernel_size; + cl_int binary_status = 0; + cl_device_id device_id; // create the program - rv_program = - clCreateProgramWithBuiltInKernels(context, 1, &device_id, "transpose", NULL); + rv_program = clCreateProgramWithBinary(cxGPUContext, 1, &device_id, &kernel_size, &kernel_bin, &binary_status, NULL); //rv_program = clCreateProgramWithSource(cxGPUContext, 1, // (const char **)&source, &program_length, &ciErrNum); //oclCheckError(ciErrNum, CL_SUCCESS); diff --git a/benchmarks/opencl/transpose/oclUtils.h b/benchmarks/opencl/transpose/oclUtils.h index 2b109e18..096612a8 100644 --- a/benchmarks/opencl/transpose/oclUtils.h +++ b/benchmarks/opencl/transpose/oclUtils.h @@ -1,198 +1,198 @@ -/* - * Copyright 1993-2010 NVIDIA Corporation. All rights reserved. - * - * Please refer to the NVIDIA end user license agreement (EULA) associated - * with this source code for terms and conditions that govern your use of - * this software. Any use, reproduction, disclosure, or distribution of - * this software and related documentation outside the terms of the EULA - * is strictly prohibited. 
- * - */ - -#ifndef OCL_UTILS_H -#define OCL_UTILS_H - -// ********************************************************************* -// Utilities specific to OpenCL samples in NVIDIA GPU Computing SDK -// ********************************************************************* - -// Common headers: Cross-API utililties and OpenCL header -#include - -// All OpenCL headers -#if defined (__APPLE__) || defined(MACOSX) - #include -#else - #include -#endif - -// Includes -#include -#include -#include - -// For systems with CL_EXT that are not updated with these extensions, we copied these -// extensions from -#ifndef CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV - /* cl_nv_device_attribute_query extension - no extension #define since it has no functions */ - #define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 0x4000 - #define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV 0x4001 - #define CL_DEVICE_REGISTERS_PER_BLOCK_NV 0x4002 - #define CL_DEVICE_WARP_SIZE_NV 0x4003 - #define CL_DEVICE_GPU_OVERLAP_NV 0x4004 - #define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV 0x4005 - #define CL_DEVICE_INTEGRATED_MEMORY_NV 0x4006 -#endif - -// reminders for build output window and log -#ifdef _WIN32 - #pragma message ("Note: including shrUtils.h") - #pragma message ("Note: including opencl.h") -#endif - -// SDK Revision # -#define OCL_SDKREVISION "7027912" - -// Error and Exit Handling Macros... -// ********************************************************************* -// Full error handling macro with Cleanup() callback (if supplied)... -// (Companion Inline Function lower on page) -#define oclCheckErrorEX(a, b, c) __oclCheckErrorEX(a, b, c, __FILE__ , __LINE__) - -// Short version without Cleanup() callback pointer -// Both Input (a) and Reference (b) are specified as args -#define oclCheckError(a, b) oclCheckErrorEX(a, b, 0) - -////////////////////////////////////////////////////////////////////////////// -//! Gets the platform ID for NVIDIA if available, otherwise default to platform 0 -//! -//! @return the id -//! 
@param clSelectedPlatformID OpenCL platform ID -////////////////////////////////////////////////////////////////////////////// -extern "C" cl_int oclGetPlatformID(cl_platform_id* clSelectedPlatformID); - -////////////////////////////////////////////////////////////////////////////// -//! Print info about the device -//! -//! @param iLogMode enum LOGBOTH, LOGCONSOLE, LOGFILE -//! @param device OpenCL id of the device -////////////////////////////////////////////////////////////////////////////// -extern "C" void oclPrintDevInfo(int iLogMode, cl_device_id device); - -////////////////////////////////////////////////////////////////////////////// -//! Get and return device capability -//! -//! @return the 2 digit integer representation of device Cap (major minor). return -1 if NA -//! @param device OpenCL id of the device -////////////////////////////////////////////////////////////////////////////// -extern "C" int oclGetDevCap(cl_device_id device); - -////////////////////////////////////////////////////////////////////////////// -//! Print the device name -//! -//! @param iLogMode enum LOGBOTH, LOGCONSOLE, LOGFILE -//! @param device OpenCL id of the device -////////////////////////////////////////////////////////////////////////////// -extern "C" void oclPrintDevName(int iLogMode, cl_device_id device); - -////////////////////////////////////////////////////////////////////////////// -//! Gets the id of the first device from the context -//! -//! @return the id -//! @param cxGPUContext OpenCL context -////////////////////////////////////////////////////////////////////////////// -extern "C" cl_device_id oclGetFirstDev(cl_context cxGPUContext); - -////////////////////////////////////////////////////////////////////////////// -//! Gets the id of the nth device from the context -//! -//! @return the id or -1 when out of range -//! @param cxGPUContext OpenCL context -//! 
@param device_idx index of the device of interest -////////////////////////////////////////////////////////////////////////////// -extern "C" cl_device_id oclGetDev(cl_context cxGPUContext, unsigned int device_idx); - -////////////////////////////////////////////////////////////////////////////// -//! Gets the id of device with maximal FLOPS from the context -//! -//! @return the id -//! @param cxGPUContext OpenCL context -////////////////////////////////////////////////////////////////////////////// -extern "C" cl_device_id oclGetMaxFlopsDev(cl_context cxGPUContext); - -////////////////////////////////////////////////////////////////////////////// -//! Loads a Program file and prepends the cPreamble to the code. -//! -//! @return the source string if succeeded, 0 otherwise -//! @param cFilename program filename -//! @param cPreamble code that is prepended to the loaded file, typically a set of #defines or a header -//! @param szFinalLength returned length of the code string -////////////////////////////////////////////////////////////////////////////// -extern "C" char* oclLoadProgSource(const char* cFilename, const char* cPreamble, size_t* szFinalLength); - -////////////////////////////////////////////////////////////////////////////// -//! Get the binary (PTX) of the program associated with the device -//! -//! @param cpProgram OpenCL program -//! @param cdDevice device of interest -//! @param binary returned code -//! @param length length of returned code -////////////////////////////////////////////////////////////////////////////// -extern "C" void oclGetProgBinary( cl_program cpProgram, cl_device_id cdDevice, char** binary, size_t* length); - -////////////////////////////////////////////////////////////////////////////// -//! Get and log the binary (PTX) from the OpenCL compiler for the requested program & device -//! -//! @param cpProgram OpenCL program -//! @param cdDevice device of interest -//! 
@param const char* cPtxFileName optional PTX file name -////////////////////////////////////////////////////////////////////////////// -extern "C" void oclLogPtx(cl_program cpProgram, cl_device_id cdDevice, const char* cPtxFileName); - -////////////////////////////////////////////////////////////////////////////// -//! Get and log the Build Log from the OpenCL compiler for the requested program & device -//! -//! @param cpProgram OpenCL program -//! @param cdDevice device of interest -////////////////////////////////////////////////////////////////////////////// -extern "C" void oclLogBuildInfo(cl_program cpProgram, cl_device_id cdDevice); - -// Helper function for De-allocating cl objects -// ********************************************************************* -extern "C" void oclDeleteMemObjs(cl_mem* cmMemObjs, int iNumObjs); - -// Helper function to get OpenCL error string from constant -// ********************************************************************* -extern "C" const char* oclErrorString(cl_int error); - -// Helper function to get OpenCL image format string (channel order and type) from constant -// ********************************************************************* -extern "C" const char* oclImageFormatString(cl_uint uiImageFormat); - -// companion inline function for error checking and exit on error WITH Cleanup Callback (if supplied) -// ********************************************************************* -inline void __oclCheckErrorEX(cl_int iSample, cl_int iReference, void (*pCleanup)(int), const char* cFile, const int iLine) -{ - // An error condition is defined by the sample/test value not equal to the reference - if (iReference != iSample) - { - // If the sample/test value isn't equal to the ref, it's an error by defnition, so override 0 sample/test value - iSample = (iSample == 0) ? -9999 : iSample; - - // Log the error info - shrLog("\n !!! 
Error # %i (%s) at line %i , in file %s !!!\n\n", iSample, oclErrorString(iSample), iLine, cFile); - - // Cleanup and exit, or just exit if no cleanup function pointer provided. Use iSample (error code in this case) as process exit code. - if (pCleanup != NULL) - { - pCleanup(iSample); - } - else - { - shrLogEx(LOGBOTH | CLOSELOG, 0, "Exiting...\n"); - exit(iSample); - } - } -} - +/* + * Copyright 1993-2010 NVIDIA Corporation. All rights reserved. + * + * Please refer to the NVIDIA end user license agreement (EULA) associated + * with this source code for terms and conditions that govern your use of + * this software. Any use, reproduction, disclosure, or distribution of + * this software and related documentation outside the terms of the EULA + * is strictly prohibited. + * + */ + +#ifndef OCL_UTILS_H +#define OCL_UTILS_H + +// ********************************************************************* +// Utilities specific to OpenCL samples in NVIDIA GPU Computing SDK +// ********************************************************************* + +// Common headers: Cross-API utililties and OpenCL header +#include "shrUtils.h" + +// All OpenCL headers +#if defined (__APPLE__) || defined(MACOSX) + #include +#else + #include +#endif + +// Includes +#include +#include +#include + +// For systems with CL_EXT that are not updated with these extensions, we copied these +// extensions from +#ifndef CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV + /* cl_nv_device_attribute_query extension - no extension #define since it has no functions */ + #define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 0x4000 + #define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV 0x4001 + #define CL_DEVICE_REGISTERS_PER_BLOCK_NV 0x4002 + #define CL_DEVICE_WARP_SIZE_NV 0x4003 + #define CL_DEVICE_GPU_OVERLAP_NV 0x4004 + #define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV 0x4005 + #define CL_DEVICE_INTEGRATED_MEMORY_NV 0x4006 +#endif + +// reminders for build output window and log +#ifdef _WIN32 + #pragma message ("Note: including 
shrUtils.h") + #pragma message ("Note: including opencl.h") +#endif + +// SDK Revision # +#define OCL_SDKREVISION "7027912" + +// Error and Exit Handling Macros... +// ********************************************************************* +// Full error handling macro with Cleanup() callback (if supplied)... +// (Companion Inline Function lower on page) +#define oclCheckErrorEX(a, b, c) __oclCheckErrorEX(a, b, c, __FILE__ , __LINE__) + +// Short version without Cleanup() callback pointer +// Both Input (a) and Reference (b) are specified as args +#define oclCheckError(a, b) oclCheckErrorEX(a, b, 0) + +////////////////////////////////////////////////////////////////////////////// +//! Gets the platform ID for NVIDIA if available, otherwise default to platform 0 +//! +//! @return the id +//! @param clSelectedPlatformID OpenCL platform ID +////////////////////////////////////////////////////////////////////////////// +extern "C" cl_int oclGetPlatformID(cl_platform_id* clSelectedPlatformID); + +////////////////////////////////////////////////////////////////////////////// +//! Print info about the device +//! +//! @param iLogMode enum LOGBOTH, LOGCONSOLE, LOGFILE +//! @param device OpenCL id of the device +////////////////////////////////////////////////////////////////////////////// +extern "C" void oclPrintDevInfo(int iLogMode, cl_device_id device); + +////////////////////////////////////////////////////////////////////////////// +//! Get and return device capability +//! +//! @return the 2 digit integer representation of device Cap (major minor). return -1 if NA +//! @param device OpenCL id of the device +////////////////////////////////////////////////////////////////////////////// +extern "C" int oclGetDevCap(cl_device_id device); + +////////////////////////////////////////////////////////////////////////////// +//! Print the device name +//! +//! @param iLogMode enum LOGBOTH, LOGCONSOLE, LOGFILE +//! 
@param device OpenCL id of the device +////////////////////////////////////////////////////////////////////////////// +extern "C" void oclPrintDevName(int iLogMode, cl_device_id device); + +////////////////////////////////////////////////////////////////////////////// +//! Gets the id of the first device from the context +//! +//! @return the id +//! @param cxGPUContext OpenCL context +////////////////////////////////////////////////////////////////////////////// +extern "C" cl_device_id oclGetFirstDev(cl_context cxGPUContext); + +////////////////////////////////////////////////////////////////////////////// +//! Gets the id of the nth device from the context +//! +//! @return the id or -1 when out of range +//! @param cxGPUContext OpenCL context +//! @param device_idx index of the device of interest +////////////////////////////////////////////////////////////////////////////// +extern "C" cl_device_id oclGetDev(cl_context cxGPUContext, unsigned int device_idx); + +////////////////////////////////////////////////////////////////////////////// +//! Gets the id of device with maximal FLOPS from the context +//! +//! @return the id +//! @param cxGPUContext OpenCL context +////////////////////////////////////////////////////////////////////////////// +extern "C" cl_device_id oclGetMaxFlopsDev(cl_context cxGPUContext); + +////////////////////////////////////////////////////////////////////////////// +//! Loads a Program file and prepends the cPreamble to the code. +//! +//! @return the source string if succeeded, 0 otherwise +//! @param cFilename program filename +//! @param cPreamble code that is prepended to the loaded file, typically a set of #defines or a header +//! 
@param szFinalLength returned length of the code string +////////////////////////////////////////////////////////////////////////////// +extern "C" char* oclLoadProgSource(const char* cFilename, const char* cPreamble, size_t* szFinalLength); + +////////////////////////////////////////////////////////////////////////////// +//! Get the binary (PTX) of the program associated with the device +//! +//! @param cpProgram OpenCL program +//! @param cdDevice device of interest +//! @param binary returned code +//! @param length length of returned code +////////////////////////////////////////////////////////////////////////////// +extern "C" void oclGetProgBinary( cl_program cpProgram, cl_device_id cdDevice, char** binary, size_t* length); + +////////////////////////////////////////////////////////////////////////////// +//! Get and log the binary (PTX) from the OpenCL compiler for the requested program & device +//! +//! @param cpProgram OpenCL program +//! @param cdDevice device of interest +//! @param const char* cPtxFileName optional PTX file name +////////////////////////////////////////////////////////////////////////////// +extern "C" void oclLogPtx(cl_program cpProgram, cl_device_id cdDevice, const char* cPtxFileName); + +////////////////////////////////////////////////////////////////////////////// +//! Get and log the Build Log from the OpenCL compiler for the requested program & device +//! +//! @param cpProgram OpenCL program +//! 
@param cdDevice device of interest +////////////////////////////////////////////////////////////////////////////// +extern "C" void oclLogBuildInfo(cl_program cpProgram, cl_device_id cdDevice); + +// Helper function for De-allocating cl objects +// ********************************************************************* +extern "C" void oclDeleteMemObjs(cl_mem* cmMemObjs, int iNumObjs); + +// Helper function to get OpenCL error string from constant +// ********************************************************************* +extern "C" const char* oclErrorString(cl_int error); + +// Helper function to get OpenCL image format string (channel order and type) from constant +// ********************************************************************* +extern "C" const char* oclImageFormatString(cl_uint uiImageFormat); + +// companion inline function for error checking and exit on error WITH Cleanup Callback (if supplied) +// ********************************************************************* +inline void __oclCheckErrorEX(cl_int iSample, cl_int iReference, void (*pCleanup)(int), const char* cFile, const int iLine) +{ + // An error condition is defined by the sample/test value not equal to the reference + if (iReference != iSample) + { + // If the sample/test value isn't equal to the ref, it's an error by defnition, so override 0 sample/test value + iSample = (iSample == 0) ? -9999 : iSample; + + // Log the error info + shrLog("\n !!! Error # %i (%s) at line %i , in file %s !!!\n\n", iSample, oclErrorString(iSample), iLine, cFile); + + // Cleanup and exit, or just exit if no cleanup function pointer provided. Use iSample (error code in this case) as process exit code. 
+ if (pCleanup != NULL) + { + pCleanup(iSample); + } + else + { + shrLogEx(LOGBOTH | CLOSELOG, 0, "Exiting...\n"); + exit(iSample); + } + } +} + #endif \ No newline at end of file diff --git a/benchmarks/opencl/transpose/shrQATest.h b/benchmarks/opencl/transpose/shrQATest.h index 245cf8dc..93d2d9eb 100644 --- a/benchmarks/opencl/transpose/shrQATest.h +++ b/benchmarks/opencl/transpose/shrQATest.h @@ -1,238 +1,238 @@ -/* -* Copyright 1993-2010 NVIDIA Corporation. All rights reserved. -* -* Please refer to the NVIDIA end user license agreement (EULA) associated -* with this source code for terms and conditions that govern your use of -* this software. Any use, reproduction, disclosure, or distribution of -* this software and related documentation outside the terms of the EULA -* is strictly prohibited. -* -*/ - -#ifndef SHR_QATEST_H -#define SHR_QATEST_H - -// ********************************************************************* -// Generic utilities for NVIDIA GPU Computing SDK -// ********************************************************************* - -// OS dependent includes -#ifdef _WIN32 - #pragma message ("Note: including windows.h") - #pragma message ("Note: including math.h") - #pragma message ("Note: including assert.h") - #pragma message ("Note: including time.h") - -// Headers needed for Windows - #include - #include -#else - // Headers needed for Linux - #include - #include - #include - #include - #include - #include - #include - #include - #include -#endif - -#ifndef STRCASECMP -#ifdef _WIN32 -#define STRCASECMP _stricmp -#else -#define STRCASECMP strcasecmp -#endif -#endif - -#ifndef STRNCASECMP -#ifdef _WIN32 -#define STRNCASECMP _strnicmp -#else -#define STRNCASECMP strncasecmp -#endif -#endif - - -// Standardized QA Start/Finish for CUDA SDK tests -#define shrQAStart(a, b) __shrQAStart(a, b) -#define shrQAFinish(a, b, c) __shrQAFinish(a, b, c) -#define shrQAFinish2(a, b, c, d) __shrQAFinish2(a, b, c, d) - -inline int findExeNameStart(const char 
*exec_name) -{ - int exename_start = (int)strlen(exec_name); - - while( (exename_start > 0) && - (exec_name[exename_start] != '\\') && - (exec_name[exename_start] != '/') ) - { - exename_start--; - } - if (exec_name[exename_start] == '\\' || - exec_name[exename_start] == '/') - { - return exename_start+1; - } else { - return exename_start; - } -} - -inline int __shrQAStart(int argc, char **argv) -{ - bool bQATest = false; - // First clear the output buffer - fflush(stdout); - fflush(stdout); - - for (int i=1; i < argc; i++) { - int string_start = 0; - while (argv[i][string_start] == '-') - string_start++; - char *string_argv = &argv[i][string_start]; - - if (!STRCASECMP(string_argv, "qatest")) { - bQATest = true; - } - } - - // We don't want to print the entire path, so we search for the first - int exename_start = findExeNameStart(argv[0]); - if (bQATest) { - fprintf(stdout, "&&&& RUNNING %s", &(argv[0][exename_start])); - for (int i=1; i < argc; i++) fprintf(stdout, " %s", argv[i]); - fprintf(stdout, "\n"); - } else { - fprintf(stdout, "[%s] starting...\n", &(argv[0][exename_start])); - } - fflush(stdout); - printf("\n"); fflush(stdout); - return exename_start; -} - -enum eQAstatus { - QA_FAILED = 0, - QA_PASSED = 1, - QA_WAIVED = 2 -}; - -inline void __ExitInTime(int seconds) -{ - fprintf(stdout, "> exiting in %d seconds: ", seconds); - fflush(stdout); - time_t t; - int count; - for (t=time(0)+seconds, count=seconds; time(0) < t; count--) { - fprintf(stdout, "%d...", count); -#ifdef WIN32 - Sleep(1000); -#else - sleep(1); -#endif - } - fprintf(stdout,"done!\n\n"); - fflush(stdout); -} - - -inline void __shrQAFinish(int argc, const char **argv, int iStatus) -{ - // By default QATest is disabled and NoPrompt is Enabled (times out at seconds passed into __ExitInTime() ) - bool bQATest = false, bNoPrompt = true, bQuitInTime = true; - const char *sStatus[] = { "FAILED", "PASSED", "WAIVED", NULL }; - - for (int i=1; i < argc; i++) { - int string_start = 0; - while 
(argv[i][string_start] == '-') - string_start++; - - const char *string_argv = &argv[i][string_start]; - if (!STRCASECMP(string_argv, "qatest")) { - bQATest = true; - } - // For SDK individual samples that don't specify -noprompt or -prompt, - // a 3 second delay will happen before exiting, giving a user time to view results - if (!STRCASECMP(string_argv, "noprompt") || !STRCASECMP(string_argv, "help")) { - bNoPrompt = true; - bQuitInTime = false; - } - if (!STRCASECMP(string_argv, "prompt")) { - bNoPrompt = false; - bQuitInTime = false; - } - } - - int exename_start = findExeNameStart(argv[0]); - if (bQATest) { - fprintf(stdout, "&&&& %s %s", sStatus[iStatus], &(argv[0][exename_start])); - for (int i=1; i < argc; i++) fprintf(stdout, " %s", argv[i]); - fprintf(stdout, "\n"); - } else { - fprintf(stdout, "[%s] test results...\n%s\n", &(argv[0][exename_start]), sStatus[iStatus]); - } - fflush(stdout); - printf("\n"); fflush(stdout); - if (bQuitInTime) { - __ExitInTime(3); - } else { - if (!bNoPrompt) { - fprintf(stdout, "\nPress to exit...\n"); - fflush(stdout); - getchar(); - } - } -} - -inline void __shrQAFinish2(bool bQATest, int argc, const char **argv, int iStatus) -{ - bool bQuitInTime = true; - const char *sStatus[] = { "FAILED", "PASSED", "WAIVED", NULL }; - - for (int i=1; i < argc; i++) { - int string_start = 0; - while (argv[i][string_start] == '-') - string_start++; - - const char *string_argv = &argv[i][string_start]; - // For SDK individual samples that don't specify -noprompt or -prompt, - // a 3 second delay will happen before exiting, giving a user time to view results - if (!STRCASECMP(string_argv, "noprompt") || !STRCASECMP(string_argv, "help")) { - bQuitInTime = false; - } - if (!STRCASECMP(string_argv, "prompt")) { - bQuitInTime = false; - } - } - - int exename_start = findExeNameStart(argv[0]); - if (bQATest) { - fprintf(stdout, "&&&& %s %s", sStatus[iStatus], &(argv[0][exename_start])); - for (int i=1; i < argc; i++) fprintf(stdout, " %s", 
argv[i]); - fprintf(stdout, "\n"); - } else { - fprintf(stdout, "[%s] test results...\n%s\n", &(argv[0][exename_start]), sStatus[iStatus]); - } - fflush(stdout); - - if (bQuitInTime) { - __ExitInTime(3); - } -} - -inline void shrQAFinishExit(int argc, const char **argv, int iStatus) -{ - __shrQAFinish(argc, argv, iStatus); - - exit(iStatus ? EXIT_SUCCESS : EXIT_FAILURE); -} - -inline void shrQAFinishExit2(bool bQAtest, int argc, const char **argv, int iStatus) -{ - __shrQAFinish2(bQAtest, argc, argv, iStatus); - - exit(iStatus ? EXIT_SUCCESS : EXIT_FAILURE); -} - +/* +* Copyright 1993-2010 NVIDIA Corporation. All rights reserved. +* +* Please refer to the NVIDIA end user license agreement (EULA) associated +* with this source code for terms and conditions that govern your use of +* this software. Any use, reproduction, disclosure, or distribution of +* this software and related documentation outside the terms of the EULA +* is strictly prohibited. +* +*/ + +#ifndef SHR_QATEST_H +#define SHR_QATEST_H + +// ********************************************************************* +// Generic utilities for NVIDIA GPU Computing SDK +// ********************************************************************* + +// OS dependent includes +#ifdef _WIN32 + #pragma message ("Note: including windows.h") + #pragma message ("Note: including math.h") + #pragma message ("Note: including assert.h") + #pragma message ("Note: including time.h") + +// Headers needed for Windows + #include + #include +#else + // Headers needed for Linux + #include + #include + #include + #include + #include + #include + #include + #include + #include +#endif + +#ifndef STRCASECMP +#ifdef _WIN32 +#define STRCASECMP _stricmp +#else +#define STRCASECMP strcasecmp +#endif +#endif + +#ifndef STRNCASECMP +#ifdef _WIN32 +#define STRNCASECMP _strnicmp +#else +#define STRNCASECMP strncasecmp +#endif +#endif + + +// Standardized QA Start/Finish for CUDA SDK tests +#define shrQAStart(a, b) __shrQAStart(a, b) +#define 
shrQAFinish(a, b, c) __shrQAFinish(a, b, c) +#define shrQAFinish2(a, b, c, d) __shrQAFinish2(a, b, c, d) + +inline int findExeNameStart(const char *exec_name) +{ + int exename_start = (int)strlen(exec_name); + + while( (exename_start > 0) && + (exec_name[exename_start] != '\\') && + (exec_name[exename_start] != '/') ) + { + exename_start--; + } + if (exec_name[exename_start] == '\\' || + exec_name[exename_start] == '/') + { + return exename_start+1; + } else { + return exename_start; + } +} + +inline int __shrQAStart(int argc, char **argv) +{ + bool bQATest = false; + // First clear the output buffer + fflush(stdout); + fflush(stdout); + + for (int i=1; i < argc; i++) { + int string_start = 0; + while (argv[i][string_start] == '-') + string_start++; + char *string_argv = &argv[i][string_start]; + + if (!STRCASECMP(string_argv, "qatest")) { + bQATest = true; + } + } + + // We don't want to print the entire path, so we search for the first + int exename_start = findExeNameStart(argv[0]); + if (bQATest) { + fprintf(stdout, "&&&& RUNNING %s", &(argv[0][exename_start])); + for (int i=1; i < argc; i++) fprintf(stdout, " %s", argv[i]); + fprintf(stdout, "\n"); + } else { + fprintf(stdout, "[%s] starting...\n", &(argv[0][exename_start])); + } + fflush(stdout); + printf("\n"); fflush(stdout); + return exename_start; +} + +enum eQAstatus { + QA_FAILED = 0, + QA_PASSED = 1, + QA_WAIVED = 2 +}; + +inline void __ExitInTime(int seconds) +{ + fprintf(stdout, "> exiting in %d seconds: ", seconds); + fflush(stdout); + time_t t; + int count; + for (t=time(0)+seconds, count=seconds; time(0) < t; count--) { + fprintf(stdout, "%d...", count); +#ifdef WIN32 + Sleep(1000); +#else + sleep(1); +#endif + } + fprintf(stdout,"done!\n\n"); + fflush(stdout); +} + + +inline void __shrQAFinish(int argc, const char **argv, int iStatus) +{ + // By default QATest is disabled and NoPrompt is Enabled (times out at seconds passed into __ExitInTime() ) + bool bQATest = false, bNoPrompt = true, 
bQuitInTime = true; + const char *sStatus[] = { "FAILED", "PASSED", "WAIVED", NULL }; + + for (int i=1; i < argc; i++) { + int string_start = 0; + while (argv[i][string_start] == '-') + string_start++; + + const char *string_argv = &argv[i][string_start]; + if (!STRCASECMP(string_argv, "qatest")) { + bQATest = true; + } + // For SDK individual samples that don't specify -noprompt or -prompt, + // a 3 second delay will happen before exiting, giving a user time to view results + if (!STRCASECMP(string_argv, "noprompt") || !STRCASECMP(string_argv, "help")) { + bNoPrompt = true; + bQuitInTime = false; + } + if (!STRCASECMP(string_argv, "prompt")) { + bNoPrompt = false; + bQuitInTime = false; + } + } + + int exename_start = findExeNameStart(argv[0]); + if (bQATest) { + fprintf(stdout, "&&&& %s %s", sStatus[iStatus], &(argv[0][exename_start])); + for (int i=1; i < argc; i++) fprintf(stdout, " %s", argv[i]); + fprintf(stdout, "\n"); + } else { + fprintf(stdout, "[%s] test results...\n%s\n", &(argv[0][exename_start]), sStatus[iStatus]); + } + fflush(stdout); + printf("\n"); fflush(stdout); + if (bQuitInTime) { + __ExitInTime(3); + } else { + if (!bNoPrompt) { + fprintf(stdout, "\nPress to exit...\n"); + fflush(stdout); + getchar(); + } + } +} + +inline void __shrQAFinish2(bool bQATest, int argc, const char **argv, int iStatus) +{ + bool bQuitInTime = true; + const char *sStatus[] = { "FAILED", "PASSED", "WAIVED", NULL }; + + for (int i=1; i < argc; i++) { + int string_start = 0; + while (argv[i][string_start] == '-') + string_start++; + + const char *string_argv = &argv[i][string_start]; + // For SDK individual samples that don't specify -noprompt or -prompt, + // a 3 second delay will happen before exiting, giving a user time to view results + if (!STRCASECMP(string_argv, "noprompt") || !STRCASECMP(string_argv, "help")) { + bQuitInTime = false; + } + if (!STRCASECMP(string_argv, "prompt")) { + bQuitInTime = false; + } + } + + int exename_start = findExeNameStart(argv[0]); 
+ if (bQATest) { + fprintf(stdout, "&&&& %s %s", sStatus[iStatus], &(argv[0][exename_start])); + for (int i=1; i < argc; i++) fprintf(stdout, " %s", argv[i]); + fprintf(stdout, "\n"); + } else { + fprintf(stdout, "[%s] test results...\n%s\n", &(argv[0][exename_start]), sStatus[iStatus]); + } + fflush(stdout); + + if (bQuitInTime) { + __ExitInTime(3); + } +} + +inline void shrQAFinishExit(int argc, const char **argv, int iStatus) +{ + __shrQAFinish(argc, argv, iStatus); + + exit(iStatus ? EXIT_SUCCESS : EXIT_FAILURE); +} + +inline void shrQAFinishExit2(bool bQAtest, int argc, const char **argv, int iStatus) +{ + __shrQAFinish2(bQAtest, argc, argv, iStatus); + + exit(iStatus ? EXIT_SUCCESS : EXIT_FAILURE); +} + #endif \ No newline at end of file diff --git a/benchmarks/opencl/transpose/shrUtils.h b/benchmarks/opencl/transpose/shrUtils.h index 0f2795d4..45ace670 100644 --- a/benchmarks/opencl/transpose/shrUtils.h +++ b/benchmarks/opencl/transpose/shrUtils.h @@ -1,642 +1,642 @@ -/* -* Copyright 1993-2010 NVIDIA Corporation. All rights reserved. -* -* Please refer to the NVIDIA end user license agreement (EULA) associated -* with this source code for terms and conditions that govern your use of -* this software. Any use, reproduction, disclosure, or distribution of -* this software and related documentation outside the terms of the EULA -* is strictly prohibited. 
-* -*/ - -#ifndef SHR_UTILS_H -#define SHR_UTILS_H - -// ********************************************************************* -// Generic utilities for NVIDIA GPU Computing SDK -// ********************************************************************* - -// reminders for output window and build log -#ifdef _WIN32 - #pragma message ("Note: including windows.h") - #pragma message ("Note: including math.h") - #pragma message ("Note: including assert.h") -#endif - -// OS dependent includes -#ifdef _WIN32 - // Headers needed for Windows - #include -#else - // Headers needed for Linux - #include - #include - #include - #include - #include - #include - #include -#endif - -// Other headers needed for both Windows and Linux -#include -#include -#include -#include -#include - -// Un-comment the following #define to enable profiling code in SDK apps -//#define GPU_PROFILING - -// Beginning of GPU Architecture definitions -inline int ConvertSMVer2Cores(int major, int minor) -{ - // Defines for GPU Architecture types (using the SM version to determine the # of cores per SM - typedef struct { - int SM; // 0xMm (hexidecimal notation), M = SM Major version, and m = SM minor version - int Cores; - } sSMtoCores; - - sSMtoCores nGpuArchCoresPerSM[] = - { { 0x10, 8 }, // Tesla Generation (SM 1.0) G80 class - { 0x11, 8 }, // Tesla Generation (SM 1.1) G8x class - { 0x12, 8 }, // Tesla Generation (SM 1.2) G9x class - { 0x13, 8 }, // Tesla Generation (SM 1.3) GT200 class - { 0x20, 32 }, // Fermi Generation (SM 2.0) GF100 class - { 0x21, 48 }, // Fermi Generation (SM 2.1) GF10x class - { 0x30, 192}, // Fermi Generation (SM 3.0) GK10x class - { -1, -1 } - }; - - int index = 0; - while (nGpuArchCoresPerSM[index].SM != -1) { - if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor) ) { - return nGpuArchCoresPerSM[index].Cores; - } - index++; - } - printf("MapSMtoCores SM %d.%d is undefined (please update to the latest SDK)!\n", major, minor); - return -1; -} -// end of GPU Architecture 
definitions - - -// Defines and enum for use with logging functions -// ********************************************************************* -#define DEFAULTLOGFILE "SdkConsoleLog.txt" -#define MASTERLOGFILE "SdkMasterLog.csv" -enum LOGMODES -{ - LOGCONSOLE = 1, // bit to signal "log to console" - LOGFILE = 2, // bit to signal "log to file" - LOGBOTH = 3, // convenience union of first 2 bits to signal "log to both" - APPENDMODE = 4, // bit to set "file append" mode instead of "replace mode" on open - MASTER = 8, // bit to signal master .csv log output - ERRORMSG = 16, // bit to signal "pre-pend Error" - CLOSELOG = 32 // bit to close log file, if open, after any requested file write -}; -#define HDASHLINE "-----------------------------------------------------------\n" - -// Standardized boolean -enum shrBOOL -{ - shrFALSE = 0, - shrTRUE = 1 -}; - -// Standardized MAX, MIN and CLAMP -#define MAX(a, b) ((a > b) ? a : b) -#define MIN(a, b) ((a < b) ? a : b) -#define CLAMP(a, b, c) MIN(MAX(a, b), c) // double sided clip of input a -#define TOPCLAMP(a, b) (a < b ? a:b) // single top side clip of input a - -// Error and Exit Handling Macros... -// ********************************************************************* -// Full error handling macro with Cleanup() callback (if supplied)... -// (Companion Inline Function lower on page) -#define shrCheckErrorEX(a, b, c) __shrCheckErrorEX(a, b, c, __FILE__ , __LINE__) - -// Short version without Cleanup() callback pointer -// Both Input (a) and Reference (b) are specified as args -#define shrCheckError(a, b) shrCheckErrorEX(a, b, 0) - -// Standardized Exit Macro for leaving main()... extended version -// (Companion Inline Function lower on page) -#define shrExitEX(a, b, c) __shrExitEX(a, b, c) - -// Standardized Exit Macro for leaving main()... 
short version -// (Companion Inline Function lower on page) -#define shrEXIT(a, b) __shrExitEX(a, b, EXIT_SUCCESS) - -// Simple argument checker macro -#define ARGCHECK(a) if((a) != shrTRUE)return shrFALSE - -// Define for user-customized error handling -#define STDERROR "file %s, line %i\n\n" , __FILE__ , __LINE__ - -// Function to deallocate memory allocated within shrUtils -// ********************************************************************* -extern "C" void shrFree(void* ptr); - -// ********************************************************************* -// Helper function to log standardized information to Console, to File or to both -//! Examples: shrLogEx(LOGBOTH, 0, "Function A\n"); -//! : shrLogEx(LOGBOTH | ERRORMSG, ciErrNum, STDERROR); -//! -//! Automatically opens file and stores handle if needed and not done yet -//! Closes file and nulls handle on request -//! -//! @param 0 iLogMode: LOGCONSOLE, LOGFILE, LOGBOTH, APPENDMODE, MASTER, ERRORMSG, CLOSELOG. -//! LOGFILE and LOGBOTH may be | 'd with APPENDMODE to select file append mode instead of overwrite mode -//! LOGFILE and LOGBOTH may be | 'd with CLOSELOG to "write and close" -//! First 3 options may be | 'd with MASTER to enable independent write to master data log file -//! First 3 options may be | 'd with ERRORMSG to start line with standard error message -//! @param 2 dValue: -//! Positive val = double value for time in secs to be formatted to 6 decimals. -//! Negative val is an error code and this give error preformatting. -//! @param 3 cFormatString: String with formatting specifiers like printf or fprintf. -//! ALL printf flags, width, precision and type specifiers are supported with this exception: -//! Wide char type specifiers intended for wprintf (%S and %C) are NOT supported -//! Single byte char type specifiers (%s and %c) ARE supported -//! @param 4... variable args: like printf or fprintf. Must match format specifer type above. -//! 
@return 0 if OK, negative value on error or if error occurs or was passed in. -// ********************************************************************* -extern "C" int shrLogEx(int iLogMode, int iErrNum, const char* cFormatString, ...); - -// Short version of shrLogEx defaulting to shrLogEx(LOGBOTH, 0, -// ********************************************************************* -extern "C" int shrLog(const char* cFormatString, ...); - -// ********************************************************************* -// Delta timer function for up to 3 independent timers using host high performance counters -// Maintains state for 3 independent counters -//! Example: double dElapsedTime = shrDeltaTime(0); -//! -//! @param 0 iCounterID: Which timer to check/reset. (0, 1, 2) -//! @return delta time of specified counter since last call in seconds. Otherwise -9999.0 if error -// ********************************************************************* -extern "C" double shrDeltaT(int iCounterID); - -// Optional LogFileNameOverride function -// ********************************************************************* -extern "C" void shrSetLogFileName (const char* cOverRideName); - -// Helper function to init data arrays -// ********************************************************************* -extern "C" void shrFillArray(float* pfData, int iSize); - -// Helper function to print data arrays -// ********************************************************************* -extern "C" void shrPrintArray(float* pfData, int iSize); - -//////////////////////////////////////////////////////////////////////////// -//! Find the path for a filename -//! @return the path if succeeded, otherwise 0 -//! @param filename name of the file -//! 
@param executablePath optional absolute path of the executable -//////////////////////////////////////////////////////////////////////////// -extern "C" char* shrFindFilePath(const char* filename, const char* executablePath); - -//////////////////////////////////////////////////////////////////////////// -//! Read file \filename containing single precision floating point data -//! @return shrTRUE if reading the file succeeded, otherwise shrFALSE -//! @param filename name of the source file -//! @param data uninitialized pointer, returned initialized and pointing to -//! the data read -//! @param len number of data elements in data, -1 on error -//! @note If a NULL pointer is passed to this function and it is initialized -//! within shrUtils, then free() has to be used to deallocate the memory -//////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrReadFilef( const char* filename, float** data, unsigned int* len, - bool verbose = false); - -//////////////////////////////////////////////////////////////////////////// -//! Read file \filename containing double precision floating point data -//! @return shrTRUE if reading the file succeeded, otherwise shrFALSE -//! @param filename name of the source file -//! @param data uninitialized pointer, returned initialized and pointing to -//! the data read -//! @param len number of data elements in data, -1 on error -//! @note If a NULL pointer is passed to this function and it is -//! @note If a NULL pointer is passed to this function and it is initialized -//! within shrUtils, then free() has to be used to deallocate the memory -//////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrReadFiled( const char* filename, double** data, unsigned int* len, - bool verbose = false); - -//////////////////////////////////////////////////////////////////////////// -//! Read file \filename containing integer data -//! 
@return shrTRUE if reading the file succeeded, otherwise shrFALSE -//! @param filename name of the source file -//! @param data uninitialized pointer, returned initialized and pointing to -//! the data read -//! @param len number of data elements in data, -1 on error -//! @note If a NULL pointer is passed to this function and it is -//! @note If a NULL pointer is passed to this function and it is initialized -//! within shrUtils, then free() has to be used to deallocate the memory -//////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrReadFilei( const char* filename, int** data, unsigned int* len, bool verbose = false); - -//////////////////////////////////////////////////////////////////////////// -//! Read file \filename containing unsigned integer data -//! @return shrTRUE if reading the file succeeded, otherwise shrFALSE -//! @param filename name of the source file -//! @param data uninitialized pointer, returned initialized and pointing to -//! the data read -//! @param len number of data elements in data, -1 on error -//! @note If a NULL pointer is passed to this function and it is -//! @note If a NULL pointer is passed to this function and it is initialized -//! within shrUtils, then free() has to be used to deallocate the memory -//////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrReadFileui( const char* filename, unsigned int** data, - unsigned int* len, bool verbose = false); - -//////////////////////////////////////////////////////////////////////////// -//! Read file \filename containing char / byte data -//! @return shrTRUE if reading the file succeeded, otherwise shrFALSE -//! @param filename name of the source file -//! @param data uninitialized pointer, returned initialized and pointing to -//! the data read -//! @param len number of data elements in data, -1 on error -//! @note If a NULL pointer is passed to this function and it is -//! 
@note If a NULL pointer is passed to this function and it is initialized -//! within shrUtils, then free() has to be used to deallocate the memory -//////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrReadFileb( const char* filename, char** data, unsigned int* len, - bool verbose = false); - -//////////////////////////////////////////////////////////////////////////// -//! Read file \filename containing unsigned char / byte data -//! @return shrTRUE if reading the file succeeded, otherwise shrFALSE -//! @param filename name of the source file -//! @param data uninitialized pointer, returned initialized and pointing to -//! the data read -//! @param len number of data elements in data, -1 on error -//! @note If a NULL pointer is passed to this function and it is -//! @note If a NULL pointer is passed to this function and it is initialized -//! within shrUtils, then free() has to be used to deallocate the memory -//////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrReadFileub( const char* filename, unsigned char** data, - unsigned int* len, bool verbose = false); - -//////////////////////////////////////////////////////////////////////////// -//! Write a data file \filename containing single precision floating point -//! data -//! @return shrTRUE if writing the file succeeded, otherwise shrFALSE -//! @param filename name of the file to write -//! @param data pointer to data to write -//! @param len number of data elements in data, -1 on error -//! @param epsilon epsilon for comparison -//////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrWriteFilef( const char* filename, const float* data, unsigned int len, - const float epsilon, bool verbose = false); - -//////////////////////////////////////////////////////////////////////////// -//! Write a data file \filename containing double precision floating point -//! data -//! 
@return shrTRUE if writing the file succeeded, otherwise shrFALSE -//! @param filename name of the file to write -//! @param data pointer to data to write -//! @param len number of data elements in data, -1 on error -//! @param epsilon epsilon for comparison -//////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrWriteFiled( const char* filename, const float* data, unsigned int len, - const double epsilon, bool verbose = false); - -//////////////////////////////////////////////////////////////////////////// -//! Write a data file \filename containing integer data -//! @return shrTRUE if writing the file succeeded, otherwise shrFALSE -//! @param filename name of the file to write -//! @param data pointer to data to write -//! @param len number of data elements in data, -1 on error -//////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrWriteFilei( const char* filename, const int* data, unsigned int len, - bool verbose = false); - -//////////////////////////////////////////////////////////////////////////// -//! Write a data file \filename containing unsigned integer data -//! @return shrTRUE if writing the file succeeded, otherwise shrFALSE -//! @param filename name of the file to write -//! @param data pointer to data to write -//! @param len number of data elements in data, -1 on error -//////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrWriteFileui( const char* filename, const unsigned int* data, - unsigned int len, bool verbose = false); - -//////////////////////////////////////////////////////////////////////////// -//! Write a data file \filename containing char / byte data -//! @return shrTRUE if writing the file succeeded, otherwise shrFALSE -//! @param filename name of the file to write -//! @param data pointer to data to write -//! 
@param len number of data elements in data, -1 on error -//////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrWriteFileb( const char* filename, const char* data, unsigned int len, - bool verbose = false); - -//////////////////////////////////////////////////////////////////////////// -//! Write a data file \filename containing unsigned char / byte data -//! @return shrTRUE if writing the file succeeded, otherwise shrFALSE -//! @param filename name of the file to write -//! @param data pointer to data to write -//! @param len number of data elements in data, -1 on error -//////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrWriteFileub( const char* filename, const unsigned char* data, - unsigned int len, bool verbose = false); - -//////////////////////////////////////////////////////////////////////////// -//! Load PPM image file (with unsigned char as data element type), padding -//! 4th component -//! @return shrTRUE if reading the file succeeded, otherwise shrFALSE -//! @param file name of the image file -//! @param OutData handle to the data read -//! @param w width of the image -//! @param h height of the image -//! -//! Note: If *OutData is NULL this function allocates buffer that must be freed by caller -//////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrLoadPPM4ub(const char* file, unsigned char** OutData, - unsigned int *w, unsigned int *h); - -//////////////////////////////////////////////////////////////////////////// -//! Save PPM image file (with unsigned char as data element type, padded to -//! 4 bytes) -//! @return shrTRUE if saving the file succeeded, otherwise shrFALSE -//! @param file name of the image file -//! @param data handle to the data read -//! @param w width of the image -//! 
@param h height of the image -//////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrSavePPM4ub( const char* file, unsigned char *data, - unsigned int w, unsigned int h); - -//////////////////////////////////////////////////////////////////////////////// -//! Save PGM image file (with unsigned char as data element type) -//! @return shrTRUE if saving the file succeeded, otherwise shrFALSE -//! @param file name of the image file -//! @param data handle to the data read -//! @param w width of the image -//! @param h height of the image -//////////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrSavePGMub( const char* file, unsigned char *data, - unsigned int w, unsigned int h); - -//////////////////////////////////////////////////////////////////////////// -//! Load PGM image file (with unsigned char as data element type) -//! @return shrTRUE if saving the file succeeded, otherwise shrFALSE -//! @param file name of the image file -//! @param data handle to the data read -//! @param w width of the image -//! @param h height of the image -//! @note If a NULL pointer is passed to this function and it is initialized -//! within shrUtils, then free() has to be used to deallocate the memory -//////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrLoadPGMub( const char* file, unsigned char** data, - unsigned int *w,unsigned int *h); - -//////////////////////////////////////////////////////////////////////////// -// Command line arguments: General notes -// * All command line arguments begin with '--' followed by the token; -// token and value are seperated by '='; example --samples=50 -// * Arrays have the form --model=[one.obj,two.obj,three.obj] -// (without whitespaces) -//////////////////////////////////////////////////////////////////////////// - -//////////////////////////////////////////////////////////////////////////// -//! 
Check if command line argument \a flag-name is given -//! @return shrTRUE if command line argument \a flag_name has been given, -//! otherwise shrFALSE -//! @param argc argc as passed to main() -//! @param argv argv as passed to main() -//! @param flag_name name of command line flag -//////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrCheckCmdLineFlag( const int argc, const char** argv, - const char* flag_name); - -//////////////////////////////////////////////////////////////////////////// -//! Get the value of a command line argument of type int -//! @return shrTRUE if command line argument \a arg_name has been given and -//! is of the requested type, otherwise shrFALSE -//! @param argc argc as passed to main() -//! @param argv argv as passed to main() -//! @param arg_name name of the command line argument -//! @param val value of the command line argument -//////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrGetCmdLineArgumenti( const int argc, const char** argv, - const char* arg_name, int* val); - -//////////////////////////////////////////////////////////////////////////// -//! Get the value of a command line argument of type unsigned int -//! @return shrTRUE if command line argument \a arg_name has been given and -//! is of the requested type, otherwise shrFALSE -//! @param argc argc as passed to main() -//! @param argv argv as passed to main() -//! @param arg_name name of the command line argument -//! @param val value of the command line argument -//////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrGetCmdLineArgumentu( const int argc, const char** argv, - const char* arg_name, unsigned int* val); - -//////////////////////////////////////////////////////////////////////////// -//! Get the value of a command line argument of type float -//! @return shrTRUE if command line argument \a arg_name has been given and -//! 
is of the requested type, otherwise shrFALSE -//! @param argc argc as passed to main() -//! @param argv argv as passed to main() -//! @param arg_name name of the command line argument -//! @param val value of the command line argument -//////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrGetCmdLineArgumentf( const int argc, const char** argv, - const char* arg_name, float* val); - -//////////////////////////////////////////////////////////////////////////// -//! Get the value of a command line argument of type string -//! @return shrTRUE if command line argument \a arg_name has been given and -//! is of the requested type, otherwise shrFALSE -//! @param argc argc as passed to main() -//! @param argv argv as passed to main() -//! @param arg_name name of the command line argument -//! @param val value of the command line argument -//////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrGetCmdLineArgumentstr( const int argc, const char** argv, - const char* arg_name, char** val); - -//////////////////////////////////////////////////////////////////////////// -//! Get the value of a command line argument list those element are strings -//! @return shrTRUE if command line argument \a arg_name has been given and -//! is of the requested type, otherwise shrFALSE -//! @param argc argc as passed to main() -//! @param argv argv as passed to main() -//! @param arg_name name of the command line argument -//! @param val command line argument list -//! @param len length of the list / number of elements -//////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrGetCmdLineArgumentListstr( const int argc, const char** argv, - const char* arg_name, char** val, - unsigned int* len); - -//////////////////////////////////////////////////////////////////////////// -//! Compare two float arrays -//! 
@return shrTRUEif \a reference and \a data are identical, otherwise shrFALSE -//! @param reference handle to the reference data / gold image -//! @param data handle to the computed data -//! @param len number of elements in reference and data -//////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrComparef( const float* reference, const float* data, - const unsigned int len); - -//////////////////////////////////////////////////////////////////////////// -//! Compare two integer arrays -//! @return shrTRUEif \a reference and \a data are identical, otherwise shrFALSE -//! @param reference handle to the reference data / gold image -//! @param data handle to the computed data -//! @param len number of elements in reference and data -//////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrComparei( const int* reference, const int* data, - const unsigned int len ); - -//////////////////////////////////////////////////////////////////////////////// -//! Compare two unsigned integer arrays, with epsilon and threshold -//! @return shrTRUEif \a reference and \a data are identical, otherwise shrFALSE -//! @param reference handle to the reference data / gold image -//! @param data handle to the computed data -//! @param len number of elements in reference and data -//! @param threshold tolerance % # of comparison errors (0.15f = 15%) -//////////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrCompareuit( const unsigned int* reference, const unsigned int* data, - const unsigned int len, const float epsilon, const float threshold ); - -//////////////////////////////////////////////////////////////////////////// -//! Compare two unsigned char arrays -//! @return shrTRUEif \a reference and \a data are identical, otherwise shrFALSE -//! @param reference handle to the reference data / gold image -//! @param data handle to the computed data -//! 
@param len number of elements in reference and data -//////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrCompareub( const unsigned char* reference, const unsigned char* data, - const unsigned int len ); - -//////////////////////////////////////////////////////////////////////////////// -//! Compare two integers with a tolernance for # of byte errors -//! @return shrTRUEif \a reference and \a data are identical, otherwise shrFALSE -//! @param reference handle to the reference data / gold image -//! @param data handle to the computed data -//! @param len number of elements in reference and data -//! @param epsilon epsilon to use for the comparison -//! @param threshold tolerance % # of comparison errors (0.15f = 15%) -//////////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrCompareubt( const unsigned char* reference, const unsigned char* data, - const unsigned int len, const float epsilon, const float threshold ); - -//////////////////////////////////////////////////////////////////////////////// -//! Compare two integer arrays witha n epsilon tolerance for equality -//! @return shrTRUEif \a reference and \a data are identical, otherwise shrFALSE -//! @param reference handle to the reference data / gold image -//! @param data handle to the computed data -//! @param len number of elements in reference and data -//! @param epsilon epsilon to use for the comparison -//////////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrCompareube( const unsigned char* reference, const unsigned char* data, - const unsigned int len, const float epsilon ); - -//////////////////////////////////////////////////////////////////////////// -//! Compare two float arrays with an epsilon tolerance for equality -//! @return shrTRUEif \a reference and \a data are identical, otherwise shrFALSE -//! 
@param reference handle to the reference data / gold image -//! @param data handle to the computed data -//! @param len number of elements in reference and data -//! @param epsilon epsilon to use for the comparison -//////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrComparefe( const float* reference, const float* data, - const unsigned int len, const float epsilon ); - -//////////////////////////////////////////////////////////////////////////////// -//! Compare two float arrays with an epsilon tolerance for equality and a -//! threshold for # pixel errors -//! @return shrTRUEif \a reference and \a data are identical, otherwise shrFALSE -//! @param reference handle to the reference data / gold image -//! @param data handle to the computed data -//! @param len number of elements in reference and data -//! @param epsilon epsilon to use for the comparison -//////////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrComparefet( const float* reference, const float* data, - const unsigned int len, const float epsilon, const float threshold ); - -//////////////////////////////////////////////////////////////////////////// -//! Compare two float arrays using L2-norm with an epsilon tolerance for -//! equality -//! @return shrTRUEif \a reference and \a data are identical, otherwise shrFALSE -//! @param reference handle to the reference data / gold image -//! @param data handle to the computed data -//! @param len number of elements in reference and data -//! @param epsilon epsilon to use for the comparison -//////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrCompareL2fe( const float* reference, const float* data, - const unsigned int len, const float epsilon ); - -//////////////////////////////////////////////////////////////////////////////// -//! Compare two PPM image files with an epsilon tolerance for equality -//! 
@return shrTRUEif \a reference and \a data are identical, otherwise shrFALSE -//! @param src_file filename for the image to be compared -//! @param data filename for the reference data / gold image -//! @param epsilon epsilon to use for the comparison -//! @param threshold threshold of pixels that can still mismatch to pass (i.e. 0.15f = 15% must pass) -//! $param verboseErrors output details of image mismatch to std::err -//////////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrComparePPM( const char *src_file, const char *ref_file, const float epsilon, const float threshold); - -//////////////////////////////////////////////////////////////////////////////// -//! Compare two PGM image files with an epsilon tolerance for equality -//! @return shrTRUEif \a reference and \a data are identical, otherwise shrFALSE -//! @param src_file filename for the image to be compared -//! @param data filename for the reference data / gold image -//! @param epsilon epsilon to use for the comparison -//! @param threshold threshold of pixels that can still mismatch to pass (i.e. 0.15f = 15% must pass) -//! 
$param verboseErrors output details of image mismatch to std::err -//////////////////////////////////////////////////////////////////////////////// -extern "C" shrBOOL shrComparePGM( const char *src_file, const char *ref_file, const float epsilon, const float threshold); - -extern "C" unsigned char* shrLoadRawFile(const char* filename, size_t size); - -extern "C" size_t shrRoundUp(int group_size, int global_size); - -// companion inline function for error checking and exit on error WITH Cleanup Callback (if supplied) -// ********************************************************************* -inline void __shrCheckErrorEX(int iSample, int iReference, void (*pCleanup)(int), const char* cFile, const int iLine) -{ - if (iReference != iSample) - { - shrLogEx(LOGBOTH | ERRORMSG, iSample, "line %i , in file %s !!!\n\n" , iLine, cFile); - if (pCleanup != NULL) - { - pCleanup(EXIT_FAILURE); - } - else - { - shrLogEx(LOGBOTH | CLOSELOG, 0, "Exiting...\n"); - exit(EXIT_FAILURE); - } - } -} - -// Standardized Exit -// ********************************************************************* -inline void __shrExitEX(int argc, const char** argv, int iExitCode) -{ -#ifdef WIN32 - if (!shrCheckCmdLineFlag(argc, argv, "noprompt") && !shrCheckCmdLineFlag(argc, argv, "qatest")) -#else - if (shrCheckCmdLineFlag(argc, argv, "prompt") && !shrCheckCmdLineFlag(argc, argv, "qatest")) -#endif - { - shrLogEx(LOGBOTH | CLOSELOG, 0, "\nPress to Quit...\n"); - getchar(); - } - else - { - shrLogEx(LOGBOTH | CLOSELOG, 0, "%s Exiting...\n", argv[0]); - } - fflush(stderr); - exit(iExitCode); -} - +/* +* Copyright 1993-2010 NVIDIA Corporation. All rights reserved. +* +* Please refer to the NVIDIA end user license agreement (EULA) associated +* with this source code for terms and conditions that govern your use of +* this software. Any use, reproduction, disclosure, or distribution of +* this software and related documentation outside the terms of the EULA +* is strictly prohibited. 
+* +*/ + +#ifndef SHR_UTILS_H +#define SHR_UTILS_H + +// ********************************************************************* +// Generic utilities for NVIDIA GPU Computing SDK +// ********************************************************************* + +// reminders for output window and build log +#ifdef _WIN32 + #pragma message ("Note: including windows.h") + #pragma message ("Note: including math.h") + #pragma message ("Note: including assert.h") +#endif + +// OS dependent includes +// NOTE(review): every #include directive below has no header name in this copy; restore the names before building +#ifdef _WIN32 + // Headers needed for Windows + #include +#else + // Headers needed for Linux + #include + #include + #include + #include + #include + #include + #include +#endif + +// Other headers needed for both Windows and Linux +#include +#include +#include +#include +#include + +// Un-comment the following #define to enable profiling code in SDK apps +//#define GPU_PROFILING + +// Beginning of GPU Architecture definitions +// ConvertSMVer2Cores: look up the number of cores per SM for the SM version major.minor. +// Returns the core count from the table below, or -1 (after printing a message with printf) when the version is not listed. +inline int ConvertSMVer2Cores(int major, int minor) +{ + // Defines for GPU Architecture types (using the SM version to determine the # of cores per SM) + typedef struct { + int SM; // 0xMm (hexadecimal notation), M = SM Major version, and m = SM minor version + int Cores; + } sSMtoCores; + + sSMtoCores nGpuArchCoresPerSM[] = + { { 0x10, 8 }, // Tesla Generation (SM 1.0) G80 class + { 0x11, 8 }, // Tesla Generation (SM 1.1) G8x class + { 0x12, 8 }, // Tesla Generation (SM 1.2) G9x class + { 0x13, 8 }, // Tesla Generation (SM 1.3) GT200 class + { 0x20, 32 }, // Fermi Generation (SM 2.0) GF100 class + { 0x21, 48 }, // Fermi Generation (SM 2.1) GF10x class + { 0x30, 192}, // Kepler Generation (SM 3.0) GK10x class + { -1, -1 } // sentinel entry: SM == -1 ends the lookup loop below + }; + + int index = 0; + while (nGpuArchCoresPerSM[index].SM != -1) { + // (major << 4) + minor packs the version into the 0xMm form used as the table key + if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor) ) { + return nGpuArchCoresPerSM[index].Cores; + } + index++; + } + // no table entry matched: report the unknown version and signal failure with -1 + printf("MapSMtoCores SM %d.%d is undefined (please update to the latest SDK)!\n", major, minor); + return -1; +} +// end of GPU Architecture
definitions + + +// Defines and enum for use with logging functions +// ********************************************************************* +#define DEFAULTLOGFILE "SdkConsoleLog.txt" +#define MASTERLOGFILE "SdkMasterLog.csv" +enum LOGMODES +{ + LOGCONSOLE = 1, // bit to signal "log to console" + LOGFILE = 2, // bit to signal "log to file" + LOGBOTH = 3, // convenience union of first 2 bits to signal "log to both" + APPENDMODE = 4, // bit to set "file append" mode instead of "replace mode" on open + MASTER = 8, // bit to signal master .csv log output + ERRORMSG = 16, // bit to signal "pre-pend Error" + CLOSELOG = 32 // bit to close log file, if open, after any requested file write +}; +#define HDASHLINE "-----------------------------------------------------------\n" + +// Standardized boolean +enum shrBOOL +{ + shrFALSE = 0, + shrTRUE = 1 +}; + +// Standardized MAX, MIN and CLAMP +#define MAX(a, b) ((a > b) ? a : b) +#define MIN(a, b) ((a < b) ? a : b) +#define CLAMP(a, b, c) MIN(MAX(a, b), c) // double sided clip of input a +#define TOPCLAMP(a, b) (a < b ? a:b) // single top side clip of input a + +// Error and Exit Handling Macros... +// ********************************************************************* +// Full error handling macro with Cleanup() callback (if supplied)... +// (Companion Inline Function lower on page) +#define shrCheckErrorEX(a, b, c) __shrCheckErrorEX(a, b, c, __FILE__ , __LINE__) + +// Short version without Cleanup() callback pointer +// Both Input (a) and Reference (b) are specified as args +#define shrCheckError(a, b) shrCheckErrorEX(a, b, 0) + +// Standardized Exit Macro for leaving main()... extended version +// (Companion Inline Function lower on page) +#define shrExitEX(a, b, c) __shrExitEX(a, b, c) + +// Standardized Exit Macro for leaving main()... 
short version +// (Companion Inline Function lower on page) +#define shrEXIT(a, b) __shrExitEX(a, b, EXIT_SUCCESS) + +// Simple argument checker macro: returns shrFALSE from the enclosing function unless (a) equals shrTRUE +// Expands to a bare if statement, so put it inside braces when it is used under another if/else +#define ARGCHECK(a) if((a) != shrTRUE)return shrFALSE + +// Define for user-customized error handling +// Expands to a format string followed by __FILE__ and __LINE__, i.e. it supplies the trailing arguments of a shrLogEx call +#define STDERROR "file %s, line %i\n\n" , __FILE__ , __LINE__ + +// Function to deallocate memory allocated within shrUtils +// ********************************************************************* +extern "C" void shrFree(void* ptr); + +// ********************************************************************* +// Helper function to log standardized information to Console, to File or to both +//! Examples: shrLogEx(LOGBOTH, 0, "Function A\n"); +//! : shrLogEx(LOGBOTH | ERRORMSG, ciErrNum, STDERROR); +//! +//! Automatically opens file and stores handle if needed and not done yet +//! Closes file and nulls handle on request +//! +//! @param 0 iLogMode: LOGCONSOLE, LOGFILE, LOGBOTH, APPENDMODE, MASTER, ERRORMSG, CLOSELOG. +//! LOGFILE and LOGBOTH may be | 'd with APPENDMODE to select file append mode instead of overwrite mode +//! LOGFILE and LOGBOTH may be | 'd with CLOSELOG to "write and close" +//! First 3 options may be | 'd with MASTER to enable independent write to master data log file +//! First 3 options may be | 'd with ERRORMSG to start line with standard error message +//! @param 2 dValue: +//! Positive val = double value for time in secs to be formatted to 6 decimals. +//! Negative val is an error code and this gives error preformatting. +//! NOTE(review): the shrLogEx prototype names this parameter iErrNum and declares it int, so the "double value for time" wording looks stale - confirm against the implementation +//! @param 3 cFormatString: String with formatting specifiers like printf or fprintf. +//! ALL printf flags, width, precision and type specifiers are supported with this exception: +//! Wide char type specifiers intended for wprintf (%S and %C) are NOT supported +//! Single byte char type specifiers (%s and %c) ARE supported +//! @param 4... variable args: like printf or fprintf. Must match format specifier type above. +//!
@return 0 if OK, negative value on error or if error occurs or was passed in. +// ********************************************************************* +extern "C" int shrLogEx(int iLogMode, int iErrNum, const char* cFormatString, ...); + +// Short version of shrLogEx defaulting to shrLogEx(LOGBOTH, 0, ...) +// ********************************************************************* +extern "C" int shrLog(const char* cFormatString, ...); + +// ********************************************************************* +// Delta timer function for up to 3 independent timers using host high performance counters +// Maintains state for 3 independent counters +//! Example: double dElapsedTime = shrDeltaT(0); +//! +//! @param 0 iCounterID: Which timer to check/reset. (0, 1, 2) +//! @return delta time of specified counter since last call in seconds. Otherwise -9999.0 if error +// ********************************************************************* +extern "C" double shrDeltaT(int iCounterID); + +// Optional LogFileNameOverride function +// ********************************************************************* +extern "C" void shrSetLogFileName (const char* cOverRideName); + +// Helper function to init data arrays +// ********************************************************************* +extern "C" void shrFillArray(float* pfData, int iSize); + +// Helper function to print data arrays +// ********************************************************************* +extern "C" void shrPrintArray(float* pfData, int iSize); + +//////////////////////////////////////////////////////////////////////////// +//! Find the path for a filename +//! @return the path if succeeded, otherwise 0 +//! @param filename name of the file +//!
@param executablePath optional absolute path of the executable +//////////////////////////////////////////////////////////////////////////// +extern "C" char* shrFindFilePath(const char* filename, const char* executablePath); + +//////////////////////////////////////////////////////////////////////////// +//! Read file \filename containing single precision floating point data +//! @return shrTRUE if reading the file succeeded, otherwise shrFALSE +//! @param filename name of the source file +//! @param data uninitialized pointer, returned initialized and pointing to +//! the data read +//! @param len number of data elements in data, -1 on error +//! @note If a NULL pointer is passed to this function and it is initialized +//! within shrUtils, then free() has to be used to deallocate the memory +//////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrReadFilef( const char* filename, float** data, unsigned int* len, + bool verbose = false); + +//////////////////////////////////////////////////////////////////////////// +//! Read file \filename containing double precision floating point data +//! @return shrTRUE if reading the file succeeded, otherwise shrFALSE +//! @param filename name of the source file +//! @param data uninitialized pointer, returned initialized and pointing to +//! the data read +//! @param len number of data elements in data, -1 on error +//! @note If a NULL pointer is passed to this function and it is initialized +//! within shrUtils, then free() has to be used to deallocate the memory +//////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrReadFiled( const char* filename, double** data, unsigned int* len, + bool verbose = false); + +//////////////////////////////////////////////////////////////////////////// +//! Read file \filename containing integer data +//!
@return shrTRUE if reading the file succeeded, otherwise shrFALSE +//! @param filename name of the source file +//! @param data uninitialized pointer, returned initialized and pointing to +//! the data read +//! @param len number of data elements in data, -1 on error +//! @note If a NULL pointer is passed to this function and it is initialized +//! within shrUtils, then free() has to be used to deallocate the memory +//////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrReadFilei( const char* filename, int** data, unsigned int* len, bool verbose = false); + +//////////////////////////////////////////////////////////////////////////// +//! Read file \filename containing unsigned integer data +//! @return shrTRUE if reading the file succeeded, otherwise shrFALSE +//! @param filename name of the source file +//! @param data uninitialized pointer, returned initialized and pointing to +//! the data read +//! @param len number of data elements in data, -1 on error +//! @note If a NULL pointer is passed to this function and it is initialized +//! within shrUtils, then free() has to be used to deallocate the memory +//////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrReadFileui( const char* filename, unsigned int** data, + unsigned int* len, bool verbose = false); + +//////////////////////////////////////////////////////////////////////////// +//! Read file \filename containing char / byte data +//! @return shrTRUE if reading the file succeeded, otherwise shrFALSE +//! @param filename name of the source file +//! @param data uninitialized pointer, returned initialized and pointing to +//! the data read +//! @param len number of data elements in data, -1 on error +//!
@note If a NULL pointer is passed to this function and it is initialized +//! within shrUtils, then free() has to be used to deallocate the memory +//////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrReadFileb( const char* filename, char** data, unsigned int* len, + bool verbose = false); + +//////////////////////////////////////////////////////////////////////////// +//! Read file \filename containing unsigned char / byte data +//! @return shrTRUE if reading the file succeeded, otherwise shrFALSE +//! @param filename name of the source file +//! @param data uninitialized pointer, returned initialized and pointing to +//! the data read +//! @param len number of data elements in data, -1 on error +//! @note If a NULL pointer is passed to this function and it is initialized +//! within shrUtils, then free() has to be used to deallocate the memory +//////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrReadFileub( const char* filename, unsigned char** data, + unsigned int* len, bool verbose = false); + +//////////////////////////////////////////////////////////////////////////// +//! Write a data file \filename containing single precision floating point +//! data +//! @return shrTRUE if writing the file succeeded, otherwise shrFALSE +//! @param filename name of the file to write +//! @param data pointer to data to write +//! @param len number of data elements in data, -1 on error +//! @param epsilon epsilon for comparison +//////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrWriteFilef( const char* filename, const float* data, unsigned int len, + const float epsilon, bool verbose = false); + +//////////////////////////////////////////////////////////////////////////// +//! Write a data file \filename containing double precision floating point +//! data +//!
@return shrTRUE if writing the file succeeded, otherwise shrFALSE +//! @param filename name of the file to write +//! @param data pointer to data to write +//! @param len number of data elements in data, -1 on error +//! @param epsilon epsilon for comparison +//! NOTE(review): documented as the double precision writer and epsilon is double, yet data is declared const float* - confirm against the implementation +//////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrWriteFiled( const char* filename, const float* data, unsigned int len, + const double epsilon, bool verbose = false); + +//////////////////////////////////////////////////////////////////////////// +//! Write a data file \filename containing integer data +//! @return shrTRUE if writing the file succeeded, otherwise shrFALSE +//! @param filename name of the file to write +//! @param data pointer to data to write +//! @param len number of data elements in data, -1 on error +//////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrWriteFilei( const char* filename, const int* data, unsigned int len, + bool verbose = false); + +//////////////////////////////////////////////////////////////////////////// +//! Write a data file \filename containing unsigned integer data +//! @return shrTRUE if writing the file succeeded, otherwise shrFALSE +//! @param filename name of the file to write +//! @param data pointer to data to write +//! @param len number of data elements in data, -1 on error +//////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrWriteFileui( const char* filename, const unsigned int* data, + unsigned int len, bool verbose = false); + +//////////////////////////////////////////////////////////////////////////// +//! Write a data file \filename containing char / byte data +//! @return shrTRUE if writing the file succeeded, otherwise shrFALSE +//! @param filename name of the file to write +//! @param data pointer to data to write +//!
@param len number of data elements in data, -1 on error +//////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrWriteFileb( const char* filename, const char* data, unsigned int len, + bool verbose = false); + +//////////////////////////////////////////////////////////////////////////// +//! Write a data file \filename containing unsigned char / byte data +//! @return shrTRUE if writing the file succeeded, otherwise shrFALSE +//! @param filename name of the file to write +//! @param data pointer to data to write +//! @param len number of data elements in data, -1 on error +//////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrWriteFileub( const char* filename, const unsigned char* data, + unsigned int len, bool verbose = false); + +//////////////////////////////////////////////////////////////////////////// +//! Load PPM image file (with unsigned char as data element type), padding +//! 4th component +//! @return shrTRUE if reading the file succeeded, otherwise shrFALSE +//! @param file name of the image file +//! @param OutData handle to the data read +//! @param w width of the image +//! @param h height of the image +//! +//! Note: If *OutData is NULL this function allocates buffer that must be freed by caller +//////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrLoadPPM4ub(const char* file, unsigned char** OutData, + unsigned int *w, unsigned int *h); + +//////////////////////////////////////////////////////////////////////////// +//! Save PPM image file (with unsigned char as data element type, padded to +//! 4 bytes) +//! @return shrTRUE if saving the file succeeded, otherwise shrFALSE +//! @param file name of the image file +//! @param data handle to the data read +//! @param w width of the image +//! 
@param h height of the image +//////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrSavePPM4ub( const char* file, unsigned char *data, + unsigned int w, unsigned int h); + +//////////////////////////////////////////////////////////////////////////////// +//! Save PGM image file (with unsigned char as data element type) +//! @return shrTRUE if saving the file succeeded, otherwise shrFALSE +//! @param file name of the image file +//! @param data handle to the data to save +//! @param w width of the image +//! @param h height of the image +//////////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrSavePGMub( const char* file, unsigned char *data, + unsigned int w, unsigned int h); + +//////////////////////////////////////////////////////////////////////////// +//! Load PGM image file (with unsigned char as data element type) +//! @return shrTRUE if reading the file succeeded, otherwise shrFALSE +//! @param file name of the image file +//! @param data handle to the data read +//! @param w width of the image +//! @param h height of the image +//! @note If a NULL pointer is passed to this function and it is initialized +//! within shrUtils, then free() has to be used to deallocate the memory +//////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrLoadPGMub( const char* file, unsigned char** data, + unsigned int *w,unsigned int *h); + +//////////////////////////////////////////////////////////////////////////// +// Command line arguments: General notes +// * All command line arguments begin with '--' followed by the token; +// token and value are separated by '='; example --samples=50 +// * Arrays have the form --model=[one.obj,two.obj,three.obj] +// (without whitespace) +//////////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////////// +//!
Check if command line argument \a flag-name is given +//! @return shrTRUE if command line argument \a flag_name has been given, +//! otherwise shrFALSE +//! @param argc argc as passed to main() +//! @param argv argv as passed to main() +//! @param flag_name name of command line flag +//////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrCheckCmdLineFlag( const int argc, const char** argv, + const char* flag_name); + +//////////////////////////////////////////////////////////////////////////// +//! Get the value of a command line argument of type int +//! @return shrTRUE if command line argument \a arg_name has been given and +//! is of the requested type, otherwise shrFALSE +//! @param argc argc as passed to main() +//! @param argv argv as passed to main() +//! @param arg_name name of the command line argument +//! @param val value of the command line argument +//////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrGetCmdLineArgumenti( const int argc, const char** argv, + const char* arg_name, int* val); + +//////////////////////////////////////////////////////////////////////////// +//! Get the value of a command line argument of type unsigned int +//! @return shrTRUE if command line argument \a arg_name has been given and +//! is of the requested type, otherwise shrFALSE +//! @param argc argc as passed to main() +//! @param argv argv as passed to main() +//! @param arg_name name of the command line argument +//! @param val value of the command line argument +//////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrGetCmdLineArgumentu( const int argc, const char** argv, + const char* arg_name, unsigned int* val); + +//////////////////////////////////////////////////////////////////////////// +//! Get the value of a command line argument of type float +//! @return shrTRUE if command line argument \a arg_name has been given and +//! 
is of the requested type, otherwise shrFALSE +//! @param argc argc as passed to main() +//! @param argv argv as passed to main() +//! @param arg_name name of the command line argument +//! @param val value of the command line argument +//////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrGetCmdLineArgumentf( const int argc, const char** argv, + const char* arg_name, float* val); + +//////////////////////////////////////////////////////////////////////////// +//! Get the value of a command line argument of type string +//! @return shrTRUE if command line argument \a arg_name has been given and +//! is of the requested type, otherwise shrFALSE +//! @param argc argc as passed to main() +//! @param argv argv as passed to main() +//! @param arg_name name of the command line argument +//! @param val value of the command line argument +//////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrGetCmdLineArgumentstr( const int argc, const char** argv, + const char* arg_name, char** val); + +//////////////////////////////////////////////////////////////////////////// +//! Get the value of a command line argument list whose elements are strings +//! @return shrTRUE if command line argument \a arg_name has been given and +//! is of the requested type, otherwise shrFALSE +//! @param argc argc as passed to main() +//! @param argv argv as passed to main() +//! @param arg_name name of the command line argument +//! @param val command line argument list +//! @param len length of the list / number of elements +//////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrGetCmdLineArgumentListstr( const int argc, const char** argv, + const char* arg_name, char** val, + unsigned int* len); + +//////////////////////////////////////////////////////////////////////////// +//! Compare two float arrays +//!
@return shrTRUE if \a reference and \a data are identical, otherwise shrFALSE +//! @param reference handle to the reference data / gold image +//! @param data handle to the computed data +//! @param len number of elements in reference and data +//////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrComparef( const float* reference, const float* data, + const unsigned int len); + +//////////////////////////////////////////////////////////////////////////// +//! Compare two integer arrays +//! @return shrTRUE if \a reference and \a data are identical, otherwise shrFALSE +//! @param reference handle to the reference data / gold image +//! @param data handle to the computed data +//! @param len number of elements in reference and data +//////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrComparei( const int* reference, const int* data, + const unsigned int len ); + +//////////////////////////////////////////////////////////////////////////////// +//! Compare two unsigned integer arrays, with epsilon and threshold +//! @return shrTRUE if \a reference and \a data are identical, otherwise shrFALSE +//! @param reference handle to the reference data / gold image +//! @param data handle to the computed data +//! @param len number of elements in reference and data +//! @param threshold tolerance % # of comparison errors (0.15f = 15%) +//////////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrCompareuit( const unsigned int* reference, const unsigned int* data, + const unsigned int len, const float epsilon, const float threshold ); + +//////////////////////////////////////////////////////////////////////////// +//! Compare two unsigned char arrays +//! @return shrTRUE if \a reference and \a data are identical, otherwise shrFALSE +//! @param reference handle to the reference data / gold image +//! @param data handle to the computed data +//!
@param len number of elements in reference and data +//////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrCompareub( const unsigned char* reference, const unsigned char* data, + const unsigned int len ); + +//////////////////////////////////////////////////////////////////////////////// +//! Compare two unsigned char arrays with a tolerance for # of byte errors +//! @return shrTRUE if \a reference and \a data are identical, otherwise shrFALSE +//! @param reference handle to the reference data / gold image +//! @param data handle to the computed data +//! @param len number of elements in reference and data +//! @param epsilon epsilon to use for the comparison +//! @param threshold tolerance % # of comparison errors (0.15f = 15%) +//////////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrCompareubt( const unsigned char* reference, const unsigned char* data, + const unsigned int len, const float epsilon, const float threshold ); + +//////////////////////////////////////////////////////////////////////////////// +//! Compare two unsigned char arrays with an epsilon tolerance for equality +//! @return shrTRUE if \a reference and \a data are identical, otherwise shrFALSE +//! @param reference handle to the reference data / gold image +//! @param data handle to the computed data +//! @param len number of elements in reference and data +//! @param epsilon epsilon to use for the comparison +//////////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrCompareube( const unsigned char* reference, const unsigned char* data, + const unsigned int len, const float epsilon ); + +//////////////////////////////////////////////////////////////////////////// +//! Compare two float arrays with an epsilon tolerance for equality +//! @return shrTRUE if \a reference and \a data are identical, otherwise shrFALSE +//!
@param reference handle to the reference data / gold image +//! @param data handle to the computed data +//! @param len number of elements in reference and data +//! @param epsilon epsilon to use for the comparison +//////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrComparefe( const float* reference, const float* data, + const unsigned int len, const float epsilon ); + +//////////////////////////////////////////////////////////////////////////////// +//! Compare two float arrays with an epsilon tolerance for equality and a +//! threshold for # pixel errors +//! @return shrTRUE if \a reference and \a data are identical, otherwise shrFALSE +//! @param reference handle to the reference data / gold image +//! @param data handle to the computed data +//! @param len number of elements in reference and data +//! @param epsilon epsilon to use for the comparison +//////////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrComparefet( const float* reference, const float* data, + const unsigned int len, const float epsilon, const float threshold ); + +//////////////////////////////////////////////////////////////////////////// +//! Compare two float arrays using L2-norm with an epsilon tolerance for +//! equality +//! @return shrTRUE if \a reference and \a data are identical, otherwise shrFALSE +//! @param reference handle to the reference data / gold image +//! @param data handle to the computed data +//! @param len number of elements in reference and data +//! @param epsilon epsilon to use for the comparison +//////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrCompareL2fe( const float* reference, const float* data, + const unsigned int len, const float epsilon ); + +//////////////////////////////////////////////////////////////////////////////// +//! Compare two PPM image files with an epsilon tolerance for equality +//!
@return shrTRUE if \a src_file and \a ref_file are identical, otherwise shrFALSE +//! @param src_file filename for the image to be compared +//! @param ref_file filename for the reference data / gold image +//! @param epsilon epsilon to use for the comparison +//! @param threshold threshold of pixels that can still mismatch to pass (i.e. 0.15f = 15% must pass) +//! @note verboseErrors is not a parameter of this declaration (stale doc line) +//////////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrComparePPM( const char *src_file, const char *ref_file, const float epsilon, const float threshold); + +//////////////////////////////////////////////////////////////////////////////// +//! Compare two PGM image files with an epsilon tolerance for equality +//! @return shrTRUE if \a src_file and \a ref_file are identical, otherwise shrFALSE +//! @param src_file filename for the image to be compared +//! @param ref_file filename for the reference data / gold image +//! @param epsilon epsilon to use for the comparison +//! @param threshold threshold of pixels that can still mismatch to pass (i.e. 0.15f = 15% must pass) +//!
@note verboseErrors is not a parameter of this declaration (stale doc line) +//////////////////////////////////////////////////////////////////////////////// +extern "C" shrBOOL shrComparePGM( const char *src_file, const char *ref_file, const float epsilon, const float threshold); + +extern "C" unsigned char* shrLoadRawFile(const char* filename, size_t size); + +extern "C" size_t shrRoundUp(int group_size, int global_size); + +// Error-check helper: if iSample differs from iReference, logs an ERRORMSG entry (iSample, line, file), then calls pCleanup(EXIT_FAILURE) when a cleanup callback is supplied, otherwise logs "Exiting..." and calls exit(EXIT_FAILURE) +// ********************************************************************* +inline void __shrCheckErrorEX(int iSample, int iReference, void (*pCleanup)(int), const char* cFile, const int iLine) +{ + if (iReference != iSample) + { + shrLogEx(LOGBOTH | ERRORMSG, iSample, "line %i , in file %s !!!\n\n" , iLine, cFile); + if (pCleanup != NULL) + { + pCleanup(EXIT_FAILURE); + } + else + { + shrLogEx(LOGBOTH | CLOSELOG, 0, "Exiting...\n"); + exit(EXIT_FAILURE); + } + } +} + +// Standardized exit: prompts and waits on getchar() when prompting is enabled (WIN32: unless the "noprompt" flag is given; elsewhere: only with the "prompt" flag) and "qatest" is not given, otherwise logs "<argv[0]> Exiting..."; then flushes stderr and calls exit(iExitCode) +// ********************************************************************* +inline void __shrExitEX(int argc, const char** argv, int iExitCode) +{ +#ifdef WIN32 + if (!shrCheckCmdLineFlag(argc, argv, "noprompt") && !shrCheckCmdLineFlag(argc, argv, "qatest")) +#else + if (shrCheckCmdLineFlag(argc, argv, "prompt") && !shrCheckCmdLineFlag(argc, argv, "qatest")) +#endif + { + shrLogEx(LOGBOTH | CLOSELOG, 0, "\nPress <Enter> to Quit...\n"); + getchar(); + } + else + { + shrLogEx(LOGBOTH | CLOSELOG, 0, "%s Exiting...\n", argv[0]); + } + fflush(stderr); + exit(iExitCode); +} + #endif \ No newline at end of file diff --git a/benchmarks/new_opencl/vecadd/.gitignore b/benchmarks/opencl/vecadd/.gitignore similarity index 100% rename from benchmarks/new_opencl/vecadd/.gitignore rename to benchmarks/opencl/vecadd/.gitignore diff --git a/benchmarks/opencl/vecadd/Makefile b/benchmarks/opencl/vecadd/Makefile index 728ffb7b..145f51ea 100644 --- a/benchmarks/opencl/vecadd/Makefile +++
b/benchmarks/opencl/vecadd/Makefile @@ -1,68 +1,47 @@ -RISCV_TOOL_PATH ?= $(wildcard ../../../../riscv-gnu-toolchain/drops) -POCL_CC_PATH ?= $(wildcard ../../../../pocl/drops_riscv_cc) -POCL_INC_PATH ?= $(wildcard ../include) -POCL_LIB_PATH ?= $(wildcard ../lib) -VX_RT_PATH ?= $(wildcard ../../../runtime) -VX_SIMX_PATH ?= $(wildcard ../../../simX/obj_dir) +LLVM_HOME ?= ~/dev/llvm-project/drops +TOOLCHAIN_PATH ?= ~/dev/riscv-gnu-toolchain/drops +SYSROOT ?= $(TOOLCHAIN_PATH)/riscv32-unknown-elf +POCL_CC_PATH ?= $(realpath ../compiler) +POCL_RT_PATH ?= $(realpath ../runtime) +VORTEX_DRV_PATH ?= $(realpath ../../../driver/sw) +VORTEX_RT_PATH ?= $(realpath ../../../runtime) -CC = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc -CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++ -DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump -HEX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy -GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb +CXXFLAGS += -std=c++11 -O0 -g -fpermissive -Wall -Wextra -pedantic -Wfatal-errors -VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c -VX_SRCS += $(VX_RT_PATH)/startup/vx_start.S -VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s -VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c -VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s -VX_SRCS += $(VX_RT_PATH)/tests/tests.c -VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c -VX_SRCS += $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) +CXXFLAGS += -I$(POCL_RT_PATH)/include -VX_CFLAGS = -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld - -CXXFLAGS = -g -O0 -march=rv32im -mabi=ilp32 -CXXFLAGS += -ffreestanding # program may not begin at main() -CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections -CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions -CXXFLAGS += -I$(POCL_INC_PATH) - -VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a -QEMU_LIBS = 
$(VX_RT_PATH)/qemu/vx_api.c -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a +LDFLAGS += -L$(POCL_RT_PATH)/lib -L$(VORTEX_DRV_PATH)/stub -lOpenCL -lvortex PROJECT = vecadd SRCS = main.cc -all: $(PROJECT).dump $(PROJECT).hex +all: $(PROJECT) -lib$(PROJECT).a: kernel.cl - POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl +kernel.pocl: kernel.cl + TOOLCHAIN_PATH=$(TOOLCHAIN_PATH) SYSROOT=$(SYSROOT) LLVM_HOME=$(LLVM_HOME) VORTEX_RUNTIME_PATH=$(VORTEX_RT_PATH) POCL_DEBUG=all LD_LIBRARY_PATH=$(LLVM_HOME)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o kernel.pocl kernel.cl + +$(PROJECT): $(SRCS) + $(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@ -$(PROJECT).elf: $(SRCS) lib$(PROJECT).a - $(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) $(SRCS) $(VX_LIBS) -o $(PROJECT).elf +run-fpga: $(PROJECT) kernel.pocl + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) -$(PROJECT).qemu: $(SRCS) lib$(PROJECT).a - $(CXX) $(CXXFLAGS) $(SRCS) $(QEMU_LIBS) -o $(PROJECT).qemu +run-ase: $(PROJECT) kernel.pocl + ASE_LOG=0 LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) -$(PROJECT).hex: $(PROJECT).elf - $(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex +run-simx: $(PROJECT) kernel.pocl + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) -$(PROJECT).dump: $(PROJECT).elf - $(DMP) -D $(PROJECT).elf > $(PROJECT).dump +run-rtlsim: $(PROJECT) kernel.pocl + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) -run: $(PROJECT).hex - POCL_DEBUG=all $(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug - -qemu: $(PROJECT).qemu - POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -d in_asm -D debug.log $(PROJECT).qemu - -gdb-s: $(PROJECT).qemu - POCL_DEBUG=all 
$(RISCV_TOOL_PATH)/bin/qemu-riscv32 -g 1234 -d in_asm -D debug.log $(PROJECT).qemu - -gdb-c: $(PROJECT).qemu - $(GDB) $(PROJECT).qemu +.depend: $(SRCS) + $(CXX) $(CXXFLAGS) -MM $^ > .depend; clean: - rm -rf *.o *.elf *.dump *.hex *.qemu *.log *.debug \ No newline at end of file + rm -rf $(PROJECT) *.o *.dump .depend + +ifneq ($(MAKECMDGOALS),clean) + -include .depend +endif \ No newline at end of file diff --git a/benchmarks/new_opencl/vecadd/kernel.pocl b/benchmarks/opencl/vecadd/kernel.pocl similarity index 100% rename from benchmarks/new_opencl/vecadd/kernel.pocl rename to benchmarks/opencl/vecadd/kernel.pocl diff --git a/benchmarks/opencl/vecadd/main.cc b/benchmarks/opencl/vecadd/main.cc index 178111c1..68c9675a 100644 --- a/benchmarks/opencl/vecadd/main.cc +++ b/benchmarks/opencl/vecadd/main.cc @@ -1,13 +1,12 @@ #include #include +#include #include #include -#define MAX_KERNELS 1 -#define KERNEL_NAME "vecadd" -#define KERNEL_FILE_NAME "vecadd.pocl" #define SIZE 4 #define NUM_WORK_GROUPS 2 +#define KERNEL_NAME "vecadd" #define CL_CHECK(_expr) \ do { \ @@ -22,7 +21,7 @@ #define CL_CHECK2(_expr) \ ({ \ cl_int _err = CL_INVALID_VALUE; \ - typeof(_expr) _ret = _expr; \ + decltype(_expr) _ret = _expr; \ if (_err != CL_SUCCESS) { \ printf("OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \ cleanup(); \ @@ -42,9 +41,44 @@ cl_mem c_memobj = NULL; cl_int *A = NULL; cl_int *B = NULL; cl_int *C = NULL; -char *binary = NULL; +uint8_t *kernel_bin = NULL; -void cleanup() { +// Read the whole kernel binary `filename` into a malloc'ed buffer. +// On success returns 0, stores the buffer in *data (caller frees it) and the +// number of bytes read in *size; returns -1 on a NULL argument, open failure, +// unknown/empty file size or allocation failure. +static int read_kernel_file(const char* filename, uint8_t** data, size_t* size) { + if (nullptr == filename || nullptr == data || nullptr == size) + return -1; + + // binary mode: the kernel image must not go through text-mode translation + FILE* fp = fopen(filename, "rb"); + if (NULL == fp) { + fprintf(stderr, "Failed to load kernel."); + return -1; + } + fseek(fp , 0 , SEEK_END); + long fsize = ftell(fp); + rewind(fp); + if (fsize <= 0) { + // ftell() failed (-1) or the file is empty: nothing to load + fclose(fp); + return -1; + } + + *data = (uint8_t*)malloc(fsize); + if (NULL == *data) { + fclose(fp); + return -1; + } + *size = fread(*data, 1, fsize, fp); + + fclose(fp); + + return 0; +} + +static void cleanup() { if (commandQueue)
clReleaseCommandQueue(commandQueue); if (kernel) clReleaseKernel(kernel); if (program) clReleaseProgram(program); @@ -52,19 +72,46 @@ void cleanup() { if (b_memobj) clReleaseMemObject(b_memobj); if (c_memobj) clReleaseMemObject(c_memobj); if (context) clReleaseContext(context); - if (binary) free(binary); + if (kernel_bin) free(kernel_bin); if (A) free(A); if (B) free(B); if (C) free(C); } +static int find_device(char* name, cl_platform_id platform_id, cl_device_id *device_id) { + cl_device_id device_ids[64]; + cl_uint num_devices = 0; + + CL_CHECK(clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_ALL, 64, device_ids, &num_devices)); + + for (int i=0; i