Rename the "tensor" regression test to "sgemmx" and add a "vecaddx" test.

What this patch does:
  * tests/regression/Makefile: in the all, run-simx, run-rtlsim, run-opae,
    clean and clean-all targets, the "tensor" sub-make is replaced by two
    sub-makes, "vecaddx" and "sgemmx".
  * tests/regression/tensor/ is renamed to tests/regression/sgemmx/. Only the
    PROJECT name in its Makefile changes; common.h, kernel.cpp and main.cpp
    are pure renames.
  * tests/regression/vecaddx/ is new:
      - common.h   defines the kernel argument block (point count plus three
                   64-bit buffer addresses), the address it is copied to, and
                   the element TYPE (float unless overridden).
      - kernel.cpp spawns num_points tasks; each task writes
                   dst[task_id] = src0[task_id] + src1[task_id].
      - main.cpp   host driver: parses -n/-k/-h, opens the device, uploads the
                   kernel and the argument block, fills both source buffers
                   from interleaved random data, zeroes the destination, runs
                   the kernel and compares the result with the host sum.
                   Integers are compared exactly, floats within FLOAT_ULP
                   units in the last place. Only the first 100 mismatches are
                   printed, but all are counted.

Fix relative to the patch as originally received:
  * The float comparator's generate() was declared to return int. The value
    rand()/RAND_MAX lies in [0, 1], so the int conversion truncated it to 0
    (or 1 only when rand() == RAND_MAX); the float test therefore added zeros
    and verified almost nothing. It now returns float.

NOTE(review): the patch text arrived with every <...> token removed and all
line breaks collapsed. The include targets (stdint.h, vx_intrinsics.h,
vx_spawn.h, iostream, unistd.h, string.h, vector, vortex.h), the template
parameter lists on Comparator, the cast target types, the element types of
source_data / staging_buf and the std::max argument are reconstructed from how
the code uses them -- confirm against the upstream tree before applying.
Indentation inside the new C++ files is likewise reconstructed. Line counts of
every hunk match the @@ headers. The index blob ids are carried over unchanged
and no longer describe the corrected vecaddx/main.cpp content.

diff --git a/tests/regression/Makefile b/tests/regression/Makefile
index 89fa25af..d44c82c4 100644
--- a/tests/regression/Makefile
+++ b/tests/regression/Makefile
@@ -10,7 +10,8 @@ all:
 	$(MAKE) -C fence
 	$(MAKE) -C no_mf_ext
 	$(MAKE) -C no_smem
-	$(MAKE) -C tensor
+	$(MAKE) -C vecaddx
+	$(MAKE) -C sgemmx
 
 run-simx:
 	$(MAKE) -C basic run-simx
@@ -24,7 +25,8 @@ run-simx:
 	$(MAKE) -C fence run-simx
 	$(MAKE) -C no_mf_ext run-simx
 	$(MAKE) -C no_smem run-simx
-	$(MAKE) -C tensor run-simx
+	$(MAKE) -C vecaddx run-simx
+	$(MAKE) -C sgemmx run-simx
 
 run-rtlsim:
 	$(MAKE) -C basic run-rtlsim
@@ -38,7 +40,8 @@ run-rtlsim:
 	$(MAKE) -C fence run-rtlsim
 	$(MAKE) -C no_mf_ext run-rtlsim
 	$(MAKE) -C no_smem run-rtlsim
-	$(MAKE) -C tensor run-rtlsim
+	$(MAKE) -C vecaddx run-rtlsim
+	$(MAKE) -C sgemmx run-rtlsim
 
 run-opae:
 	$(MAKE) -C basic run-opae
@@ -52,7 +55,8 @@ run-opae:
 	$(MAKE) -C fence run-opae
 	$(MAKE) -C no_mf_ext run-opae
 	$(MAKE) -C no_smem run-opae
-	$(MAKE) -C tensor run-opae
+	$(MAKE) -C vecaddx run-opae
+	$(MAKE) -C sgemmx run-opae
 
 clean:
 	$(MAKE) -C basic clean
@@ -66,7 +70,8 @@ clean:
 	$(MAKE) -C fence clean
 	$(MAKE) -C no_mf_ext clean
 	$(MAKE) -C no_smem clean
-	$(MAKE) -C tensor clean
+	$(MAKE) -C vecaddx clean
+	$(MAKE) -C sgemmx clean
 
 clean-all:
 	$(MAKE) -C basic clean-all
@@ -80,4 +85,5 @@ clean-all:
 	$(MAKE) -C fence clean-all
 	$(MAKE) -C no_mf_ext clean-all
 	$(MAKE) -C no_smem clean-all
-	$(MAKE) -C tensor clean-all
+	$(MAKE) -C vecaddx clean-all
+	$(MAKE) -C sgemmx clean-all
diff --git a/tests/regression/tensor/Makefile b/tests/regression/sgemmx/Makefile
similarity index 81%
rename from tests/regression/tensor/Makefile
rename to tests/regression/sgemmx/Makefile
index dbb70c3b..2e72b32e 100644
--- a/tests/regression/tensor/Makefile
+++ b/tests/regression/sgemmx/Makefile
@@ -1,4 +1,4 @@
-PROJECT = tensor
+PROJECT = sgemmx
 
 SRCS = main.cpp
 
diff --git a/tests/regression/tensor/common.h b/tests/regression/sgemmx/common.h
similarity index 100%
rename from tests/regression/tensor/common.h
rename to tests/regression/sgemmx/common.h
diff --git a/tests/regression/tensor/kernel.cpp b/tests/regression/sgemmx/kernel.cpp
similarity index 100%
rename from tests/regression/tensor/kernel.cpp
rename to tests/regression/sgemmx/kernel.cpp
diff --git a/tests/regression/tensor/main.cpp b/tests/regression/sgemmx/main.cpp
similarity index 100%
rename from tests/regression/tensor/main.cpp
rename to tests/regression/sgemmx/main.cpp
diff --git a/tests/regression/vecaddx/Makefile b/tests/regression/vecaddx/Makefile
new file mode 100644
index 00000000..af43d3c7
--- /dev/null
+++ b/tests/regression/vecaddx/Makefile
@@ -0,0 +1,9 @@
+PROJECT = vecaddx
+
+SRCS = main.cpp
+
+VX_SRCS = kernel.cpp
+
+OPTS ?= -n64
+
+include ../common.mk
\ No newline at end of file
diff --git a/tests/regression/vecaddx/common.h b/tests/regression/vecaddx/common.h
new file mode 100644
index 00000000..2b8f164a
--- /dev/null
+++ b/tests/regression/vecaddx/common.h
@@ -0,0 +1,17 @@
+#ifndef _COMMON_H_
+#define _COMMON_H_
+
+#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000
+
+#ifndef TYPE
+#define TYPE float
+#endif
+
+typedef struct {
+  uint32_t num_points;
+  uint64_t src0_addr;
+  uint64_t src1_addr;
+  uint64_t dst_addr;
+} kernel_arg_t;
+
+#endif
diff --git a/tests/regression/vecaddx/kernel.cpp b/tests/regression/vecaddx/kernel.cpp
new file mode 100644
index 00000000..6ed42164
--- /dev/null
+++ b/tests/regression/vecaddx/kernel.cpp
@@ -0,0 +1,18 @@
+#include <stdint.h>
+#include <vx_intrinsics.h>
+#include <vx_spawn.h>
+#include "common.h"
+
+void kernel_body(int task_id, kernel_arg_t* __UNIFORM__ arg) {
+  auto src0_ptr = reinterpret_cast<TYPE*>(arg->src0_addr);
+  auto src1_ptr = reinterpret_cast<TYPE*>(arg->src1_addr);
+  auto dst_ptr = reinterpret_cast<TYPE*>(arg->dst_addr);
+
+  dst_ptr[task_id] = src0_ptr[task_id] + src1_ptr[task_id];
+}
+
+int main() {
+  kernel_arg_t* arg = (kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
+  vx_spawn_tasks(arg->num_points, (vx_spawn_tasks_cb)kernel_body, arg);
+  return 0;
+}
diff --git a/tests/regression/vecaddx/main.cpp b/tests/regression/vecaddx/main.cpp
new file mode 100644
index 00000000..117f3470
--- /dev/null
+++ b/tests/regression/vecaddx/main.cpp
@@ -0,0 +1,246 @@
+#include <iostream>
+#include <unistd.h>
+#include <string.h>
+#include <vector>
+#include <vortex.h>
+#include "common.h"
+
+#define FLOAT_ULP 6
+
+#define RT_CHECK(_expr)                                         \
+  do {                                                          \
+    int _ret = _expr;                                           \
+    if (0 == _ret)                                              \
+      break;                                                    \
+    printf("Error: '%s' returned %d!\n", #_expr, (int)_ret);    \
+    cleanup();                                                  \
+    exit(-1);                                                   \
+  } while (false)
+
+///////////////////////////////////////////////////////////////////////////////
+
+template <typename Type>
+class Comparator {};
+
+template <>
+class Comparator<int> {
+public:
+  static const char* type_str() {
+    return "integer";
+  }
+  static int generate() {
+    return rand();
+  }
+  static bool compare(int a, int b, int index, int errors) {
+    if (a != b) {
+      if (errors < 100) {
+        printf("*** error: [%d] expected=%d, actual=%d\n", index, a, b);
+      }
+      return false;
+    }
+    return true;
+  }
+};
+
+template <>
+class Comparator<float> {
+private:
+  union Float_t { float f; int i; };
+public:
+  static const char* type_str() {
+    return "float";
+  }
+  static float generate() {
+    return static_cast<float>(rand()) / RAND_MAX;
+  }
+  static bool compare(float a, float b, int index, int errors) {
+    union fi_t { float f; int32_t i; };
+    fi_t fa, fb;
+    fa.f = a;
+    fb.f = b;
+    auto d = std::abs(fa.i - fb.i);
+    if (d > FLOAT_ULP) {
+      if (errors < 100) {
+        printf("*** error: [%d] expected=%f, actual=%f\n", index, a, b);
+      }
+      return false;
+    }
+    return true;
+  }
+};
+
+const char* kernel_file = "kernel.bin";
+uint32_t size = 16;
+
+vx_device_h device = nullptr;
+std::vector<TYPE> source_data;
+std::vector<uint8_t> staging_buf;
+kernel_arg_t kernel_arg = {};
+
+static void show_usage() {
+  std::cout << "Vortex Test." << std::endl;
+  std::cout << "Usage: [-k: kernel] [-n words] [-h: help]" << std::endl;
+}
+
+static void parse_args(int argc, char **argv) {
+  int c;
+  while ((c = getopt(argc, argv, "n:k:h?")) != -1) {
+    switch (c) {
+    case 'n':
+      size = atoi(optarg);
+      break;
+    case 'k':
+      kernel_file = optarg;
+      break;
+    case 'h':
+    case '?': {
+      show_usage();
+      exit(0);
+    } break;
+    default:
+      show_usage();
+      exit(-1);
+    }
+  }
+}
+
+void cleanup() {
+  if (device) {
+    vx_mem_free(device, kernel_arg.src0_addr);
+    vx_mem_free(device, kernel_arg.src1_addr);
+    vx_mem_free(device, kernel_arg.dst_addr);
+    vx_dev_close(device);
+  }
+}
+
+int run_test(const kernel_arg_t& kernel_arg,
+             uint32_t buf_size,
+             uint32_t num_points) {
+  // start device
+  std::cout << "start device" << std::endl;
+  RT_CHECK(vx_start(device));
+
+  // wait for completion
+  std::cout << "wait for completion" << std::endl;
+  RT_CHECK(vx_ready_wait(device, VX_MAX_TIMEOUT));
+
+  // download destination buffer
+  std::cout << "download destination buffer" << std::endl;
+  RT_CHECK(vx_copy_from_dev(device, staging_buf.data(), kernel_arg.dst_addr, buf_size));
+
+  // verify result
+  std::cout << "verify result" << std::endl;
+  {
+    int errors = 0;
+    auto buf_ptr = (TYPE*)staging_buf.data();
+    for (uint32_t i = 0; i < num_points; ++i) {
+      auto ref = source_data[2 * i + 0] + source_data[2 * i + 1];
+      auto cur = buf_ptr[i];
+      if (!Comparator<TYPE>::compare(cur, ref, i, errors)) {
+        ++errors;
+      }
+    }
+    if (errors != 0) {
+      std::cout << "Found " << std::dec << errors << " errors!" << std::endl;
+      std::cout << "FAILED!" << std::endl;
+      return 1;
+    }
+  }
+
+  return 0;
+}
+
+int main(int argc, char *argv[]) {
+  // parse command arguments
+  parse_args(argc, argv);
+
+  std::srand(50);
+
+  // open device connection
+  std::cout << "open device connection" << std::endl;
+  RT_CHECK(vx_dev_open(&device));
+
+  uint64_t num_cores, num_warps, num_threads;
+  RT_CHECK(vx_dev_caps(device, VX_CAPS_NUM_CORES, &num_cores));
+  RT_CHECK(vx_dev_caps(device, VX_CAPS_NUM_WARPS, &num_warps));
+  RT_CHECK(vx_dev_caps(device, VX_CAPS_NUM_THREADS, &num_threads));
+  std::cout << "number of cores: " << num_cores << std::endl;
+  std::cout << "number of warps: " << num_warps << std::endl;
+  std::cout << "number of threads: " << num_threads << std::endl;
+
+  uint32_t num_points = size;
+  uint32_t buf_size = num_points * sizeof(TYPE);
+
+  std::cout << "number of points: " << num_points << std::endl;
+  std::cout << "data type: " << Comparator<TYPE>::type_str() << std::endl;
+  std::cout << "buffer size: " << buf_size << " bytes" << std::endl;
+
+  // upload program
+  std::cout << "upload program" << std::endl;
+  RT_CHECK(vx_upload_kernel_file(device, kernel_file));
+
+  // allocate device memory
+  std::cout << "allocate device memory" << std::endl;
+  RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.src0_addr));
+  RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.src1_addr));
+  RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.dst_addr));
+
+  kernel_arg.num_points = num_points;
+
+  std::cout << "dev_src0=0x" << std::hex << kernel_arg.src0_addr << std::endl;
+  std::cout << "dev_src1=0x" << std::hex << kernel_arg.src1_addr << std::endl;
+  std::cout << "dev_dst=0x" << std::hex << kernel_arg.dst_addr << std::endl;
+
+  // allocate staging buffer
+  std::cout << "allocate staging buffer" << std::endl;
+  uint32_t alloc_size = std::max<uint32_t>(buf_size, sizeof(kernel_arg_t));
+  staging_buf.resize(alloc_size);
+
+  // upload kernel argument
+  std::cout << "upload kernel argument" << std::endl;
+  memcpy(staging_buf.data(), &kernel_arg, sizeof(kernel_arg_t));
+  RT_CHECK(vx_copy_to_dev(device, KERNEL_ARG_DEV_MEM_ADDR, staging_buf.data(), sizeof(kernel_arg_t)));
+
+  // generate source data
+  source_data.resize(2 * num_points);
+  for (uint32_t i = 0; i < source_data.size(); ++i) {
+    source_data[i] = Comparator<TYPE>::generate();
+  }
+
+  // upload source buffer0
+  {
+    std::cout << "upload source buffer0" << std::endl;
+    auto buf_ptr = (TYPE*)staging_buf.data();
+    for (uint32_t i = 0; i < num_points; ++i) {
+      buf_ptr[i] = source_data[2 * i + 0];
+    }
+    RT_CHECK(vx_copy_to_dev(device, kernel_arg.src0_addr, staging_buf.data(), buf_size));
+  }
+
+  // upload source buffer1
+  {
+    std::cout << "upload source buffer1" << std::endl;
+    auto buf_ptr = (TYPE*)staging_buf.data();
+    for (uint32_t i = 0; i < num_points; ++i) {
+      buf_ptr[i] = source_data[2 * i + 1];
+    }
+    RT_CHECK(vx_copy_to_dev(device, kernel_arg.src1_addr, staging_buf.data(), buf_size));
+  }
+
+  // clear destination buffer
+  std::cout << "clear destination buffer" << std::endl;
+  memset(staging_buf.data(), 0, num_points * sizeof(TYPE));
+  RT_CHECK(vx_copy_to_dev(device, kernel_arg.dst_addr, staging_buf.data(), buf_size));
+
+  // run tests
+  std::cout << "run tests" << std::endl;
+  RT_CHECK(run_test(kernel_arg, buf_size, num_points));
+
+  // cleanup
+  std::cout << "cleanup" << std::endl;
+  cleanup();
+
+  std::cout << "PASSED!" << std::endl;
+
+  return 0;
+}
\ No newline at end of file