refactoring device memory allocation and cleanup
This commit is contained in:
@@ -5,8 +5,8 @@
|
||||
|
||||
typedef struct {
|
||||
uint32_t count;
|
||||
uint32_t src_ptr;
|
||||
uint32_t dst_ptr;
|
||||
uint32_t src_addr;
|
||||
uint32_t dst_addr;
|
||||
} kernel_arg_t;
|
||||
|
||||
#endif
|
||||
@@ -5,8 +5,8 @@
|
||||
void main() {
|
||||
kernel_arg_t* arg = (kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
|
||||
uint32_t count = arg->count;
|
||||
int32_t* src_ptr = (int32_t*)arg->src_ptr;
|
||||
int32_t* dst_ptr = (int32_t*)arg->dst_ptr;
|
||||
int32_t* src_ptr = (int32_t*)arg->src_addr;
|
||||
int32_t* dst_ptr = (int32_t*)arg->dst_addr;
|
||||
|
||||
uint32_t offset = vx_core_id() * count;
|
||||
|
||||
|
||||
@@ -24,6 +24,7 @@ uint32_t count = 0;
|
||||
|
||||
vx_device_h device = nullptr;
|
||||
vx_buffer_h staging_buf = nullptr;
|
||||
kernel_arg_t kernel_arg;
|
||||
|
||||
static void show_usage() {
|
||||
std::cout << "Vortex Test." << std::endl;
|
||||
@@ -57,9 +58,11 @@ static void parse_args(int argc, char **argv) {
|
||||
|
||||
void cleanup() {
|
||||
if (staging_buf) {
|
||||
vx_buf_release(staging_buf);
|
||||
vx_buf_free(staging_buf);
|
||||
}
|
||||
if (device) {
|
||||
vx_mem_free(device, kernel_arg.src_addr);
|
||||
vx_mem_free(device, kernel_arg.dst_addr);
|
||||
vx_dev_close(device);
|
||||
}
|
||||
}
|
||||
@@ -152,7 +155,7 @@ int run_kernel_test(const kernel_arg_t& kernel_arg,
|
||||
}
|
||||
std::cout << "upload source buffer" << std::endl;
|
||||
auto t0 = std::chrono::high_resolution_clock::now();
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src_ptr, buf_size, 0));
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src_addr, buf_size, 0));
|
||||
auto t1 = std::chrono::high_resolution_clock::now();
|
||||
|
||||
// clear destination buffer
|
||||
@@ -163,7 +166,7 @@ int run_kernel_test(const kernel_arg_t& kernel_arg,
|
||||
}
|
||||
}
|
||||
std::cout << "clear destination buffer" << std::endl;
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_ptr, buf_size, 0));
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_addr, buf_size, 0));
|
||||
|
||||
// start device
|
||||
std::cout << "start execution" << std::endl;
|
||||
@@ -175,7 +178,7 @@ int run_kernel_test(const kernel_arg_t& kernel_arg,
|
||||
// read destination buffer from local memory
|
||||
std::cout << "read destination buffer from local memory" << std::endl;
|
||||
auto t4 = std::chrono::high_resolution_clock::now();
|
||||
RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_ptr, buf_size, 0));
|
||||
RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_addr, buf_size, 0));
|
||||
auto t5 = std::chrono::high_resolution_clock::now();
|
||||
|
||||
|
||||
@@ -215,8 +218,7 @@ int run_kernel_test(const kernel_arg_t& kernel_arg,
|
||||
int main(int argc, char *argv[]) {
|
||||
|
||||
size_t value;
|
||||
kernel_arg_t kernel_arg;
|
||||
|
||||
|
||||
// parse command arguments
|
||||
parse_args(argc, argv);
|
||||
|
||||
@@ -238,25 +240,25 @@ int main(int argc, char *argv[]) {
|
||||
std::cout << "buffer size: " << buf_size << " bytes" << std::endl;
|
||||
|
||||
// allocate device memory
|
||||
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
|
||||
kernel_arg.src_ptr = value;
|
||||
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
|
||||
kernel_arg.dst_ptr = value;
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, &value));
|
||||
kernel_arg.src_addr = value;
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, &value));
|
||||
kernel_arg.dst_addr = value;
|
||||
|
||||
kernel_arg.count = num_points;
|
||||
|
||||
std::cout << "dev_src=" << std::hex << kernel_arg.src_ptr << std::endl;
|
||||
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::endl;
|
||||
std::cout << "dev_src=" << std::hex << kernel_arg.src_addr << std::endl;
|
||||
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_addr << std::endl;
|
||||
|
||||
// allocate shared memory
|
||||
std::cout << "allocate shared memory" << std::endl;
|
||||
uint32_t alloc_size = std::max<uint32_t>(buf_size, sizeof(kernel_arg_t));
|
||||
RT_CHECK(vx_alloc_shared_mem(device, alloc_size, &staging_buf));
|
||||
RT_CHECK(vx_buf_alloc(device, alloc_size, &staging_buf));
|
||||
|
||||
// run tests
|
||||
if (0 == test || -1 == test) {
|
||||
std::cout << "run memcopy test" << std::endl;
|
||||
RT_CHECK(run_memcopy_test(kernel_arg.src_ptr, 0x0badf00d40ff40ff, num_blocks));
|
||||
RT_CHECK(run_memcopy_test(kernel_arg.src_addr, 0x0badf00d40ff40ff, num_blocks));
|
||||
}
|
||||
|
||||
if (1 == test || -1 == test) {
|
||||
|
||||
@@ -6,9 +6,9 @@
|
||||
typedef struct {
|
||||
uint32_t num_tasks;
|
||||
uint32_t task_size;
|
||||
uint32_t src0_ptr;
|
||||
uint32_t src1_ptr;
|
||||
uint32_t dst_ptr;
|
||||
uint32_t src0_addr;
|
||||
uint32_t src1_addr;
|
||||
uint32_t dst_addr;
|
||||
} kernel_arg_t;
|
||||
|
||||
#endif
|
||||
@@ -5,9 +5,9 @@
|
||||
|
||||
void kernel_body(int task_id, kernel_arg_t* arg) {
|
||||
uint32_t count = arg->task_size;
|
||||
int32_t* src0_ptr = (int32_t*)arg->src0_ptr;
|
||||
int32_t* src1_ptr = (int32_t*)arg->src1_ptr;
|
||||
int32_t* dst_ptr = (int32_t*)arg->dst_ptr;
|
||||
int32_t* src0_ptr = (int32_t*)arg->src0_addr;
|
||||
int32_t* src1_ptr = (int32_t*)arg->src1_addr;
|
||||
int32_t* dst_ptr = (int32_t*)arg->dst_addr;
|
||||
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
|
||||
@@ -21,6 +21,7 @@ uint32_t count = 0;
|
||||
|
||||
vx_device_h device = nullptr;
|
||||
vx_buffer_h staging_buf = nullptr;
|
||||
kernel_arg_t kernel_arg;
|
||||
|
||||
static void show_usage() {
|
||||
std::cout << "Vortex Test." << std::endl;
|
||||
@@ -51,9 +52,12 @@ static void parse_args(int argc, char **argv) {
|
||||
|
||||
void cleanup() {
|
||||
if (staging_buf) {
|
||||
vx_buf_release(staging_buf);
|
||||
vx_buf_free(staging_buf);
|
||||
}
|
||||
if (device) {
|
||||
if (device) {
|
||||
vx_mem_free(device, kernel_arg.src0_addr);
|
||||
vx_mem_free(device, kernel_arg.src1_addr);
|
||||
vx_mem_free(device, kernel_arg.dst_addr);
|
||||
vx_dev_close(device);
|
||||
}
|
||||
}
|
||||
@@ -71,7 +75,7 @@ int run_test(const kernel_arg_t& kernel_arg,
|
||||
|
||||
// download destination buffer
|
||||
std::cout << "download destination buffer" << std::endl;
|
||||
RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_ptr, buf_size, 0));
|
||||
RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_addr, buf_size, 0));
|
||||
|
||||
// verify result
|
||||
std::cout << "verify result" << std::endl;
|
||||
@@ -99,7 +103,6 @@ int run_test(const kernel_arg_t& kernel_arg,
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
size_t value;
|
||||
kernel_arg_t kernel_arg;
|
||||
|
||||
// parse command arguments
|
||||
parse_args(argc, argv);
|
||||
@@ -131,24 +134,24 @@ int main(int argc, char *argv[]) {
|
||||
// allocate device memory
|
||||
std::cout << "allocate device memory" << std::endl;
|
||||
|
||||
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
|
||||
kernel_arg.src0_ptr = value;
|
||||
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
|
||||
kernel_arg.src1_ptr = value;
|
||||
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
|
||||
kernel_arg.dst_ptr = value;
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, &value));
|
||||
kernel_arg.src0_addr = value;
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, &value));
|
||||
kernel_arg.src1_addr = value;
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, &value));
|
||||
kernel_arg.dst_addr = value;
|
||||
|
||||
kernel_arg.num_tasks = num_tasks;
|
||||
kernel_arg.task_size = count;
|
||||
|
||||
std::cout << "dev_src0=" << std::hex << kernel_arg.src0_ptr << std::endl;
|
||||
std::cout << "dev_src1=" << std::hex << kernel_arg.src1_ptr << std::endl;
|
||||
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::endl;
|
||||
std::cout << "dev_src0=" << std::hex << kernel_arg.src0_addr << std::endl;
|
||||
std::cout << "dev_src1=" << std::hex << kernel_arg.src1_addr << std::endl;
|
||||
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_addr << std::endl;
|
||||
|
||||
// allocate shared memory
|
||||
std::cout << "allocate shared memory" << std::endl;
|
||||
uint32_t alloc_size = std::max<uint32_t>(buf_size, sizeof(kernel_arg_t));
|
||||
RT_CHECK(vx_alloc_shared_mem(device, alloc_size, &staging_buf));
|
||||
RT_CHECK(vx_buf_alloc(device, alloc_size, &staging_buf));
|
||||
|
||||
// upload kernel argument
|
||||
std::cout << "upload kernel argument" << std::endl;
|
||||
@@ -166,7 +169,7 @@ int main(int argc, char *argv[]) {
|
||||
}
|
||||
}
|
||||
std::cout << "upload source buffer0" << std::endl;
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src0_ptr, buf_size, 0));
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src0_addr, buf_size, 0));
|
||||
|
||||
// upload source buffer1
|
||||
{
|
||||
@@ -176,7 +179,7 @@ int main(int argc, char *argv[]) {
|
||||
}
|
||||
}
|
||||
std::cout << "upload source buffer1" << std::endl;
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src1_ptr, buf_size, 0));
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src1_addr, buf_size, 0));
|
||||
|
||||
// clear destination buffer
|
||||
{
|
||||
@@ -186,7 +189,7 @@ int main(int argc, char *argv[]) {
|
||||
}
|
||||
}
|
||||
std::cout << "clear destination buffer" << std::endl;
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_ptr, buf_size, 0));
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_addr, buf_size, 0));
|
||||
|
||||
// run tests
|
||||
std::cout << "run tests" << std::endl;
|
||||
|
||||
@@ -5,8 +5,8 @@
|
||||
|
||||
typedef struct {
|
||||
uint32_t num_points;
|
||||
uint32_t src_ptr;
|
||||
uint32_t dst_ptr;
|
||||
uint32_t src_addr;
|
||||
uint32_t dst_addr;
|
||||
} kernel_arg_t;
|
||||
|
||||
#endif
|
||||
@@ -6,8 +6,8 @@
|
||||
// Parallel Selection sort
|
||||
|
||||
void kernel_body(int task_id, kernel_arg_t* arg) {
|
||||
int32_t* src_ptr = (int32_t*)arg->src_ptr;
|
||||
int32_t* dst_ptr = (int32_t*)arg->dst_ptr;
|
||||
int32_t* src_ptr = (int32_t*)arg->src_addr;
|
||||
int32_t* dst_ptr = (int32_t*)arg->dst_addr;
|
||||
|
||||
int value = src_ptr[task_id];
|
||||
|
||||
|
||||
@@ -25,6 +25,7 @@ std::vector<int> ref_data;
|
||||
|
||||
vx_device_h device = nullptr;
|
||||
vx_buffer_h staging_buf = nullptr;
|
||||
kernel_arg_t kernel_arg;
|
||||
|
||||
static void show_usage() {
|
||||
std::cout << "Vortex Test." << std::endl;
|
||||
@@ -55,9 +56,11 @@ static void parse_args(int argc, char **argv) {
|
||||
|
||||
void cleanup() {
|
||||
if (staging_buf) {
|
||||
vx_buf_release(staging_buf);
|
||||
vx_buf_free(staging_buf);
|
||||
}
|
||||
if (device) {
|
||||
vx_mem_free(device, kernel_arg.src_addr);
|
||||
vx_mem_free(device, kernel_arg.dst_addr);
|
||||
vx_dev_close(device);
|
||||
}
|
||||
}
|
||||
@@ -125,7 +128,7 @@ int run_test(const kernel_arg_t& kernel_arg,
|
||||
|
||||
// download destination buffer
|
||||
std::cout << "download destination buffer" << std::endl;
|
||||
RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_ptr, buf_size, 0));
|
||||
RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_addr, buf_size, 0));
|
||||
|
||||
// verify result
|
||||
std::cout << "verify result" << std::endl;
|
||||
@@ -153,7 +156,6 @@ int run_test(const kernel_arg_t& kernel_arg,
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
size_t value;
|
||||
kernel_arg_t kernel_arg;
|
||||
|
||||
// parse command arguments
|
||||
parse_args(argc, argv);
|
||||
@@ -189,22 +191,22 @@ int main(int argc, char *argv[]) {
|
||||
// allocate device memory
|
||||
std::cout << "allocate device memory" << std::endl;
|
||||
|
||||
RT_CHECK(vx_alloc_dev_mem(device, src_buf_size, &value));
|
||||
kernel_arg.src_ptr = value;
|
||||
RT_CHECK(vx_alloc_dev_mem(device, dst_buf_size, &value));
|
||||
kernel_arg.dst_ptr = value;
|
||||
RT_CHECK(vx_mem_alloc(device, src_buf_size, &value));
|
||||
kernel_arg.src_addr = value;
|
||||
RT_CHECK(vx_mem_alloc(device, dst_buf_size, &value));
|
||||
kernel_arg.dst_addr = value;
|
||||
|
||||
kernel_arg.num_points = num_points;
|
||||
|
||||
std::cout << "dev_src=" << std::hex << kernel_arg.src_ptr << std::endl;
|
||||
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::endl;
|
||||
std::cout << "dev_src=" << std::hex << kernel_arg.src_addr << std::endl;
|
||||
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_addr << std::endl;
|
||||
|
||||
// allocate shared memory
|
||||
std::cout << "allocate shared memory" << std::endl;
|
||||
uint32_t staging_buf_size = std::max<uint32_t>(src_buf_size,
|
||||
std::max<uint32_t>(dst_buf_size,
|
||||
sizeof(kernel_arg_t)));
|
||||
RT_CHECK(vx_alloc_shared_mem(device, staging_buf_size, &staging_buf));
|
||||
RT_CHECK(vx_buf_alloc(device, staging_buf_size, &staging_buf));
|
||||
|
||||
// upload kernel argument
|
||||
std::cout << "upload kernel argument" << std::endl;
|
||||
@@ -222,7 +224,7 @@ int main(int argc, char *argv[]) {
|
||||
}
|
||||
}
|
||||
std::cout << "upload source buffer" << std::endl;
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src_ptr, src_buf_size, 0));
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src_addr, src_buf_size, 0));
|
||||
|
||||
// clear destination buffer
|
||||
{
|
||||
@@ -232,7 +234,7 @@ int main(int argc, char *argv[]) {
|
||||
}
|
||||
}
|
||||
std::cout << "clear destination buffer" << std::endl;
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_ptr, dst_buf_size, 0));
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_addr, dst_buf_size, 0));
|
||||
|
||||
// run tests
|
||||
std::cout << "run tests" << std::endl;
|
||||
|
||||
@@ -7,9 +7,9 @@ typedef struct {
|
||||
uint32_t testid;
|
||||
uint32_t num_tasks;
|
||||
uint32_t task_size;
|
||||
uint32_t src0_ptr;
|
||||
uint32_t src1_ptr;
|
||||
uint32_t dst_ptr;
|
||||
uint32_t src0_addr;
|
||||
uint32_t src1_addr;
|
||||
uint32_t dst_addr;
|
||||
} kernel_arg_t;
|
||||
|
||||
#endif
|
||||
@@ -13,9 +13,9 @@ inline float __ieee754_sqrtf (float x) {
|
||||
|
||||
void kernel_iadd(int task_id, kernel_arg_t* arg) {
|
||||
uint32_t count = arg->task_size;
|
||||
int32_t* src0_ptr = (int32_t*)arg->src0_ptr;
|
||||
int32_t* src1_ptr = (int32_t*)arg->src1_ptr;
|
||||
int32_t* dst_ptr = (int32_t*)arg->dst_ptr;
|
||||
int32_t* src0_ptr = (int32_t*)arg->src0_addr;
|
||||
int32_t* src1_ptr = (int32_t*)arg->src1_addr;
|
||||
int32_t* dst_ptr = (int32_t*)arg->dst_addr;
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
@@ -28,9 +28,9 @@ void kernel_iadd(int task_id, kernel_arg_t* arg) {
|
||||
|
||||
void kernel_imul(int task_id, kernel_arg_t* arg) {
|
||||
uint32_t count = arg->task_size;
|
||||
int32_t* src0_ptr = (int32_t*)arg->src0_ptr;
|
||||
int32_t* src1_ptr = (int32_t*)arg->src1_ptr;
|
||||
int32_t* dst_ptr = (int32_t*)arg->dst_ptr;
|
||||
int32_t* src0_ptr = (int32_t*)arg->src0_addr;
|
||||
int32_t* src1_ptr = (int32_t*)arg->src1_addr;
|
||||
int32_t* dst_ptr = (int32_t*)arg->dst_addr;
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
@@ -43,9 +43,9 @@ void kernel_imul(int task_id, kernel_arg_t* arg) {
|
||||
|
||||
void kernel_idiv(int task_id, kernel_arg_t* arg) {
|
||||
uint32_t count = arg->task_size;
|
||||
int32_t* src0_ptr = (int32_t*)arg->src0_ptr;
|
||||
int32_t* src1_ptr = (int32_t*)arg->src1_ptr;
|
||||
int32_t* dst_ptr = (int32_t*)arg->dst_ptr;
|
||||
int32_t* src0_ptr = (int32_t*)arg->src0_addr;
|
||||
int32_t* src1_ptr = (int32_t*)arg->src1_addr;
|
||||
int32_t* dst_ptr = (int32_t*)arg->dst_addr;
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
@@ -58,9 +58,9 @@ void kernel_idiv(int task_id, kernel_arg_t* arg) {
|
||||
|
||||
void kernel_idiv_mul(int task_id, kernel_arg_t* arg) {
|
||||
uint32_t count = arg->task_size;
|
||||
int32_t* src0_ptr = (int32_t*)arg->src0_ptr;
|
||||
int32_t* src1_ptr = (int32_t*)arg->src1_ptr;
|
||||
int32_t* dst_ptr = (int32_t*)arg->dst_ptr;
|
||||
int32_t* src0_ptr = (int32_t*)arg->src0_addr;
|
||||
int32_t* src1_ptr = (int32_t*)arg->src1_addr;
|
||||
int32_t* dst_ptr = (int32_t*)arg->dst_addr;
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
@@ -75,9 +75,9 @@ void kernel_idiv_mul(int task_id, kernel_arg_t* arg) {
|
||||
|
||||
void kernel_fadd(int task_id, kernel_arg_t* arg) {
|
||||
uint32_t count = arg->task_size;
|
||||
float* src0_ptr = (float*)arg->src0_ptr;
|
||||
float* src1_ptr = (float*)arg->src1_ptr;
|
||||
float* dst_ptr = (float*)arg->dst_ptr;
|
||||
float* src0_ptr = (float*)arg->src0_addr;
|
||||
float* src1_ptr = (float*)arg->src1_addr;
|
||||
float* dst_ptr = (float*)arg->dst_addr;
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
@@ -90,9 +90,9 @@ void kernel_fadd(int task_id, kernel_arg_t* arg) {
|
||||
|
||||
void kernel_fsub(int task_id, kernel_arg_t* arg) {
|
||||
uint32_t count = arg->task_size;
|
||||
float* src0_ptr = (float*)arg->src0_ptr;
|
||||
float* src1_ptr = (float*)arg->src1_ptr;
|
||||
float* dst_ptr = (float*)arg->dst_ptr;
|
||||
float* src0_ptr = (float*)arg->src0_addr;
|
||||
float* src1_ptr = (float*)arg->src1_addr;
|
||||
float* dst_ptr = (float*)arg->dst_addr;
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
@@ -105,9 +105,9 @@ void kernel_fsub(int task_id, kernel_arg_t* arg) {
|
||||
|
||||
void kernel_fmul(int task_id, kernel_arg_t* arg) {
|
||||
uint32_t count = arg->task_size;
|
||||
float* src0_ptr = (float*)arg->src0_ptr;
|
||||
float* src1_ptr = (float*)arg->src1_ptr;
|
||||
float* dst_ptr = (float*)arg->dst_ptr;
|
||||
float* src0_ptr = (float*)arg->src0_addr;
|
||||
float* src1_ptr = (float*)arg->src1_addr;
|
||||
float* dst_ptr = (float*)arg->dst_addr;
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
@@ -120,9 +120,9 @@ void kernel_fmul(int task_id, kernel_arg_t* arg) {
|
||||
|
||||
void kernel_fmadd(int task_id, kernel_arg_t* arg) {
|
||||
uint32_t count = arg->task_size;
|
||||
float* src0_ptr = (float*)arg->src0_ptr;
|
||||
float* src1_ptr = (float*)arg->src1_ptr;
|
||||
float* dst_ptr = (float*)arg->dst_ptr;
|
||||
float* src0_ptr = (float*)arg->src0_addr;
|
||||
float* src1_ptr = (float*)arg->src1_addr;
|
||||
float* dst_ptr = (float*)arg->dst_addr;
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
@@ -135,9 +135,9 @@ void kernel_fmadd(int task_id, kernel_arg_t* arg) {
|
||||
|
||||
void kernel_fmsub(int task_id, kernel_arg_t* arg) {
|
||||
uint32_t count = arg->task_size;
|
||||
float* src0_ptr = (float*)arg->src0_ptr;
|
||||
float* src1_ptr = (float*)arg->src1_ptr;
|
||||
float* dst_ptr = (float*)arg->dst_ptr;
|
||||
float* src0_ptr = (float*)arg->src0_addr;
|
||||
float* src1_ptr = (float*)arg->src1_addr;
|
||||
float* dst_ptr = (float*)arg->dst_addr;
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
@@ -150,9 +150,9 @@ void kernel_fmsub(int task_id, kernel_arg_t* arg) {
|
||||
|
||||
void kernel_fnmadd(int task_id, kernel_arg_t* arg) {
|
||||
uint32_t count = arg->task_size;
|
||||
float* src0_ptr = (float*)arg->src0_ptr;
|
||||
float* src1_ptr = (float*)arg->src1_ptr;
|
||||
float* dst_ptr = (float*)arg->dst_ptr;
|
||||
float* src0_ptr = (float*)arg->src0_addr;
|
||||
float* src1_ptr = (float*)arg->src1_addr;
|
||||
float* dst_ptr = (float*)arg->dst_addr;
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
@@ -165,9 +165,9 @@ void kernel_fnmadd(int task_id, kernel_arg_t* arg) {
|
||||
|
||||
void kernel_fnmsub(int task_id, kernel_arg_t* arg) {
|
||||
uint32_t count = arg->task_size;
|
||||
float* src0_ptr = (float*)arg->src0_ptr;
|
||||
float* src1_ptr = (float*)arg->src1_ptr;
|
||||
float* dst_ptr = (float*)arg->dst_ptr;
|
||||
float* src0_ptr = (float*)arg->src0_addr;
|
||||
float* src1_ptr = (float*)arg->src1_addr;
|
||||
float* dst_ptr = (float*)arg->dst_addr;
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
@@ -180,9 +180,9 @@ void kernel_fnmsub(int task_id, kernel_arg_t* arg) {
|
||||
|
||||
void kernel_fnmadd_madd(int task_id, kernel_arg_t* arg) {
|
||||
uint32_t count = arg->task_size;
|
||||
float* src0_ptr = (float*)arg->src0_ptr;
|
||||
float* src1_ptr = (float*)arg->src1_ptr;
|
||||
float* dst_ptr = (float*)arg->dst_ptr;
|
||||
float* src0_ptr = (float*)arg->src0_addr;
|
||||
float* src1_ptr = (float*)arg->src1_addr;
|
||||
float* dst_ptr = (float*)arg->dst_addr;
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
@@ -197,9 +197,9 @@ void kernel_fnmadd_madd(int task_id, kernel_arg_t* arg) {
|
||||
|
||||
void kernel_fdiv(int task_id, kernel_arg_t* arg) {
|
||||
uint32_t count = arg->task_size;
|
||||
float* src0_ptr = (float*)arg->src0_ptr;
|
||||
float* src1_ptr = (float*)arg->src1_ptr;
|
||||
float* dst_ptr = (float*)arg->dst_ptr;
|
||||
float* src0_ptr = (float*)arg->src0_addr;
|
||||
float* src1_ptr = (float*)arg->src1_addr;
|
||||
float* dst_ptr = (float*)arg->dst_addr;
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
@@ -212,9 +212,9 @@ void kernel_fdiv(int task_id, kernel_arg_t* arg) {
|
||||
|
||||
void kernel_fdiv2(int task_id, kernel_arg_t* arg) {
|
||||
uint32_t count = arg->task_size;
|
||||
float* src0_ptr = (float*)arg->src0_ptr;
|
||||
float* src1_ptr = (float*)arg->src1_ptr;
|
||||
float* dst_ptr = (float*)arg->dst_ptr;
|
||||
float* src0_ptr = (float*)arg->src0_addr;
|
||||
float* src1_ptr = (float*)arg->src1_addr;
|
||||
float* dst_ptr = (float*)arg->dst_addr;
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
@@ -229,9 +229,9 @@ void kernel_fdiv2(int task_id, kernel_arg_t* arg) {
|
||||
|
||||
void kernel_fsqrt(int task_id, kernel_arg_t* arg) {
|
||||
uint32_t count = arg->task_size;
|
||||
float* src0_ptr = (float*)arg->src0_ptr;
|
||||
float* src1_ptr = (float*)arg->src1_ptr;
|
||||
float* dst_ptr = (float*)arg->dst_ptr;
|
||||
float* src0_ptr = (float*)arg->src0_addr;
|
||||
float* src1_ptr = (float*)arg->src1_addr;
|
||||
float* dst_ptr = (float*)arg->dst_addr;
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
@@ -244,9 +244,9 @@ void kernel_fsqrt(int task_id, kernel_arg_t* arg) {
|
||||
|
||||
void kernel_ftoi(int task_id, kernel_arg_t* arg) {
|
||||
uint32_t count = arg->task_size;
|
||||
float* src0_ptr = (float*)arg->src0_ptr;
|
||||
float* src1_ptr = (float*)arg->src1_ptr;
|
||||
int32_t* dst_ptr = (int32_t*)arg->dst_ptr;
|
||||
float* src0_ptr = (float*)arg->src0_addr;
|
||||
float* src1_ptr = (float*)arg->src1_addr;
|
||||
int32_t* dst_ptr = (int32_t*)arg->dst_addr;
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
@@ -260,9 +260,9 @@ void kernel_ftoi(int task_id, kernel_arg_t* arg) {
|
||||
|
||||
void kernel_ftou(int task_id, kernel_arg_t* arg) {
|
||||
uint32_t count = arg->task_size;
|
||||
float* src0_ptr = (float*)arg->src0_ptr;
|
||||
float* src1_ptr = (float*)arg->src1_ptr;
|
||||
uint32_t* dst_ptr = (uint32_t*)arg->dst_ptr;
|
||||
float* src0_ptr = (float*)arg->src0_addr;
|
||||
float* src1_ptr = (float*)arg->src1_addr;
|
||||
uint32_t* dst_ptr = (uint32_t*)arg->dst_addr;
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
@@ -276,9 +276,9 @@ void kernel_ftou(int task_id, kernel_arg_t* arg) {
|
||||
|
||||
void kernel_itof(int task_id, kernel_arg_t* arg) {
|
||||
uint32_t count = arg->task_size;
|
||||
int32_t* src0_ptr = (int32_t*)arg->src0_ptr;
|
||||
int32_t* src1_ptr = (int32_t*)arg->src1_ptr;
|
||||
float* dst_ptr = (float*)arg->dst_ptr;
|
||||
int32_t* src0_ptr = (int32_t*)arg->src0_addr;
|
||||
int32_t* src1_ptr = (int32_t*)arg->src1_addr;
|
||||
float* dst_ptr = (float*)arg->dst_addr;
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
@@ -292,9 +292,9 @@ void kernel_itof(int task_id, kernel_arg_t* arg) {
|
||||
|
||||
void kernel_utof(int task_id, kernel_arg_t* arg) {
|
||||
uint32_t count = arg->task_size;
|
||||
int32_t* src0_ptr = (int32_t*)arg->src0_ptr;
|
||||
int32_t* src1_ptr = (int32_t*)arg->src1_ptr;
|
||||
float* dst_ptr = (float*)arg->dst_ptr;
|
||||
int32_t* src0_ptr = (int32_t*)arg->src0_addr;
|
||||
int32_t* src1_ptr = (int32_t*)arg->src1_addr;
|
||||
float* dst_ptr = (float*)arg->dst_addr;
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
|
||||
@@ -87,6 +87,7 @@ vx_buffer_h arg_buf = nullptr;
|
||||
vx_buffer_h src1_buf = nullptr;
|
||||
vx_buffer_h src2_buf = nullptr;
|
||||
vx_buffer_h dst_buf = nullptr;
|
||||
kernel_arg_t kernel_arg;
|
||||
|
||||
static void show_usage() {
|
||||
std::cout << "Vortex Test." << std::endl;
|
||||
@@ -130,26 +131,28 @@ static void parse_args(int argc, char **argv) {
|
||||
|
||||
void cleanup() {
|
||||
if (arg_buf) {
|
||||
vx_buf_release(arg_buf);
|
||||
vx_buf_free(arg_buf);
|
||||
}
|
||||
if (src1_buf) {
|
||||
vx_buf_release(src1_buf);
|
||||
vx_buf_free(src1_buf);
|
||||
}
|
||||
if (src2_buf) {
|
||||
vx_buf_release(src2_buf);
|
||||
vx_buf_free(src2_buf);
|
||||
}
|
||||
if (dst_buf) {
|
||||
vx_buf_release(dst_buf);
|
||||
vx_buf_free(dst_buf);
|
||||
}
|
||||
if (device) {
|
||||
vx_mem_free(device, kernel_arg.src0_addr);
|
||||
vx_mem_free(device, kernel_arg.src1_addr);
|
||||
vx_mem_free(device, kernel_arg.dst_addr);
|
||||
vx_dev_close(device);
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
int exitcode = 0;
|
||||
size_t value;
|
||||
kernel_arg_t kernel_arg;
|
||||
size_t value;
|
||||
|
||||
// parse command arguments
|
||||
parse_args(argc, argv);
|
||||
@@ -187,26 +190,26 @@ int main(int argc, char *argv[]) {
|
||||
// allocate device memory
|
||||
std::cout << "allocate device memory" << std::endl;
|
||||
|
||||
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
|
||||
kernel_arg.src0_ptr = value;
|
||||
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
|
||||
kernel_arg.src1_ptr = value;
|
||||
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
|
||||
kernel_arg.dst_ptr = value;
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, &value));
|
||||
kernel_arg.src0_addr = value;
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, &value));
|
||||
kernel_arg.src1_addr = value;
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, &value));
|
||||
kernel_arg.dst_addr = value;
|
||||
|
||||
kernel_arg.num_tasks = num_tasks;
|
||||
kernel_arg.task_size = count;
|
||||
|
||||
std::cout << "dev_src0=" << std::hex << kernel_arg.src0_ptr << std::dec << std::endl;
|
||||
std::cout << "dev_src1=" << std::hex << kernel_arg.src1_ptr << std::dec << std::endl;
|
||||
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::dec << std::endl;
|
||||
std::cout << "dev_src0=" << std::hex << kernel_arg.src0_addr << std::dec << std::endl;
|
||||
std::cout << "dev_src1=" << std::hex << kernel_arg.src1_addr << std::dec << std::endl;
|
||||
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_addr << std::dec << std::endl;
|
||||
|
||||
// allocate shared memory
|
||||
std::cout << "allocate shared memory" << std::endl;
|
||||
RT_CHECK(vx_alloc_shared_mem(device, sizeof(kernel_arg_t), &arg_buf));
|
||||
RT_CHECK(vx_alloc_shared_mem(device, buf_size, &src1_buf));
|
||||
RT_CHECK(vx_alloc_shared_mem(device, buf_size, &src2_buf));
|
||||
RT_CHECK(vx_alloc_shared_mem(device, buf_size, &dst_buf));
|
||||
RT_CHECK(vx_buf_alloc(device, sizeof(kernel_arg_t), &arg_buf));
|
||||
RT_CHECK(vx_buf_alloc(device, buf_size, &src1_buf));
|
||||
RT_CHECK(vx_buf_alloc(device, buf_size, &src2_buf));
|
||||
RT_CHECK(vx_buf_alloc(device, buf_size, &dst_buf));
|
||||
|
||||
for (int t = testid_s; t <= testid_e; ++t) {
|
||||
auto name = testMngr.get_name(t);
|
||||
@@ -226,18 +229,18 @@ int main(int argc, char *argv[]) {
|
||||
|
||||
// upload source buffer0
|
||||
std::cout << "upload source buffer0" << std::endl;
|
||||
RT_CHECK(vx_copy_to_dev(src1_buf, kernel_arg.src0_ptr, buf_size, 0));
|
||||
RT_CHECK(vx_copy_to_dev(src1_buf, kernel_arg.src0_addr, buf_size, 0));
|
||||
|
||||
// upload source buffer1
|
||||
std::cout << "upload source buffer1" << std::endl;
|
||||
RT_CHECK(vx_copy_to_dev(src2_buf, kernel_arg.src1_ptr, buf_size, 0));
|
||||
RT_CHECK(vx_copy_to_dev(src2_buf, kernel_arg.src1_addr, buf_size, 0));
|
||||
|
||||
// clear destination buffer
|
||||
std::cout << "clear destination buffer" << std::endl;
|
||||
for (int i = 0; i < num_points; ++i) {
|
||||
((uint32_t*)vx_host_ptr(dst_buf))[i] = 0xdeadbeef;
|
||||
}
|
||||
RT_CHECK(vx_copy_to_dev(dst_buf, kernel_arg.dst_ptr, buf_size, 0));
|
||||
RT_CHECK(vx_copy_to_dev(dst_buf, kernel_arg.dst_addr, buf_size, 0));
|
||||
|
||||
// start device
|
||||
std::cout << "start device" << std::endl;
|
||||
@@ -249,7 +252,7 @@ int main(int argc, char *argv[]) {
|
||||
|
||||
// download destination buffer
|
||||
std::cout << "download destination buffer" << std::endl;
|
||||
RT_CHECK(vx_copy_from_dev(dst_buf, kernel_arg.dst_ptr, buf_size, 0));
|
||||
RT_CHECK(vx_copy_from_dev(dst_buf, kernel_arg.dst_addr, buf_size, 0));
|
||||
|
||||
// verify destination
|
||||
std::cout << "verify test result" << std::endl;
|
||||
|
||||
@@ -6,9 +6,9 @@
|
||||
typedef struct {
|
||||
uint32_t num_tasks;
|
||||
uint32_t task_size;
|
||||
uint32_t src0_ptr;
|
||||
uint32_t src1_ptr;
|
||||
uint32_t dst_ptr;
|
||||
uint32_t src0_addr;
|
||||
uint32_t src1_addr;
|
||||
uint32_t dst_addr;
|
||||
} kernel_arg_t;
|
||||
|
||||
#endif
|
||||
@@ -5,9 +5,9 @@
|
||||
|
||||
void kernel_body(int task_id, kernel_arg_t* arg) {
|
||||
uint32_t count = arg->task_size;
|
||||
int32_t* src0_ptr = (int32_t*)arg->src0_ptr;
|
||||
int32_t* src1_ptr = (int32_t*)arg->src1_ptr;
|
||||
int32_t* dst_ptr = (int32_t*)arg->dst_ptr;
|
||||
int32_t* src0_ptr = (int32_t*)arg->src0_addr;
|
||||
int32_t* src1_ptr = (int32_t*)arg->src1_addr;
|
||||
int32_t* dst_ptr = (int32_t*)arg->dst_addr;
|
||||
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
|
||||
@@ -21,6 +21,7 @@ uint32_t count = 0;
|
||||
|
||||
vx_device_h device = nullptr;
|
||||
vx_buffer_h staging_buf = nullptr;
|
||||
kernel_arg_t kernel_arg;
|
||||
|
||||
static void show_usage() {
|
||||
std::cout << "Vortex Test." << std::endl;
|
||||
@@ -51,9 +52,12 @@ static void parse_args(int argc, char **argv) {
|
||||
|
||||
void cleanup() {
|
||||
if (staging_buf) {
|
||||
vx_buf_release(staging_buf);
|
||||
vx_buf_free(staging_buf);
|
||||
}
|
||||
if (device) {
|
||||
vx_mem_free(device, kernel_arg.src0_addr);
|
||||
vx_mem_free(device, kernel_arg.src1_addr);
|
||||
vx_mem_free(device, kernel_arg.dst_addr);
|
||||
vx_dev_close(device);
|
||||
}
|
||||
}
|
||||
@@ -71,7 +75,7 @@ int run_test(const kernel_arg_t& kernel_arg,
|
||||
|
||||
// download destination buffer
|
||||
std::cout << "download destination buffer" << std::endl;
|
||||
RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_ptr, buf_size, 0));
|
||||
RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_addr, buf_size, 0));
|
||||
|
||||
// verify result
|
||||
std::cout << "verify result" << std::endl;
|
||||
@@ -98,8 +102,7 @@ int run_test(const kernel_arg_t& kernel_arg,
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
size_t value;
|
||||
kernel_arg_t kernel_arg;
|
||||
size_t value;
|
||||
|
||||
// parse command arguments
|
||||
parse_args(argc, argv);
|
||||
@@ -131,24 +134,24 @@ int main(int argc, char *argv[]) {
|
||||
// allocate device memory
|
||||
std::cout << "allocate device memory" << std::endl;
|
||||
|
||||
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
|
||||
kernel_arg.src0_ptr = value;
|
||||
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
|
||||
kernel_arg.src1_ptr = value;
|
||||
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
|
||||
kernel_arg.dst_ptr = value;
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, &value));
|
||||
kernel_arg.src0_addr = value;
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, &value));
|
||||
kernel_arg.src1_addr = value;
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, &value));
|
||||
kernel_arg.dst_addr = value;
|
||||
|
||||
kernel_arg.num_tasks = num_tasks;
|
||||
kernel_arg.task_size = count;
|
||||
|
||||
std::cout << "dev_src0=" << std::hex << kernel_arg.src0_ptr << std::endl;
|
||||
std::cout << "dev_src1=" << std::hex << kernel_arg.src1_ptr << std::endl;
|
||||
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::endl;
|
||||
std::cout << "dev_src0=" << std::hex << kernel_arg.src0_addr << std::endl;
|
||||
std::cout << "dev_src1=" << std::hex << kernel_arg.src1_addr << std::endl;
|
||||
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_addr << std::endl;
|
||||
|
||||
// allocate shared memory
|
||||
std::cout << "allocate shared memory" << std::endl;
|
||||
uint32_t alloc_size = std::max<uint32_t>(buf_size, sizeof(kernel_arg_t));
|
||||
RT_CHECK(vx_alloc_shared_mem(device, alloc_size, &staging_buf));
|
||||
RT_CHECK(vx_buf_alloc(device, alloc_size, &staging_buf));
|
||||
|
||||
// upload kernel argument
|
||||
std::cout << "upload kernel argument" << std::endl;
|
||||
@@ -166,7 +169,7 @@ int main(int argc, char *argv[]) {
|
||||
}
|
||||
}
|
||||
std::cout << "upload source buffer0" << std::endl;
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src0_ptr, buf_size, 0));
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src0_addr, buf_size, 0));
|
||||
|
||||
// upload source buffer1
|
||||
{
|
||||
@@ -176,7 +179,7 @@ int main(int argc, char *argv[]) {
|
||||
}
|
||||
}
|
||||
std::cout << "upload source buffer1" << std::endl;
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src1_ptr, buf_size, 0));
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src1_addr, buf_size, 0));
|
||||
|
||||
// clear destination buffer
|
||||
{
|
||||
@@ -186,7 +189,7 @@ int main(int argc, char *argv[]) {
|
||||
}
|
||||
}
|
||||
std::cout << "clear destination buffer" << std::endl;
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_ptr, buf_size, 0));
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_addr, buf_size, 0));
|
||||
|
||||
// run tests
|
||||
std::cout << "run tests" << std::endl;
|
||||
|
||||
@@ -5,8 +5,8 @@
|
||||
|
||||
typedef struct {
|
||||
uint32_t num_points;
|
||||
uint32_t src_ptr;
|
||||
uint32_t dst_ptr;
|
||||
uint32_t src_addr;
|
||||
uint32_t dst_addr;
|
||||
} kernel_arg_t;
|
||||
|
||||
#endif
|
||||
@@ -4,8 +4,8 @@
|
||||
#include "common.h"
|
||||
|
||||
void kernel_body(int task_id, kernel_arg_t* arg) {
|
||||
uint32_t* src_ptr = (uint32_t*)arg->src_ptr;
|
||||
uint32_t* dst_ptr = (uint32_t*)arg->dst_ptr;
|
||||
uint32_t* src_ptr = (uint32_t*)arg->src_addr;
|
||||
uint32_t* dst_ptr = (uint32_t*)arg->dst_addr;
|
||||
|
||||
int32_t* addr_ptr = (int32_t*)(src_ptr[task_id]);
|
||||
|
||||
|
||||
@@ -30,6 +30,7 @@ std::vector<int32_t> ref_data;
|
||||
|
||||
vx_device_h device = nullptr;
|
||||
vx_buffer_h staging_buf = nullptr;
|
||||
kernel_arg_t kernel_arg;
|
||||
|
||||
static void show_usage() {
|
||||
std::cout << "Vortex Test." << std::endl;
|
||||
@@ -60,9 +61,11 @@ static void parse_args(int argc, char **argv) {
|
||||
|
||||
void cleanup() {
|
||||
if (staging_buf) {
|
||||
vx_buf_release(staging_buf);
|
||||
vx_buf_free(staging_buf);
|
||||
}
|
||||
if (device) {
|
||||
vx_mem_free(device, kernel_arg.src_addr);
|
||||
vx_mem_free(device, kernel_arg.dst_addr);
|
||||
vx_dev_close(device);
|
||||
}
|
||||
}
|
||||
@@ -105,7 +108,7 @@ int run_test(const kernel_arg_t& kernel_arg,
|
||||
|
||||
// download destination buffer
|
||||
std::cout << "download destination buffer" << std::endl;
|
||||
RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_ptr, buf_size, 0));
|
||||
RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_addr, buf_size, 0));
|
||||
|
||||
// verify result
|
||||
std::cout << "verify result" << std::endl;
|
||||
@@ -132,8 +135,7 @@ int run_test(const kernel_arg_t& kernel_arg,
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
size_t value;
|
||||
kernel_arg_t kernel_arg;
|
||||
size_t value;
|
||||
|
||||
// parse command arguments
|
||||
parse_args(argc, argv);
|
||||
@@ -150,7 +152,7 @@ int main(int argc, char *argv[]) {
|
||||
|
||||
uint32_t num_points = count;
|
||||
|
||||
RT_CHECK(vx_alloc_dev_mem(device, NUM_ADDRS * sizeof(uint32_t), &usr_test_mem));
|
||||
RT_CHECK(vx_mem_alloc(device, NUM_ADDRS * sizeof(uint32_t), &usr_test_mem));
|
||||
|
||||
// generate input data
|
||||
gen_input_data(num_points);
|
||||
@@ -171,15 +173,15 @@ int main(int argc, char *argv[]) {
|
||||
// allocate device memory
|
||||
std::cout << "allocate device memory" << std::endl;
|
||||
|
||||
RT_CHECK(vx_alloc_dev_mem(device, src_buf_size, &value));
|
||||
kernel_arg.src_ptr = value;
|
||||
RT_CHECK(vx_alloc_dev_mem(device, dst_buf_size, &value));
|
||||
kernel_arg.dst_ptr = value;
|
||||
RT_CHECK(vx_mem_alloc(device, src_buf_size, &value));
|
||||
kernel_arg.src_addr = value;
|
||||
RT_CHECK(vx_mem_alloc(device, dst_buf_size, &value));
|
||||
kernel_arg.dst_addr = value;
|
||||
|
||||
kernel_arg.num_points = num_points;
|
||||
|
||||
std::cout << "dev_src=" << std::hex << kernel_arg.src_ptr << std::endl;
|
||||
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::endl;
|
||||
std::cout << "dev_src=" << std::hex << kernel_arg.src_addr << std::endl;
|
||||
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_addr << std::endl;
|
||||
|
||||
// allocate shared memory
|
||||
std::cout << "allocate shared memory" << std::endl;
|
||||
@@ -187,7 +189,7 @@ int main(int argc, char *argv[]) {
|
||||
std::max<uint32_t>(src_buf_size,
|
||||
std::max<uint32_t>(dst_buf_size,
|
||||
sizeof(kernel_arg_t))));
|
||||
RT_CHECK(vx_alloc_shared_mem(device, staging_buf_size, &staging_buf));
|
||||
RT_CHECK(vx_buf_alloc(device, staging_buf_size, &staging_buf));
|
||||
|
||||
// upload kernel argument
|
||||
std::cout << "upload kernel argument" << std::endl;
|
||||
@@ -215,7 +217,7 @@ int main(int argc, char *argv[]) {
|
||||
}
|
||||
}
|
||||
std::cout << "upload source buffer" << std::endl;
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src_ptr, src_buf_size, 0));
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src_addr, src_buf_size, 0));
|
||||
|
||||
// clear destination buffer
|
||||
{
|
||||
@@ -225,7 +227,7 @@ int main(int argc, char *argv[]) {
|
||||
}
|
||||
}
|
||||
std::cout << "clear destination buffer" << std::endl;
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_ptr, dst_buf_size, 0));
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_addr, dst_buf_size, 0));
|
||||
|
||||
// run tests
|
||||
std::cout << "run tests" << std::endl;
|
||||
|
||||
@@ -9,9 +9,9 @@ typedef struct {
|
||||
uint32_t num_tasks;
|
||||
uint32_t size;
|
||||
uint32_t stride;
|
||||
uint32_t addr_ptr;
|
||||
uint32_t src_ptr;
|
||||
uint32_t dst_ptr;
|
||||
uint32_t src0_addr;
|
||||
uint32_t src1_addr;
|
||||
uint32_t dst_addr;
|
||||
} kernel_arg_t;
|
||||
|
||||
#endif
|
||||
@@ -5,9 +5,9 @@
|
||||
|
||||
void kernel_body(int task_id, kernel_arg_t* arg) {
|
||||
uint32_t stride = arg->stride;
|
||||
uint32_t* addr_ptr = (uint32_t*)arg->addr_ptr;
|
||||
float* src_ptr = (float*)arg->src_ptr;
|
||||
float* dst_ptr = (float*)arg->dst_ptr;
|
||||
uint32_t* addr_ptr = (uint32_t*)arg->src0_addr;
|
||||
float* src_ptr = (float*)arg->src1_addr;
|
||||
float* dst_ptr = (float*)arg->dst_addr;
|
||||
|
||||
uint32_t offset = task_id * stride;
|
||||
|
||||
|
||||
@@ -73,6 +73,7 @@ std::vector<uint32_t> addr_table;
|
||||
|
||||
vx_device_h device = nullptr;
|
||||
vx_buffer_h staging_buf = nullptr;
|
||||
kernel_arg_t kernel_arg;
|
||||
|
||||
static void show_usage() {
|
||||
std::cout << "Vortex Test." << std::endl;
|
||||
@@ -103,9 +104,12 @@ static void parse_args(int argc, char **argv) {
|
||||
|
||||
void cleanup() {
|
||||
if (staging_buf) {
|
||||
vx_buf_release(staging_buf);
|
||||
vx_buf_free(staging_buf);
|
||||
}
|
||||
if (device) {
|
||||
vx_mem_free(device, kernel_arg.src0_addr);
|
||||
vx_mem_free(device, kernel_arg.src1_addr);
|
||||
vx_mem_free(device, kernel_arg.dst_addr);
|
||||
vx_dev_close(device);
|
||||
}
|
||||
}
|
||||
@@ -140,7 +144,7 @@ int run_test(const kernel_arg_t& kernel_arg,
|
||||
|
||||
// download destination buffer
|
||||
std::cout << "download destination buffer" << std::endl;
|
||||
RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_ptr, dst_buf_size, 0));
|
||||
RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_addr, dst_buf_size, 0));
|
||||
|
||||
// verify result
|
||||
std::cout << "verify result" << std::endl;
|
||||
@@ -178,8 +182,7 @@ int run_test(const kernel_arg_t& kernel_arg,
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
size_t value;
|
||||
kernel_arg_t kernel_arg;
|
||||
size_t value;
|
||||
|
||||
// parse command arguments
|
||||
parse_args(argc, argv);
|
||||
@@ -219,19 +222,19 @@ int main(int argc, char *argv[]) {
|
||||
// allocate device memory
|
||||
std::cout << "allocate device memory" << std::endl;
|
||||
|
||||
RT_CHECK(vx_alloc_dev_mem(device, addr_buf_size, &value));
|
||||
kernel_arg.addr_ptr = value;
|
||||
RT_CHECK(vx_alloc_dev_mem(device, src_buf_size, &value));
|
||||
kernel_arg.src_ptr = value;
|
||||
RT_CHECK(vx_alloc_dev_mem(device, dst_buf_size, &value));
|
||||
kernel_arg.dst_ptr = value;
|
||||
RT_CHECK(vx_mem_alloc(device, addr_buf_size, &value));
|
||||
kernel_arg.src0_addr = value;
|
||||
RT_CHECK(vx_mem_alloc(device, src_buf_size, &value));
|
||||
kernel_arg.src1_addr = value;
|
||||
RT_CHECK(vx_mem_alloc(device, dst_buf_size, &value));
|
||||
kernel_arg.dst_addr = value;
|
||||
|
||||
kernel_arg.num_tasks = num_tasks;
|
||||
kernel_arg.stride = count;
|
||||
|
||||
std::cout << "dev_addr=" << std::hex << kernel_arg.addr_ptr << std::endl;
|
||||
std::cout << "dev_src=" << std::hex << kernel_arg.src_ptr << std::endl;
|
||||
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::endl;
|
||||
std::cout << "dev_addr=" << std::hex << kernel_arg.src0_addr << std::endl;
|
||||
std::cout << "dev_src=" << std::hex << kernel_arg.src1_addr << std::endl;
|
||||
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_addr << std::endl;
|
||||
|
||||
// allocate shared memory
|
||||
std::cout << "allocate shared memory" << std::endl;
|
||||
@@ -239,7 +242,7 @@ int main(int argc, char *argv[]) {
|
||||
std::max<uint32_t>(addr_buf_size,
|
||||
std::max<uint32_t>(dst_buf_size,
|
||||
sizeof(kernel_arg_t))));
|
||||
RT_CHECK(vx_alloc_shared_mem(device, staging_buf_size, &staging_buf));
|
||||
RT_CHECK(vx_buf_alloc(device, staging_buf_size, &staging_buf));
|
||||
|
||||
// upload kernel argument
|
||||
std::cout << "upload kernel argument" << std::endl;
|
||||
@@ -257,7 +260,7 @@ int main(int argc, char *argv[]) {
|
||||
}
|
||||
}
|
||||
std::cout << "upload address buffer" << std::endl;
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.addr_ptr, addr_buf_size, 0));
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src0_addr, addr_buf_size, 0));
|
||||
|
||||
// upload source buffer1
|
||||
{
|
||||
@@ -267,7 +270,7 @@ int main(int argc, char *argv[]) {
|
||||
}
|
||||
}
|
||||
std::cout << "upload source buffer" << std::endl;
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src_ptr, src_buf_size, 0));
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src1_addr, src_buf_size, 0));
|
||||
|
||||
// clear destination buffer
|
||||
{
|
||||
@@ -277,7 +280,7 @@ int main(int argc, char *argv[]) {
|
||||
}
|
||||
}
|
||||
std::cout << "clear destination buffer" << std::endl;
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_ptr, dst_buf_size, 0));
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_addr, dst_buf_size, 0));
|
||||
|
||||
// run tests
|
||||
std::cout << "run tests" << std::endl;
|
||||
|
||||
@@ -5,8 +5,8 @@
|
||||
|
||||
typedef struct {
|
||||
uint32_t size;
|
||||
uint32_t src_ptr;
|
||||
uint32_t dst_ptr;
|
||||
uint32_t src_addr;
|
||||
uint32_t dst_addr;
|
||||
} kernel_arg_t;
|
||||
|
||||
#endif
|
||||
@@ -7,8 +7,8 @@ void main() {
|
||||
kernel_arg_t* arg = (kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
|
||||
|
||||
uint32_t size = arg->size;
|
||||
int32_t* src_ptr = (int32_t*)arg->src_ptr;
|
||||
int32_t* dst_ptr = (int32_t*)arg->dst_ptr;
|
||||
int32_t* src_ptr = (int32_t*)arg->src_addr;
|
||||
int32_t* dst_ptr = (int32_t*)arg->dst_addr;
|
||||
|
||||
for (uint32_t i = 0; i < size; ++i) {
|
||||
dst_ptr[i] = src_ptr[i];
|
||||
|
||||
@@ -21,6 +21,7 @@ uint32_t count = 0;
|
||||
|
||||
vx_device_h device = nullptr;
|
||||
vx_buffer_h staging_buf = nullptr;
|
||||
kernel_arg_t kernel_arg;
|
||||
|
||||
static void show_usage() {
|
||||
std::cout << "Vortex Test." << std::endl;
|
||||
@@ -51,9 +52,11 @@ static void parse_args(int argc, char **argv) {
|
||||
|
||||
void cleanup() {
|
||||
if (staging_buf) {
|
||||
vx_buf_release(staging_buf);
|
||||
vx_buf_free(staging_buf);
|
||||
}
|
||||
if (device) {
|
||||
vx_mem_free(device, kernel_arg.src_addr);
|
||||
vx_mem_free(device, kernel_arg.dst_addr);
|
||||
vx_dev_close(device);
|
||||
}
|
||||
}
|
||||
@@ -71,7 +74,7 @@ int run_test(const kernel_arg_t& kernel_arg,
|
||||
|
||||
// download destination buffer
|
||||
std::cout << "download destination buffer" << std::endl;
|
||||
RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_ptr, buf_size, 0));
|
||||
RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_addr, buf_size, 0));
|
||||
|
||||
// verify result
|
||||
std::cout << "verify result" << std::endl;
|
||||
@@ -98,8 +101,7 @@ int run_test(const kernel_arg_t& kernel_arg,
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
size_t value;
|
||||
kernel_arg_t kernel_arg;
|
||||
size_t value;
|
||||
|
||||
// parse command arguments
|
||||
parse_args(argc, argv);
|
||||
@@ -125,21 +127,21 @@ int main(int argc, char *argv[]) {
|
||||
// allocate device memory
|
||||
std::cout << "allocate device memory" << std::endl;
|
||||
|
||||
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
|
||||
kernel_arg.src_ptr = value;
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, &value));
|
||||
kernel_arg.src_addr = value;
|
||||
|
||||
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
|
||||
kernel_arg.dst_ptr = value;
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, &value));
|
||||
kernel_arg.dst_addr = value;
|
||||
|
||||
kernel_arg.size = num_points;
|
||||
|
||||
std::cout << "dev_src=" << std::hex << kernel_arg.src_ptr << std::endl;
|
||||
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::endl;
|
||||
std::cout << "dev_src=" << std::hex << kernel_arg.src_addr << std::endl;
|
||||
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_addr << std::endl;
|
||||
|
||||
// allocate shared memory
|
||||
std::cout << "allocate shared memory" << std::endl;
|
||||
uint32_t alloc_size = std::max<uint32_t>(buf_size, sizeof(kernel_arg_t));
|
||||
RT_CHECK(vx_alloc_shared_mem(device, alloc_size, &staging_buf));
|
||||
RT_CHECK(vx_buf_alloc(device, alloc_size, &staging_buf));
|
||||
|
||||
// upload kernel argument
|
||||
std::cout << "upload kernel argument" << std::endl;
|
||||
@@ -157,7 +159,7 @@ int main(int argc, char *argv[]) {
|
||||
}
|
||||
}
|
||||
std::cout << "upload source buffer" << std::endl;
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src_ptr, buf_size, 0));
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src_addr, buf_size, 0));
|
||||
|
||||
// clear destination buffer
|
||||
{
|
||||
@@ -167,7 +169,7 @@ int main(int argc, char *argv[]) {
|
||||
}
|
||||
}
|
||||
std::cout << "clear destination buffer" << std::endl;
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_ptr, buf_size, 0));
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_addr, buf_size, 0));
|
||||
|
||||
// run tests
|
||||
std::cout << "run tests" << std::endl;
|
||||
|
||||
@@ -5,8 +5,8 @@
|
||||
|
||||
typedef struct {
|
||||
uint32_t size;
|
||||
uint32_t src_ptr;
|
||||
uint32_t dst_ptr;
|
||||
uint32_t src_addr;
|
||||
uint32_t dst_addr;
|
||||
} kernel_arg_t;
|
||||
|
||||
#endif
|
||||
@@ -7,8 +7,8 @@ void main() {
|
||||
kernel_arg_t* arg = (kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
|
||||
|
||||
uint32_t size = arg->size;
|
||||
int32_t* src_ptr = (int32_t*)arg->src_ptr;
|
||||
int32_t* dst_ptr = (int32_t*)arg->dst_ptr;
|
||||
int32_t* src_ptr = (int32_t*)arg->src_addr;
|
||||
int32_t* dst_ptr = (int32_t*)arg->dst_addr;
|
||||
|
||||
for (uint32_t i = 0; i < size; ++i) {
|
||||
dst_ptr[i] = src_ptr[i];
|
||||
|
||||
@@ -21,6 +21,7 @@ uint32_t count = 0;
|
||||
|
||||
vx_device_h device = nullptr;
|
||||
vx_buffer_h staging_buf = nullptr;
|
||||
kernel_arg_t kernel_arg;
|
||||
|
||||
static void show_usage() {
|
||||
std::cout << "Vortex Test." << std::endl;
|
||||
@@ -51,9 +52,11 @@ static void parse_args(int argc, char **argv) {
|
||||
|
||||
void cleanup() {
|
||||
if (staging_buf) {
|
||||
vx_buf_release(staging_buf);
|
||||
vx_buf_free(staging_buf);
|
||||
}
|
||||
if (device) {
|
||||
vx_mem_free(device, kernel_arg.src_addr);
|
||||
vx_mem_free(device, kernel_arg.dst_addr);
|
||||
vx_dev_close(device);
|
||||
}
|
||||
}
|
||||
@@ -71,7 +74,7 @@ int run_test(const kernel_arg_t& kernel_arg,
|
||||
|
||||
// download destination buffer
|
||||
std::cout << "download destination buffer" << std::endl;
|
||||
RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_ptr, buf_size, 0));
|
||||
RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_addr, buf_size, 0));
|
||||
|
||||
// verify result
|
||||
std::cout << "verify result" << std::endl;
|
||||
@@ -98,8 +101,7 @@ int run_test(const kernel_arg_t& kernel_arg,
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
size_t value;
|
||||
kernel_arg_t kernel_arg;
|
||||
size_t value;
|
||||
|
||||
// parse command arguments
|
||||
parse_args(argc, argv);
|
||||
@@ -125,21 +127,21 @@ int main(int argc, char *argv[]) {
|
||||
// allocate device memory
|
||||
std::cout << "allocate device memory" << std::endl;
|
||||
|
||||
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
|
||||
kernel_arg.src_ptr = value;
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, &value));
|
||||
kernel_arg.src_addr = value;
|
||||
|
||||
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
|
||||
kernel_arg.dst_ptr = value;
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, &value));
|
||||
kernel_arg.dst_addr = value;
|
||||
|
||||
kernel_arg.size = num_points;
|
||||
|
||||
std::cout << "dev_src=" << std::hex << kernel_arg.src_ptr << std::endl;
|
||||
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::endl;
|
||||
std::cout << "dev_src=" << std::hex << kernel_arg.src_addr << std::endl;
|
||||
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_addr << std::endl;
|
||||
|
||||
// allocate shared memory
|
||||
std::cout << "allocate shared memory" << std::endl;
|
||||
uint32_t alloc_size = std::max<uint32_t>(buf_size, sizeof(kernel_arg_t));
|
||||
RT_CHECK(vx_alloc_shared_mem(device, alloc_size, &staging_buf));
|
||||
RT_CHECK(vx_buf_alloc(device, alloc_size, &staging_buf));
|
||||
|
||||
// upload kernel argument
|
||||
std::cout << "upload kernel argument" << std::endl;
|
||||
@@ -157,7 +159,7 @@ int main(int argc, char *argv[]) {
|
||||
}
|
||||
}
|
||||
std::cout << "upload source buffer" << std::endl;
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src_ptr, buf_size, 0));
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src_addr, buf_size, 0));
|
||||
|
||||
// clear destination buffer
|
||||
{
|
||||
@@ -167,7 +169,7 @@ int main(int argc, char *argv[]) {
|
||||
}
|
||||
}
|
||||
std::cout << "clear destination buffer" << std::endl;
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_ptr, buf_size, 0));
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_addr, buf_size, 0));
|
||||
|
||||
// run tests
|
||||
std::cout << "run tests" << std::endl;
|
||||
|
||||
@@ -6,9 +6,9 @@
|
||||
typedef struct {
|
||||
uint32_t num_tasks;
|
||||
uint32_t task_size;
|
||||
uint32_t src0_ptr;
|
||||
uint32_t src1_ptr;
|
||||
uint32_t dst_ptr;
|
||||
uint32_t src0_addr;
|
||||
uint32_t src1_addr;
|
||||
uint32_t dst_addr;
|
||||
} kernel_arg_t;
|
||||
|
||||
#endif
|
||||
@@ -11,9 +11,9 @@ void kernel_body(int task_id, kernel_arg_t* arg) {
|
||||
uint32_t offset = task_id * count;
|
||||
uint32_t num_blocks = (count * 4 + BLOCK_SIZE-1) / BLOCK_SIZE;
|
||||
|
||||
int32_t* src0_ptr = (int32_t*)arg->src0_ptr + offset;
|
||||
int32_t* src1_ptr = (int32_t*)arg->src1_ptr + offset;
|
||||
int32_t* dst_ptr = (int32_t*)arg->dst_ptr + offset;
|
||||
int32_t* src0_ptr = (int32_t*)arg->src0_addr + offset;
|
||||
int32_t* src1_ptr = (int32_t*)arg->src1_addr + offset;
|
||||
int32_t* dst_ptr = (int32_t*)arg->dst_addr + offset;
|
||||
|
||||
uint32_t src0_end = (uint32_t)(src0_ptr + count);
|
||||
uint32_t src1_end = (uint32_t)(src1_ptr + count);
|
||||
|
||||
@@ -21,6 +21,7 @@ uint32_t count = 0;
|
||||
|
||||
vx_device_h device = nullptr;
|
||||
vx_buffer_h staging_buf = nullptr;
|
||||
kernel_arg_t kernel_arg;
|
||||
|
||||
static void show_usage() {
|
||||
std::cout << "Vortex Test." << std::endl;
|
||||
@@ -51,9 +52,12 @@ static void parse_args(int argc, char **argv) {
|
||||
|
||||
void cleanup() {
|
||||
if (staging_buf) {
|
||||
vx_buf_release(staging_buf);
|
||||
vx_buf_free(staging_buf);
|
||||
}
|
||||
if (device) {
|
||||
vx_mem_free(device, kernel_arg.src0_addr);
|
||||
vx_mem_free(device, kernel_arg.src1_addr);
|
||||
vx_mem_free(device, kernel_arg.dst_addr);
|
||||
vx_dev_close(device);
|
||||
}
|
||||
}
|
||||
@@ -71,7 +75,7 @@ int run_test(const kernel_arg_t& kernel_arg,
|
||||
|
||||
// download destination buffer
|
||||
std::cout << "download destination buffer" << std::endl;
|
||||
RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_ptr, buf_size, 0));
|
||||
RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_addr, buf_size, 0));
|
||||
|
||||
// verify result
|
||||
std::cout << "verify result" << std::endl;
|
||||
@@ -98,8 +102,7 @@ int run_test(const kernel_arg_t& kernel_arg,
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
size_t value;
|
||||
kernel_arg_t kernel_arg;
|
||||
size_t value;
|
||||
|
||||
// parse command arguments
|
||||
parse_args(argc, argv);
|
||||
@@ -131,24 +134,24 @@ int main(int argc, char *argv[]) {
|
||||
// allocate device memory
|
||||
std::cout << "allocate device memory" << std::endl;
|
||||
|
||||
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
|
||||
kernel_arg.src0_ptr = value;
|
||||
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
|
||||
kernel_arg.src1_ptr = value;
|
||||
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
|
||||
kernel_arg.dst_ptr = value;
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, &value));
|
||||
kernel_arg.src0_addr = value;
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, &value));
|
||||
kernel_arg.src1_addr = value;
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, &value));
|
||||
kernel_arg.dst_addr = value;
|
||||
|
||||
kernel_arg.num_tasks = num_tasks;
|
||||
kernel_arg.task_size = count;
|
||||
|
||||
std::cout << "dev_src0=" << std::hex << kernel_arg.src0_ptr << std::endl;
|
||||
std::cout << "dev_src1=" << std::hex << kernel_arg.src1_ptr << std::endl;
|
||||
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::endl;
|
||||
std::cout << "dev_src0=" << std::hex << kernel_arg.src0_addr << std::endl;
|
||||
std::cout << "dev_src1=" << std::hex << kernel_arg.src1_addr << std::endl;
|
||||
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_addr << std::endl;
|
||||
|
||||
// allocate shared memory
|
||||
std::cout << "allocate shared memory" << std::endl;
|
||||
uint32_t alloc_size = std::max<uint32_t>(buf_size, sizeof(kernel_arg_t));
|
||||
RT_CHECK(vx_alloc_shared_mem(device, alloc_size, &staging_buf));
|
||||
RT_CHECK(vx_buf_alloc(device, alloc_size, &staging_buf));
|
||||
|
||||
// upload kernel argument
|
||||
std::cout << "upload kernel argument" << std::endl;
|
||||
@@ -166,7 +169,7 @@ int main(int argc, char *argv[]) {
|
||||
}
|
||||
}
|
||||
std::cout << "upload source buffer0" << std::endl;
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src0_ptr, buf_size, 0));
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src0_addr, buf_size, 0));
|
||||
|
||||
// upload source buffer1
|
||||
{
|
||||
@@ -176,7 +179,7 @@ int main(int argc, char *argv[]) {
|
||||
}
|
||||
}
|
||||
std::cout << "upload source buffer1" << std::endl;
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src1_ptr, buf_size, 0));
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src1_addr, buf_size, 0));
|
||||
|
||||
// clear destination buffer
|
||||
{
|
||||
@@ -186,7 +189,7 @@ int main(int argc, char *argv[]) {
|
||||
}
|
||||
}
|
||||
std::cout << "clear destination buffer" << std::endl;
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_ptr, buf_size, 0));
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_addr, buf_size, 0));
|
||||
|
||||
// run tests
|
||||
std::cout << "run tests" << std::endl;
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
|
||||
typedef struct {
|
||||
uint32_t num_points;
|
||||
uint32_t src_ptr;
|
||||
uint32_t src_addr;
|
||||
} kernel_arg_t;
|
||||
|
||||
#endif
|
||||
@@ -5,7 +5,7 @@
|
||||
#include "common.h"
|
||||
|
||||
void kernel_body(int task_id, kernel_arg_t* arg) {
|
||||
int* src_ptr = (int*)arg->src_ptr;
|
||||
int* src_ptr = (int*)arg->src_addr;
|
||||
vx_printf("task=%d, value=%d\n", task_id, src_ptr[task_id]);
|
||||
}
|
||||
|
||||
|
||||
@@ -21,6 +21,7 @@ uint32_t count = 4;
|
||||
|
||||
vx_device_h device = nullptr;
|
||||
vx_buffer_h staging_buf = nullptr;
|
||||
kernel_arg_t kernel_arg;
|
||||
|
||||
static void show_usage() {
|
||||
std::cout << "Vortex Test." << std::endl;
|
||||
@@ -51,9 +52,10 @@ static void parse_args(int argc, char **argv) {
|
||||
|
||||
void cleanup() {
|
||||
if (staging_buf) {
|
||||
vx_buf_release(staging_buf);
|
||||
vx_buf_free(staging_buf);
|
||||
}
|
||||
if (device) {
|
||||
vx_mem_free(device, kernel_arg.src_addr);
|
||||
vx_dev_close(device);
|
||||
}
|
||||
}
|
||||
@@ -71,8 +73,7 @@ int run_test() {
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
size_t value;
|
||||
kernel_arg_t kernel_arg;
|
||||
size_t value;
|
||||
|
||||
// parse command arguments
|
||||
parse_args(argc, argv);
|
||||
@@ -103,17 +104,17 @@ int main(int argc, char *argv[]) {
|
||||
// allocate device memory
|
||||
std::cout << "allocate device memory" << std::endl;
|
||||
|
||||
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
|
||||
kernel_arg.src_ptr = value;
|
||||
RT_CHECK(vx_mem_alloc(device, buf_size, &value));
|
||||
kernel_arg.src_addr = value;
|
||||
|
||||
kernel_arg.num_points = num_points;
|
||||
|
||||
std::cout << "dev_src=" << std::hex << kernel_arg.src_ptr << std::endl;
|
||||
std::cout << "dev_src=" << std::hex << kernel_arg.src_addr << std::endl;
|
||||
|
||||
// allocate shared memory
|
||||
std::cout << "allocate shared memory" << std::endl;
|
||||
uint32_t alloc_size = std::max<uint32_t>(buf_size, sizeof(kernel_arg_t));
|
||||
RT_CHECK(vx_alloc_shared_mem(device, alloc_size, &staging_buf));
|
||||
RT_CHECK(vx_buf_alloc(device, alloc_size, &staging_buf));
|
||||
|
||||
// upload kernel argument
|
||||
std::cout << "upload kernel argument" << std::endl;
|
||||
@@ -131,7 +132,7 @@ int main(int argc, char *argv[]) {
|
||||
}
|
||||
}
|
||||
std::cout << "upload source buffer" << std::endl;
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src_ptr, buf_size, 0));
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src_addr, buf_size, 0));
|
||||
|
||||
// run tests
|
||||
std::cout << "run tests" << std::endl;
|
||||
|
||||
@@ -5,8 +5,8 @@
|
||||
|
||||
typedef struct {
|
||||
uint32_t num_points;
|
||||
uint32_t src_ptr;
|
||||
uint32_t dst_ptr;
|
||||
uint32_t src_addr;
|
||||
uint32_t dst_addr;
|
||||
} kernel_arg_t;
|
||||
|
||||
#endif
|
||||
@@ -5,8 +5,8 @@
|
||||
|
||||
void kernel_body(int __DIVERGENT__ task_id, kernel_arg_t* arg) {
|
||||
uint32_t num_points = arg->num_points;
|
||||
int32_t* src_ptr = (int32_t*)arg->src_ptr;
|
||||
int32_t* dst_ptr = (int32_t*)arg->dst_ptr;
|
||||
int32_t* src_ptr = (int32_t*)arg->src_addr;
|
||||
int32_t* dst_ptr = (int32_t*)arg->dst_addr;
|
||||
|
||||
int32_t ref_value = src_ptr[task_id];
|
||||
|
||||
|
||||
@@ -25,6 +25,7 @@ std::vector<int32_t> ref_data;
|
||||
|
||||
vx_device_h device = nullptr;
|
||||
vx_buffer_h staging_buf = nullptr;
|
||||
kernel_arg_t kernel_arg;
|
||||
|
||||
static void show_usage() {
|
||||
std::cout << "Vortex Test." << std::endl;
|
||||
@@ -55,9 +56,11 @@ static void parse_args(int argc, char **argv) {
|
||||
|
||||
void cleanup() {
|
||||
if (staging_buf) {
|
||||
vx_buf_release(staging_buf);
|
||||
vx_buf_free(staging_buf);
|
||||
}
|
||||
if (device) {
|
||||
vx_mem_free(device, kernel_arg.src_addr);
|
||||
vx_mem_free(device, kernel_arg.dst_addr);
|
||||
vx_dev_close(device);
|
||||
}
|
||||
}
|
||||
@@ -102,7 +105,7 @@ int run_test(const kernel_arg_t& kernel_arg,
|
||||
|
||||
// download destination buffer
|
||||
std::cout << "download destination buffer" << std::endl;
|
||||
RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_ptr, buf_size, 0));
|
||||
RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_addr, buf_size, 0));
|
||||
|
||||
// verify result
|
||||
std::cout << "verify result" << std::endl;
|
||||
@@ -129,8 +132,7 @@ int run_test(const kernel_arg_t& kernel_arg,
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
size_t value;
|
||||
kernel_arg_t kernel_arg;
|
||||
size_t value;
|
||||
|
||||
// parse command arguments
|
||||
parse_args(argc, argv);
|
||||
@@ -166,22 +168,22 @@ int main(int argc, char *argv[]) {
|
||||
// allocate device memory
|
||||
std::cout << "allocate device memory" << std::endl;
|
||||
|
||||
RT_CHECK(vx_alloc_dev_mem(device, src_buf_size, &value));
|
||||
kernel_arg.src_ptr = value;
|
||||
RT_CHECK(vx_alloc_dev_mem(device, dst_buf_size, &value));
|
||||
kernel_arg.dst_ptr = value;
|
||||
RT_CHECK(vx_mem_alloc(device, src_buf_size, &value));
|
||||
kernel_arg.src_addr = value;
|
||||
RT_CHECK(vx_mem_alloc(device, dst_buf_size, &value));
|
||||
kernel_arg.dst_addr = value;
|
||||
|
||||
kernel_arg.num_points = num_points;
|
||||
|
||||
std::cout << "dev_src=" << std::hex << kernel_arg.src_ptr << std::endl;
|
||||
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::endl;
|
||||
std::cout << "dev_src=" << std::hex << kernel_arg.src_addr << std::endl;
|
||||
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_addr << std::endl;
|
||||
|
||||
// allocate shared memory
|
||||
std::cout << "allocate shared memory" << std::endl;
|
||||
uint32_t staging_buf_size = std::max<uint32_t>(src_buf_size,
|
||||
std::max<uint32_t>(dst_buf_size,
|
||||
sizeof(kernel_arg_t)));
|
||||
RT_CHECK(vx_alloc_shared_mem(device, staging_buf_size, &staging_buf));
|
||||
RT_CHECK(vx_buf_alloc(device, staging_buf_size, &staging_buf));
|
||||
|
||||
// upload kernel argument
|
||||
std::cout << "upload kernel argument" << std::endl;
|
||||
@@ -199,7 +201,7 @@ int main(int argc, char *argv[]) {
|
||||
}
|
||||
}
|
||||
std::cout << "upload source buffer" << std::endl;
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src_ptr, src_buf_size, 0));
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src_addr, src_buf_size, 0));
|
||||
|
||||
// clear destination buffer
|
||||
{
|
||||
@@ -209,7 +211,7 @@ int main(int argc, char *argv[]) {
|
||||
}
|
||||
}
|
||||
std::cout << "clear destination buffer" << std::endl;
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_ptr, dst_buf_size, 0));
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_addr, dst_buf_size, 0));
|
||||
|
||||
// run tests
|
||||
std::cout << "run tests" << std::endl;
|
||||
|
||||
@@ -35,6 +35,7 @@ ePixelFormat eformat = FORMAT_A8R8G8B8;
|
||||
|
||||
vx_device_h device = nullptr;
|
||||
vx_buffer_h buffer = nullptr;
|
||||
kernel_arg_t kernel_arg;
|
||||
|
||||
static void show_usage() {
|
||||
std::cout << "Vortex Texture Test." << std::endl;
|
||||
@@ -95,9 +96,11 @@ static void parse_args(int argc, char **argv) {
|
||||
|
||||
void cleanup() {
|
||||
if (buffer) {
|
||||
vx_buf_release(buffer);
|
||||
vx_buf_free(buffer);
|
||||
}
|
||||
if (device) {
|
||||
vx_mem_free(device, kernel_arg.src_addr);
|
||||
vx_mem_free(device, kernel_arg.dst_addr);
|
||||
vx_dev_close(device);
|
||||
}
|
||||
}
|
||||
@@ -141,7 +144,6 @@ int run_test(const kernel_arg_t& kernel_arg,
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
kernel_arg_t kernel_arg;
|
||||
std::vector<uint8_t> src_pixels;
|
||||
std::vector<uint32_t> mip_offsets;
|
||||
uint32_t src_width;
|
||||
@@ -196,8 +198,8 @@ int main(int argc, char *argv[]) {
|
||||
// allocate device memory
|
||||
std::cout << "allocate device memory" << std::endl;
|
||||
uint64_t src_addr, dst_addr;
|
||||
RT_CHECK(vx_alloc_dev_mem(device, src_bufsize, &src_addr));
|
||||
RT_CHECK(vx_alloc_dev_mem(device, dst_bufsize, &dst_addr));
|
||||
RT_CHECK(vx_mem_alloc(device, src_bufsize, &src_addr));
|
||||
RT_CHECK(vx_mem_alloc(device, dst_bufsize, &dst_addr));
|
||||
|
||||
std::cout << "src_addr=0x" << std::hex << src_addr << std::endl;
|
||||
std::cout << "dst_addr=0x" << std::hex << dst_addr << std::endl;
|
||||
@@ -206,7 +208,7 @@ int main(int argc, char *argv[]) {
|
||||
std::cout << "allocate shared memory" << std::endl;
|
||||
uint32_t alloc_size = std::max<uint32_t>(sizeof(kernel_arg_t),
|
||||
std::max<uint32_t>(src_bufsize, dst_bufsize));
|
||||
RT_CHECK(vx_alloc_shared_mem(device, alloc_size, &buffer));
|
||||
RT_CHECK(vx_buf_alloc(device, alloc_size, &buffer));
|
||||
|
||||
// upload kernel argument
|
||||
std::cout << "upload kernel argument" << std::endl;
|
||||
|
||||
Reference in New Issue
Block a user