Files
vortex/tests/kernel/conform/tests.cpp
Blaise Tine c1e168fdbe Vortex 2.0 changes:
+ Microarchitecture optimizations
+ 64-bit support
+ Xilinx FPGA support
+ LLVM-16 support
+ Refactoring and quality control fixes

minor update

minor update

minor update

minor update

minor update

minor update

cleanup

cleanup

cache bindings and memory perf refactory

minor update

minor update

hw unit tests fixes

minor update

minor update

minor update

minor update

minor update

minor udpate

minor update

minor update

minor update

minor update

minor update

minor update

minor update

minor updates

minor updates

minor update

minor update

minor update

minor update

minor update

minor update

minor updates

minor updates

minor updates

minor updates

minor update

minor update
2023-11-10 02:47:05 -08:00

339 lines
7.1 KiB
C++

#include "tests.h"
#include <stdio.h>
#include <algorithm>
#include <vx_intrinsics.h>
#include <vx_print.h>
#include <vx_spawn.h>
int __attribute__((noinline)) check_error(const int* buffer, int offset, int size) {
int errors = 0;
for (int i = offset; i < size; i++) {
int value = buffer[i];
int ref_value = 65 + i;
if (value == ref_value) {
//PRINTF("[%d] %c\n", i, value);
} else {
PRINTF("*** error: [%d] 0x%x, expected 0x%x\n", i, value, ref_value);
++errors;
}
}
return errors;
}
int __attribute__((noinline)) make_select_tmask(int tid) {
return (1 << tid);
}
int __attribute__((noinline)) make_full_tmask(int num_threads) {
return (1 << num_threads) - 1;
}
///////////////////////////////////////////////////////////////////////////////
#define GLOBAL_MEM_SZ 8
int global_buffer[GLOBAL_MEM_SZ];
int test_global_memory() {
PRINTF("Global Memory Test\n");
for (int i = 0; i < GLOBAL_MEM_SZ; i++) {
global_buffer[i] = 65 + i;
}
return check_error(global_buffer, 0, GLOBAL_MEM_SZ);
}
///////////////////////////////////////////////////////////////////////////////
int* smem_addr = (int*)SMEM_BASE_ADDR;
int smem_buffer[8];
void __attribute__((noinline)) do_smem_wr() {
unsigned tid = vx_thread_id();
smem_addr[tid] = 65 + tid;
}
void __attribute__((noinline)) do_smem_rd() {
unsigned tid = vx_thread_id();
smem_buffer[tid] = smem_addr[tid];
}
int test_shared_memory() {
static const int SHARED_MEM_SZ = 8;
PRINTF("Shared Memory Test\n");
int num_threads = std::min(vx_num_threads(), 8);
int tmask = make_full_tmask(num_threads);
vx_tmc(tmask);
do_smem_wr();
do_smem_rd();
vx_tmc_one();
return check_error(smem_buffer, 0, num_threads);
}
///////////////////////////////////////////////////////////////////////////////
int tmc_buffer[8];
void __attribute__((noinline)) do_tmc() {
unsigned tid = vx_thread_id();
tmc_buffer[tid] = 65 + tid;
}
int test_tmc() {
PRINTF("TMC Test\n");
int num_threads = std::min(vx_num_threads(), 8);
int tmask = make_full_tmask(num_threads);
vx_tmc(tmask);
do_tmc();
vx_tmc_one();
return check_error(tmc_buffer, 0, num_threads);
}
///////////////////////////////////////////////////////////////////////////////
int pred_buffer[8];
void __attribute__((noinline)) do_pred() {
unsigned tid = vx_thread_id();
vx_pred((tid == 0), 1);
pred_buffer[tid] = 65;
}
int test_pred() {
PRINTF("PRED Test\n");
int num_threads = std::min(vx_num_threads(), 8);
int tmask = make_full_tmask(num_threads);
for (int i = 1; i < num_threads; i++) {
pred_buffer[i] = 65 + i;
}
vx_tmc(tmask);
do_pred();
vx_tmc_one();
return check_error(pred_buffer, 0, num_threads);
}
///////////////////////////////////////////////////////////////////////////////
int wspawn_buffer[8];
void wspawn_kernel() {
unsigned wid = vx_warp_id();
wspawn_buffer[wid] = 65 + wid;
vx_tmc(0 == wid);
}
int test_wsapwn() {
PRINTF("Wspawn Test\n");
int num_warps = std::min(vx_num_warps(), 8);
vx_wspawn(num_warps, wspawn_kernel);
wspawn_kernel();
return check_error(wspawn_buffer, 0, num_warps);
}
///////////////////////////////////////////////////////////////////////////////
int dvg_buffer[4];
void __attribute__((noinline)) __attribute__((optimize("O1"))) do_divergence() {
int tid = vx_thread_id();
int cond1 = tid < 2;
int sp1 = vx_split(cond1);
if (cond1) {
{
int cond2 = tid < 1;
int sp2 = vx_split(cond2);
if (cond2) {
dvg_buffer[tid] = 65; // A
} else {
dvg_buffer[tid] = 66; // B
}
vx_join(sp2);
}
{
int cond3 = tid < 0;
int sp3 = vx_split(cond3);
if (cond3) {
dvg_buffer[tid] = 67; // C
}
vx_join(sp3);
}
} else {
{
int cond2 = tid < 3;
int sp2 = vx_split(cond2);
if (cond2) {
dvg_buffer[tid] = 67; // C
} else {
dvg_buffer[tid] = 68; // D
}
vx_join(sp2);
}
}
vx_join(sp1);
}
int test_divergence() {
PRINTF("Control Divergence Test\n");
int num_threads = std::min(vx_num_threads(), 4);
int tmask = make_full_tmask(num_threads);
vx_tmc(tmask);
do_divergence();
vx_tmc_one();
return check_error(dvg_buffer, 0, num_threads);
}
///////////////////////////////////////////////////////////////////////////////
#define ST_BUF_SZ 8
typedef struct {
int * src;
int * dst;
} st_args_t;
int st_buffer_src[ST_BUF_SZ];
int st_buffer_dst[ST_BUF_SZ];
void st_kernel(int task_id, const st_args_t * __UNIFORM__ arg) {
arg->dst[task_id] = arg->src[task_id];
}
int test_spawn_tasks() {
PRINTF("SpawnTasks Test\n");
st_args_t arg;
arg.src = st_buffer_src;
arg.dst = st_buffer_dst;
for (int i = 0; i < ST_BUF_SZ; i++) {
st_buffer_src[i] = 65 + i;
}
vx_spawn_tasks(ST_BUF_SZ, (vx_spawn_tasks_cb)st_kernel, &arg);
return check_error(st_buffer_dst, 0, ST_BUF_SZ);
}
///////////////////////////////////////////////////////////////////////////////
typedef struct {
int * buf;
} sr_args_t;
int sr_buffer[8];
void sr_kernel(const sr_args_t * arg) {
int tid = vx_thread_id();
arg->buf[tid] = 65 + tid;
}
void __attribute__((noinline)) do_serial() {
sr_args_t arg;
arg.buf = sr_buffer;
vx_serial((vx_serial_cb)sr_kernel, &arg);
}
int test_serial() {
PRINTF("Serial Test\n");
int num_threads = std::min(vx_num_threads(), 8);
int tmask = make_full_tmask(num_threads);
vx_tmc(tmask);
do_serial();
vx_tmc_one();
return check_error(sr_buffer, 0, num_threads);
}
///////////////////////////////////////////////////////////////////////////////
int tmask_buffer[8];
int __attribute__((noinline)) do_tmask() {
int tid = vx_thread_id();
int tmask = make_select_tmask(tid);
int cur_tmask = vx_thread_mask();
tmask_buffer[tid] = (cur_tmask == tmask) ? (65 + tid) : 0;
return tid + 1;
}
int test_tmask() {
PRINTF("Thread Mask Test\n");
// activate all thread to populate shared variables
vx_tmc(-1);
int num_threads = std::min(vx_num_threads(), 8);
int tid = 0;
l_start:
int tmask = make_select_tmask(tid);
vx_tmc(tmask);
tid = do_tmask();
if (tid < num_threads)
goto l_start;
vx_tmc_one();
return check_error(tmask_buffer, 0, num_threads);
}
///////////////////////////////////////////////////////////////////////////////
int barrier_buffer[8];
volatile int barrier_ctr;
volatile int barrier_stall;
void barrier_kernel() {
unsigned wid = vx_warp_id();
for (int i = 0; i <= (wid * 256); ++i) {
++barrier_stall;
}
barrier_buffer[wid] = 65 + wid;
vx_barrier(0, barrier_ctr);
vx_tmc(0 == wid);
}
int test_barrier() {
PRINTF("Barrier Test\n");
int num_warps = std::min(vx_num_warps(), 8);
barrier_ctr = num_warps;
barrier_stall = 0;
vx_wspawn(num_warps, barrier_kernel);
barrier_kernel();
return check_error(barrier_buffer, 0, num_warps);
}
///////////////////////////////////////////////////////////////////////////////
int tls_buffer[8];
__thread int tls_var;
__attribute__((noinline)) void print_tls_var() {
unsigned wid = vx_warp_id();
tls_buffer[wid] = 65 + tls_var;
}
void tls_kernel() {
unsigned wid = vx_warp_id();
tls_var = wid;
print_tls_var();
vx_tmc(0 == wid);
}
int test_tls() {
PRINTF("TLS Test\n");
int num_warps = std::min(vx_num_warps(), 8);
vx_wspawn(num_warps, tls_kernel);
tls_kernel();
return check_error(tls_buffer, 0, num_warps);
}