diff --git a/simX/Makefile b/simX/Makefile index 8b0e2ef3..e4cb9fbb 100644 --- a/simX/Makefile +++ b/simX/Makefile @@ -15,7 +15,7 @@ SRCS = util.cpp args.cpp mem.cpp pipeline.cpp warp.cpp core.cpp decode.cpp execu # Debugigng ifdef DEBUG - CXXFLAGS += -DDEBUG_LEVEL=3 + CXXFLAGS += -DDEBUG_LEVEL=$(DEBUG) else CXXFLAGS += -DNDEBUG endif diff --git a/simX/execute.cpp b/simX/execute.cpp index c33f1b64..f255f66e 100644 --- a/simX/execute.cpp +++ b/simX/execute.cpp @@ -818,9 +818,10 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) { case 0: { // TMC tmask_.reset(); - for (size_t i = 0; i < tmask_.size(); ++i) { + for (int i = 0; i < num_threads; ++i) { tmask_[i] = rsdata[0] & (1 << i); } + D(3, "*** TMC " << tmask_); active_ = tmask_.any(); pipeline->stall_warp = true; runOnce = true; @@ -859,7 +860,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) { for (int i = 0; i < num_threads; ++i) DPN(3, e.tmask[num_threads-i-1]); DPN(3, ", PC=0x" << std::hex << e.PC << "\n"); } else { - D(3, "*** Unanimous pred: r" << rsrc0 << ", val: " << rsdata[0]); + D(3, "*** Unanimous pred"); DomStackEntry e(tmask_); e.unanimous = true; domStack_.push(e); diff --git a/tests/runtime/simple/main.cpp b/tests/runtime/simple/main.cpp index c0723fb0..df9fce85 100644 --- a/tests/runtime/simple/main.cpp +++ b/tests/runtime/simple/main.cpp @@ -4,8 +4,6 @@ int main() { int errors = 0; - vx_printf("Simple Test\n"); - errors += test_global_memory(); errors += test_stack_memory(); @@ -20,6 +18,8 @@ int main() { errors += test_spawn_tasks(); + errors += test_tmask(); + if (0 == errors) { vx_printf("Passed!\n"); } else { diff --git a/tests/runtime/simple/tests.cpp b/tests/runtime/simple/tests.cpp index 6db97399..0464ce04 100644 --- a/tests/runtime/simple/tests.cpp +++ b/tests/runtime/simple/tests.cpp @@ -1,10 +1,11 @@ #include "tests.h" #include +#include #include #include #include -int check_error(const int* buffer, int size) { +int __attribute__ ((noinline)) check_error(const 
int* buffer, int size) { int errors = 0; for (int i = 0; i < size; i++) { int value = buffer[i]; @@ -19,15 +20,21 @@ int check_error(const int* buffer, int size) { return errors; } +int __attribute__ ((noinline)) make_select_tmask(int tid) { + return (1 << tid); +} + +int __attribute__ ((noinline)) make_full_tmask(int num_threads) { + return (1 << num_threads) - 1; +} + /////////////////////////////////////////////////////////////////////////////// #define GLOBAL_MEM_SZ 8 int global_buffer[GLOBAL_MEM_SZ]; -int test_global_memory() { - int errors = 0; - - vx_printf("Global Memory test\n"); +int test_global_memory() { + vx_printf("Global Memory Test\n"); for (int i = 0; i < GLOBAL_MEM_SZ; i++) { global_buffer[i] = 65 + i; @@ -39,11 +46,10 @@ int test_global_memory() { /////////////////////////////////////////////////////////////////////////////// int test_stack_memory() { + vx_printf("Stack Memory Test\n"); + static const int STACK_MEM_SZ = 8; int stack_buffer[STACK_MEM_SZ]; - int errors = 0; - - vx_printf("Stack Memory test\n"); for (int i = 0; i < STACK_MEM_SZ; i++) { stack_buffer[i] = 65 + i; @@ -57,9 +63,8 @@ int test_stack_memory() { int test_shared_memory() { static const int SHARED_MEM_SZ = 8; int* shared_buffer = (int*)(SMEM_BASE_ADDR-(SMEM_SIZE-SHARED_MEM_SZ-4)); - int errors = 0; - vx_printf("Shared Memory test\n"); + vx_printf("Shared Memory Test\n"); for (int i = 0; i < SHARED_MEM_SZ; i++) { shared_buffer[i] = 65 + i; @@ -70,24 +75,28 @@ int test_shared_memory() { /////////////////////////////////////////////////////////////////////////////// -int tmc_buffer[NUM_THREADS]; +int tmc_buffer[8]; -int test_tmc() { - int errors = 0; - - vx_printf("Thread mask test\n"); - - vx_tmc(NUM_THREADS); +void __attribute__ ((noinline)) do_tmc() { unsigned tid = vx_thread_id(); tmc_buffer[tid] = 65 + tid; +} + +int test_tmc() { + vx_printf("TMC Test\n"); + + int num_threads = std::min(vx_num_threads(), 8); + int tmask = make_full_tmask(num_threads); + vx_tmc(tmask); + 
do_tmc(); vx_tmc(1); - return check_error(tmc_buffer, NUM_THREADS); + return check_error(tmc_buffer, num_threads); } /////////////////////////////////////////////////////////////////////////////// -int wspawn_buffer[NUM_WARPS]; +int wspawn_buffer[8]; void simple_kernel() { unsigned wid = vx_warp_id(); @@ -96,53 +105,53 @@ void simple_kernel() { } int test_wsapwn() { - vx_printf("test_wspawn\n"); - vx_wspawn(NUM_WARPS, simple_kernel); + vx_printf("Wspawn Test\n"); + int num_warps = std::min(vx_num_warps(), 8); + vx_wspawn(num_warps, simple_kernel); simple_kernel(); - return check_error(wspawn_buffer, NUM_WARPS); + return check_error(wspawn_buffer, num_warps); } /////////////////////////////////////////////////////////////////////////////// -#define DIV_BUF_SZ ((NUM_THREADS > 4) ? 4 : NUM_THREADS) -int div_buffer[DIV_BUF_SZ]; +int dvg_buffer[4]; -int test_divergence() { - int errors = 0; - - vx_printf("Control divergence test\n"); - - vx_tmc(DIV_BUF_SZ); +void __attribute__ ((noinline)) do_divergence() { unsigned tid = vx_thread_id(); - bool b = tid < 2; - __if (b) { - bool c = tid < 1; - __if (c) { - div_buffer[tid] = 65; + __if (tid < 2) { + __if (tid < 1) { + dvg_buffer[tid] = 65; } __else { - div_buffer[tid] = 66; + dvg_buffer[tid] = 66; } __endif } __else { - bool c = tid < 3; - __if (c) { - div_buffer[tid] = 67; + __if (tid < 3) { + dvg_buffer[tid] = 67; } __else { - div_buffer[tid] = 68; + dvg_buffer[tid] = 68; } __endif } __endif +} +int test_divergence() { + vx_printf("Control Divergence Test\n"); + + int num_threads = std::min(vx_num_threads(), 4); + int tmask = make_full_tmask(num_threads); + vx_tmc(tmask); + do_divergence(); vx_tmc(1); - return check_error(div_buffer, DIV_BUF_SZ); + return check_error(dvg_buffer, num_threads); } /////////////////////////////////////////////////////////////////////////////// @@ -162,14 +171,12 @@ void st_kernel(int task_id, void * arg) { } int test_spawn_tasks() { - int error = 0; + vx_printf("SpawnTasks Test\n"); 
st_args_t arg; arg.src = st_buffer_src; arg.dst = st_buffer_dst; - vx_printf("spawning %d tasks\n", ST_BUF_SZ); - for (int i = 0; i < ST_BUF_SZ; i++) { st_buffer_src[i] = 65 + i; } @@ -177,4 +184,36 @@ int test_spawn_tasks() { vx_spawn_tasks(ST_BUF_SZ, st_kernel, &arg); return check_error(st_buffer_dst, ST_BUF_SZ); +} + +/////////////////////////////////////////////////////////////////////////////// + +int tmask_buffer[8]; + +int __attribute__ ((noinline)) do_tmask() { + int tid = vx_thread_id(); + int tmask = make_select_tmask(tid); + int cur_tmask = vx_thread_mask(); + tmask_buffer[tid] = (cur_tmask == tmask) ? (65 + tid) : 0; + return tid + 1; +} + +int test_tmask() { + vx_printf("Thread Mask Test\n"); + + // activate all threads to populate shared variables + vx_tmc(-1); + + int num_threads = std::min(vx_num_threads(), 8); + int tid = 0; + +l_start: + int tmask = make_select_tmask(tid); + vx_tmc(tmask); + tid = do_tmask(); + if (tid < num_threads) + goto l_start; + vx_tmc(1); + + return check_error(tmask_buffer, num_threads); } \ No newline at end of file diff --git a/tests/runtime/simple/tests.h b/tests/runtime/simple/tests.h index e6b8b118..8830496d 100644 --- a/tests/runtime/simple/tests.h +++ b/tests/runtime/simple/tests.h @@ -15,4 +15,6 @@ int test_wsapwn(); int test_spawn_tasks(); +int test_tmask(); + #endif