From bc3fa0bb235b8772eb316bf4639ad594f019e6bd Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 15 Aug 2021 05:12:27 -0700 Subject: [PATCH] fixed wspawn's warp synchronization --- runtime/include/vx_intrinsics.h | 2 +- runtime/src/vx_spawn.c | 14 ++++++++++++-- tests/runtime/simple/main.cpp | 2 ++ tests/runtime/simple/tests.cpp | 32 +++++++++++++++++++++++++++++--- tests/runtime/simple/tests.h | 2 ++ 5 files changed, 46 insertions(+), 6 deletions(-) diff --git a/runtime/include/vx_intrinsics.h b/runtime/include/vx_intrinsics.h index cb780c3d..87a123ba 100644 --- a/runtime/include/vx_intrinsics.h +++ b/runtime/include/vx_intrinsics.h @@ -76,7 +76,7 @@ inline void vx_join() { // Warp Barrier inline void vx_barrier(unsigned barried_id, unsigned num_warps) { - asm volatile (".insn s 0x6b, 4, %1, 0cd (%0)" :: "r"(barried_id), "r"(num_warps)); + asm volatile (".insn s 0x6b, 4, %1, 0(%0)" :: "r"(barried_id), "r"(num_warps)); } // Return active warp's thread id diff --git a/runtime/src/vx_spawn.c b/runtime/src/vx_spawn.c index 4d1de2c4..94fd4a31 100644 --- a/runtime/src/vx_spawn.c +++ b/runtime/src/vx_spawn.c @@ -16,6 +16,7 @@ typedef struct { int offset; int N; int R; + int NW; } wspawn_tasks_args_t; typedef struct { @@ -25,6 +26,7 @@ typedef struct { int offset; int N; int R; + int NW; char isXYpow2; char isXpow2; char log2XY; @@ -61,6 +63,9 @@ static void spawn_tasks_callback() { (p_wspawn_args->callback)(task_id, p_wspawn_args->arg); } + // wait for all warps to complete + vx_barrier(0, p_wspawn_args->NW); + // set warp0 to single-threaded and stop other warps vx_tmc(0 == wid); } @@ -116,12 +121,13 @@ void vx_spawn_tasks(int num_tasks, vx_spawn_tasks_cb callback , void * arg) { fW = 1; //-- - wspawn_tasks_args_t wspawn_args = { callback, arg, core_id * tasks_per_core, fW, rW }; + wspawn_tasks_args_t wspawn_args = { callback, arg, core_id * tasks_per_core, fW, rW, 0 }; g_wspawn_args[core_id] = &wspawn_args; //-- if (nW >= 1) { int nw = MIN(nW, NW); + wspawn_args.NW = nw; vx_wspawn(nw, spawn_tasks_callback); spawn_tasks_callback(); } @@ -168,6 +174,9 @@ static void spawn_kernel_callback() { (p_wspawn_args->callback)(p_wspawn_args->arg, p_wspawn_args->ctx, gid0, gid1, gid2); } + // wait for all warps to complete + vx_barrier(0, p_wspawn_args->NW); + // set warp0 to single-threaded and stop other warps vx_tmc(0 == wid); } @@ -251,13 +260,14 @@ void vx_spawn_kernel(struct context_t * ctx, vx_spawn_kernel_cb callback, void * //-- wspawn_kernel_args_t wspawn_args = { - ctx, callback, arg, core_id * wgs_per_core, fW, rW, isXYpow2, isXpow2, log2XY, log2X + ctx, callback, arg, core_id * wgs_per_core, fW, rW, 0, isXYpow2, isXpow2, log2XY, log2X }; g_wspawn_args[core_id] = &wspawn_args; //-- if (nW >= 1) { int nw = MIN(nW, NW); + wspawn_args.NW = nw; vx_wspawn(nw, spawn_kernel_callback); spawn_kernel_callback(); } diff --git a/tests/runtime/simple/main.cpp b/tests/runtime/simple/main.cpp index df9fce85..56af9bb4 100644 --- a/tests/runtime/simple/main.cpp +++ b/tests/runtime/simple/main.cpp @@ -20,6 +20,8 @@ int main() { errors += test_tmask(); + errors += test_barrier(); + if (0 == errors) { vx_printf("Passed!\n"); } else { diff --git a/tests/runtime/simple/tests.cpp b/tests/runtime/simple/tests.cpp index 0464ce04..82a90732 100644 --- a/tests/runtime/simple/tests.cpp +++ b/tests/runtime/simple/tests.cpp @@ -98,7 +98,7 @@ int test_tmc() { int wspawn_buffer[8]; -void simple_kernel() { +void wspawn_kernel() { unsigned wid = vx_warp_id(); wspawn_buffer[wid] = 65 + wid; vx_tmc(0 == wid); @@ -107,8 +107,8 @@ void simple_kernel() { int test_wsapwn() { vx_printf("Wspawn Test\n"); int num_warps = std::min(vx_num_warps(), 8); - vx_wspawn(num_warps, simple_kernel); - simple_kernel(); + vx_wspawn(num_warps, wspawn_kernel); + wspawn_kernel(); return check_error(wspawn_buffer, num_warps); } @@ -216,4 +216,30 @@ l_start: vx_tmc(1); return check_error(tmask_buffer, num_threads); +} + +/////////////////////////////////////////////////////////////////////////////// + +int barrier_buffer[8]; +volatile int barrier_ctr; +volatile int barrier_stall; + +void barrier_kernel() { + unsigned wid = vx_warp_id(); + for (int i = 0; i <= (wid * 256); ++i) { + ++barrier_stall; + } + barrier_buffer[wid] = 65 + wid; + vx_barrier(0, barrier_ctr); + vx_tmc(0 == wid); +} + +int test_barrier() { + vx_printf("Barrier Test\n"); + int num_warps = std::min(vx_num_warps(), 8); + barrier_ctr = num_warps; + barrier_stall = 0; + vx_wspawn(num_warps, barrier_kernel); + barrier_kernel(); + return check_error(barrier_buffer, num_warps); } \ No newline at end of file diff --git a/tests/runtime/simple/tests.h b/tests/runtime/simple/tests.h index 8830496d..2424290d 100644 --- a/tests/runtime/simple/tests.h +++ b/tests/runtime/simple/tests.h @@ -17,4 +17,6 @@ int test_spawn_tasks(); int test_tmask(); +int test_barrier(); + #endif