fixed wspawn's warp synchronization

This commit is contained in:
Blaise Tine
2021-08-15 05:12:27 -07:00
parent a60bfc5e01
commit bc3fa0bb23
5 changed files with 46 additions and 6 deletions

View File

@@ -76,7 +76,7 @@ inline void vx_join() {
// Warp Barrier
// Emits a hand-encoded instruction through the assembler's `.insn s` form
// (opcode 0x6b, funct3 4). barried_id is bound to %0, used as the base
// register of the `0(%0)` operand; num_warps is bound to %1.
// NOTE(review): presumably the calling warp waits until num_warps warps have
// reached barrier barried_id -- confirm against the ISA documentation.
inline void vx_barrier(unsigned barried_id, unsigned num_warps) {
// NOTE(review): the two statements below look like the removed and the added
// version of the same line in this diff view; they differ only in the offset
// operand ("0cd (%0)" vs "0(%0)") -- confirm only the second one is current.
asm volatile (".insn s 0x6b, 4, %1, 0cd (%0)" :: "r"(barried_id), "r"(num_warps));
asm volatile (".insn s 0x6b, 4, %1, 0(%0)" :: "r"(barried_id), "r"(num_warps));
}
// Return active warp's thread id

View File

@@ -16,6 +16,7 @@ typedef struct {
int offset;
int N;
int R;
int NW;
} wspawn_tasks_args_t;
typedef struct {
@@ -25,6 +26,7 @@ typedef struct {
int offset;
int N;
int R;
int NW;
char isXYpow2;
char isXpow2;
char log2XY;
@@ -61,6 +63,9 @@ static void spawn_tasks_callback() {
(p_wspawn_args->callback)(task_id, p_wspawn_args->arg);
}
// wait for all warps to complete
vx_barrier(0, p_wspawn_args->NW);
// set warp0 to single-threaded and stop other warps
vx_tmc(0 == wid);
}
@@ -116,12 +121,13 @@ void vx_spawn_tasks(int num_tasks, vx_spawn_tasks_cb callback , void * arg) {
fW = 1;
//--
wspawn_tasks_args_t wspawn_args = { callback, arg, core_id * tasks_per_core, fW, rW };
wspawn_tasks_args_t wspawn_args = { callback, arg, core_id * tasks_per_core, fW, rW, 0 };
g_wspawn_args[core_id] = &wspawn_args;
//--
if (nW >= 1) {
int nw = MIN(nW, NW);
wspawn_args.NW = nw;
vx_wspawn(nw, spawn_tasks_callback);
spawn_tasks_callback();
}
@@ -168,6 +174,9 @@ static void spawn_kernel_callback() {
(p_wspawn_args->callback)(p_wspawn_args->arg, p_wspawn_args->ctx, gid0, gid1, gid2);
}
// wait for all warps to complete
vx_barrier(0, p_wspawn_args->NW);
// set warp0 to single-threaded and stop other warps
vx_tmc(0 == wid);
}
@@ -251,13 +260,14 @@ void vx_spawn_kernel(struct context_t * ctx, vx_spawn_kernel_cb callback, void *
//--
wspawn_kernel_args_t wspawn_args = {
ctx, callback, arg, core_id * wgs_per_core, fW, rW, isXYpow2, isXpow2, log2XY, log2X
ctx, callback, arg, core_id * wgs_per_core, fW, rW, 0, isXYpow2, isXpow2, log2XY, log2X
};
g_wspawn_args[core_id] = &wspawn_args;
//--
if (nW >= 1) {
int nw = MIN(nW, NW);
wspawn_args.NW = nw;
vx_wspawn(nw, spawn_kernel_callback);
spawn_kernel_callback();
}