fixed wspawn's warp synchronization
This commit is contained in:
@@ -76,7 +76,7 @@ inline void vx_join() {
|
||||
|
||||
// Warp Barrier
|
||||
// Emit the warp-barrier instruction for barrier `barried_id` with a warp count of `num_warps`.
// Both arguments are handed to the instruction in registers: operand %0 is barried_id and
// operand %1 is num_warps. The instruction is encoded by hand through the assembler's
// `.insn s` directive (opcode 0x6b, funct3 4) rather than through a named mnemonic.
// NOTE(review): this listing is a rendered diff, so the two asm statements below are
// presumably the removed and the added version of the same line — confirm against the repository.
inline void vx_barrier(unsigned barried_id, unsigned num_warps) {
|
||||
// NOTE(review): presumably the pre-change line; "0cd (%0)" is not a well-formed offset(register) operand.
asm volatile (".insn s 0x6b, 4, %1, 0cd (%0)" :: "r"(barried_id), "r"(num_warps));
|
||||
// Presumably the post-change line: offset 0 applied to the register holding barried_id (%0),
// with num_warps (%1) as the other register operand.
asm volatile (".insn s 0x6b, 4, %1, 0(%0)" :: "r"(barried_id), "r"(num_warps));
|
||||
}
|
||||
|
||||
// Return active warp's thread id
|
||||
|
||||
@@ -16,6 +16,7 @@ typedef struct {
|
||||
int offset;
|
||||
int N;
|
||||
int R;
|
||||
int NW;
|
||||
} wspawn_tasks_args_t;
|
||||
|
||||
typedef struct {
|
||||
@@ -25,6 +26,7 @@ typedef struct {
|
||||
int offset;
|
||||
int N;
|
||||
int R;
|
||||
int NW;
|
||||
char isXYpow2;
|
||||
char isXpow2;
|
||||
char log2XY;
|
||||
@@ -61,6 +63,9 @@ static void spawn_tasks_callback() {
|
||||
(p_wspawn_args->callback)(task_id, p_wspawn_args->arg);
|
||||
}
|
||||
|
||||
// wait for all warps to complete
|
||||
vx_barrier(0, p_wspawn_args->NW);
|
||||
|
||||
// set warp0 to single-threaded and stop other warps
|
||||
vx_tmc(0 == wid);
|
||||
}
|
||||
@@ -116,12 +121,13 @@ void vx_spawn_tasks(int num_tasks, vx_spawn_tasks_cb callback , void * arg) {
|
||||
fW = 1;
|
||||
|
||||
//--
|
||||
wspawn_tasks_args_t wspawn_args = { callback, arg, core_id * tasks_per_core, fW, rW };
|
||||
wspawn_tasks_args_t wspawn_args = { callback, arg, core_id * tasks_per_core, fW, rW, 0 };
|
||||
g_wspawn_args[core_id] = &wspawn_args;
|
||||
|
||||
//--
|
||||
if (nW >= 1) {
|
||||
int nw = MIN(nW, NW);
|
||||
wspawn_args.NW = nw;
|
||||
vx_wspawn(nw, spawn_tasks_callback);
|
||||
spawn_tasks_callback();
|
||||
}
|
||||
@@ -168,6 +174,9 @@ static void spawn_kernel_callback() {
|
||||
(p_wspawn_args->callback)(p_wspawn_args->arg, p_wspawn_args->ctx, gid0, gid1, gid2);
|
||||
}
|
||||
|
||||
// wait for all warps to complete
|
||||
vx_barrier(0, p_wspawn_args->NW);
|
||||
|
||||
// set warp0 to single-threaded and stop other warps
|
||||
vx_tmc(0 == wid);
|
||||
}
|
||||
@@ -251,13 +260,14 @@ void vx_spawn_kernel(struct context_t * ctx, vx_spawn_kernel_cb callback, void *
|
||||
|
||||
//--
|
||||
wspawn_kernel_args_t wspawn_args = {
|
||||
ctx, callback, arg, core_id * wgs_per_core, fW, rW, isXYpow2, isXpow2, log2XY, log2X
|
||||
ctx, callback, arg, core_id * wgs_per_core, fW, rW, 0, isXYpow2, isXpow2, log2XY, log2X
|
||||
};
|
||||
g_wspawn_args[core_id] = &wspawn_args;
|
||||
|
||||
//--
|
||||
if (nW >= 1) {
|
||||
int nw = MIN(nW, NW);
|
||||
wspawn_args.NW = nw;
|
||||
vx_wspawn(nw, spawn_kernel_callback);
|
||||
spawn_kernel_callback();
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user