fixed wspawn's warp synchronization
This commit is contained in:
@@ -76,7 +76,7 @@ inline void vx_join() {
|
|||||||
|
|
||||||
// Warp Barrier
|
// Warp Barrier
|
||||||
inline void vx_barrier(unsigned barried_id, unsigned num_warps) {
|
inline void vx_barrier(unsigned barried_id, unsigned num_warps) {
|
||||||
asm volatile (".insn s 0x6b, 4, %1, 0cd (%0)" :: "r"(barried_id), "r"(num_warps));
|
asm volatile (".insn s 0x6b, 4, %1, 0(%0)" :: "r"(barried_id), "r"(num_warps));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Return active warp's thread id
|
// Return active warp's thread id
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ typedef struct {
|
|||||||
int offset;
|
int offset;
|
||||||
int N;
|
int N;
|
||||||
int R;
|
int R;
|
||||||
|
int NW;
|
||||||
} wspawn_tasks_args_t;
|
} wspawn_tasks_args_t;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
@@ -25,6 +26,7 @@ typedef struct {
|
|||||||
int offset;
|
int offset;
|
||||||
int N;
|
int N;
|
||||||
int R;
|
int R;
|
||||||
|
int NW;
|
||||||
char isXYpow2;
|
char isXYpow2;
|
||||||
char isXpow2;
|
char isXpow2;
|
||||||
char log2XY;
|
char log2XY;
|
||||||
@@ -61,6 +63,9 @@ static void spawn_tasks_callback() {
|
|||||||
(p_wspawn_args->callback)(task_id, p_wspawn_args->arg);
|
(p_wspawn_args->callback)(task_id, p_wspawn_args->arg);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// wait for all warps to complete
|
||||||
|
vx_barrier(0, p_wspawn_args->NW);
|
||||||
|
|
||||||
// set warp0 to single-threaded and stop other warps
|
// set warp0 to single-threaded and stop other warps
|
||||||
vx_tmc(0 == wid);
|
vx_tmc(0 == wid);
|
||||||
}
|
}
|
||||||
@@ -116,12 +121,13 @@ void vx_spawn_tasks(int num_tasks, vx_spawn_tasks_cb callback , void * arg) {
|
|||||||
fW = 1;
|
fW = 1;
|
||||||
|
|
||||||
//--
|
//--
|
||||||
wspawn_tasks_args_t wspawn_args = { callback, arg, core_id * tasks_per_core, fW, rW };
|
wspawn_tasks_args_t wspawn_args = { callback, arg, core_id * tasks_per_core, fW, rW, 0 };
|
||||||
g_wspawn_args[core_id] = &wspawn_args;
|
g_wspawn_args[core_id] = &wspawn_args;
|
||||||
|
|
||||||
//--
|
//--
|
||||||
if (nW >= 1) {
|
if (nW >= 1) {
|
||||||
int nw = MIN(nW, NW);
|
int nw = MIN(nW, NW);
|
||||||
|
wspawn_args.NW = nw;
|
||||||
vx_wspawn(nw, spawn_tasks_callback);
|
vx_wspawn(nw, spawn_tasks_callback);
|
||||||
spawn_tasks_callback();
|
spawn_tasks_callback();
|
||||||
}
|
}
|
||||||
@@ -168,6 +174,9 @@ static void spawn_kernel_callback() {
|
|||||||
(p_wspawn_args->callback)(p_wspawn_args->arg, p_wspawn_args->ctx, gid0, gid1, gid2);
|
(p_wspawn_args->callback)(p_wspawn_args->arg, p_wspawn_args->ctx, gid0, gid1, gid2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// wait for all warps to complete
|
||||||
|
vx_barrier(0, p_wspawn_args->NW);
|
||||||
|
|
||||||
// set warp0 to single-threaded and stop other warps
|
// set warp0 to single-threaded and stop other warps
|
||||||
vx_tmc(0 == wid);
|
vx_tmc(0 == wid);
|
||||||
}
|
}
|
||||||
@@ -251,13 +260,14 @@ void vx_spawn_kernel(struct context_t * ctx, vx_spawn_kernel_cb callback, void *
|
|||||||
|
|
||||||
//--
|
//--
|
||||||
wspawn_kernel_args_t wspawn_args = {
|
wspawn_kernel_args_t wspawn_args = {
|
||||||
ctx, callback, arg, core_id * wgs_per_core, fW, rW, isXYpow2, isXpow2, log2XY, log2X
|
ctx, callback, arg, core_id * wgs_per_core, fW, rW, 0, isXYpow2, isXpow2, log2XY, log2X
|
||||||
};
|
};
|
||||||
g_wspawn_args[core_id] = &wspawn_args;
|
g_wspawn_args[core_id] = &wspawn_args;
|
||||||
|
|
||||||
//--
|
//--
|
||||||
if (nW >= 1) {
|
if (nW >= 1) {
|
||||||
int nw = MIN(nW, NW);
|
int nw = MIN(nW, NW);
|
||||||
|
wspawn_args.NW = nw;
|
||||||
vx_wspawn(nw, spawn_kernel_callback);
|
vx_wspawn(nw, spawn_kernel_callback);
|
||||||
spawn_kernel_callback();
|
spawn_kernel_callback();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -20,6 +20,8 @@ int main() {
|
|||||||
|
|
||||||
errors += test_tmask();
|
errors += test_tmask();
|
||||||
|
|
||||||
|
errors += test_barrier();
|
||||||
|
|
||||||
if (0 == errors) {
|
if (0 == errors) {
|
||||||
vx_printf("Passed!\n");
|
vx_printf("Passed!\n");
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
@@ -98,7 +98,7 @@ int test_tmc() {
|
|||||||
|
|
||||||
int wspawn_buffer[8];
|
int wspawn_buffer[8];
|
||||||
|
|
||||||
void simple_kernel() {
|
void wspawn_kernel() {
|
||||||
unsigned wid = vx_warp_id();
|
unsigned wid = vx_warp_id();
|
||||||
wspawn_buffer[wid] = 65 + wid;
|
wspawn_buffer[wid] = 65 + wid;
|
||||||
vx_tmc(0 == wid);
|
vx_tmc(0 == wid);
|
||||||
@@ -107,8 +107,8 @@ void simple_kernel() {
|
|||||||
int test_wsapwn() {
|
int test_wsapwn() {
|
||||||
vx_printf("Wspawn Test\n");
|
vx_printf("Wspawn Test\n");
|
||||||
int num_warps = std::min(vx_num_warps(), 8);
|
int num_warps = std::min(vx_num_warps(), 8);
|
||||||
vx_wspawn(num_warps, simple_kernel);
|
vx_wspawn(num_warps, wspawn_kernel);
|
||||||
simple_kernel();
|
wspawn_kernel();
|
||||||
|
|
||||||
return check_error(wspawn_buffer, num_warps);
|
return check_error(wspawn_buffer, num_warps);
|
||||||
}
|
}
|
||||||
@@ -217,3 +217,29 @@ l_start:
|
|||||||
|
|
||||||
return check_error(tmask_buffer, num_threads);
|
return check_error(tmask_buffer, num_threads);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
int barrier_buffer[8];
|
||||||
|
volatile int barrier_ctr;
|
||||||
|
volatile int barrier_stall;
|
||||||
|
|
||||||
|
void barrier_kernel() {
|
||||||
|
unsigned wid = vx_warp_id();
|
||||||
|
for (int i = 0; i <= (wid * 256); ++i) {
|
||||||
|
++barrier_stall;
|
||||||
|
}
|
||||||
|
barrier_buffer[wid] = 65 + wid;
|
||||||
|
vx_barrier(0, barrier_ctr);
|
||||||
|
vx_tmc(0 == wid);
|
||||||
|
}
|
||||||
|
|
||||||
|
int test_barrier() {
|
||||||
|
vx_printf("Barrier Test\n");
|
||||||
|
int num_warps = std::min(vx_num_warps(), 8);
|
||||||
|
barrier_ctr = num_warps;
|
||||||
|
barrier_stall = 0;
|
||||||
|
vx_wspawn(num_warps, barrier_kernel);
|
||||||
|
barrier_kernel();
|
||||||
|
return check_error(barrier_buffer, num_warps);
|
||||||
|
}
|
||||||
@@ -17,4 +17,6 @@ int test_spawn_tasks();
|
|||||||
|
|
||||||
int test_tmask();
|
int test_tmask();
|
||||||
|
|
||||||
|
int test_barrier();
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
Reference in New Issue
Block a user