From df1f7f242a05d2d8fa21e3cd29994943545a121f Mon Sep 17 00:00:00 2001
From: Hansung Kim
Date: Tue, 26 Mar 2024 23:51:59 -0700
Subject: [PATCH] vx_spawn.c: Implement spawn_tasks_cluster_rem_stub

Add spawn_tasks_cluster_rem_stub() and call it from
vx_spawn_tasks_cluster() in place of spawn_tasks_rem_stub(), whose call
site was marked "FIXME: unimplemented for cluster!".

The new stub derives a cluster-wide warp index,
  wid_in_cluster = CORES_PER_CLUSTER * wid + (cid % CORES_PER_CLUSTER),
and invokes the callback with
  task_id = p_wspawn_args->offset + NT * wid_in_cluster + tid.

Known limitation (kept as a FIXME in the code): the offset computation
assumes that every core but the last one works with full warps and only
the last core has a partially-filled warp.

While here, spawn_tasks_cluster_all_stub() is tidied up: cluster_wid is
renamed to wid_in_cluster to match the new stub, and the local cid is
used instead of calling vx_core_id() in the modulo expression.

---
 kernel/src/vx_spawn.c | 29 ++++++++++++++++++++++++-----
 1 file changed, 24 insertions(+), 5 deletions(-)

diff --git a/kernel/src/vx_spawn.c b/kernel/src/vx_spawn.c
index 87688e1c..fb36b0bc 100644
--- a/kernel/src/vx_spawn.c
+++ b/kernel/src/vx_spawn.c
@@ -102,14 +102,15 @@ static void __attribute__ ((noinline)) spawn_tasks_cluster_all_stub() {
   int wid = vx_warp_id();
   int tid = vx_thread_id();
 
-  const int core_id_in_cluster = vx_core_id() % CORES_PER_CLUSTER;
-  const int cluster_wid = CORES_PER_CLUSTER * wid + core_id_in_cluster;
+  const int core_id_in_cluster = cid % CORES_PER_CLUSTER;
+  // round-robin warp_id allocation across cores in cluster
+  const int wid_in_cluster = CORES_PER_CLUSTER * wid + core_id_in_cluster;
 
   wspawn_tasks_args_t* p_wspawn_args = (wspawn_tasks_args_t*)g_wspawn_args[cid];
 
   // FIXME: handle RW
   int waves = p_wspawn_args->NWs;
-  int offset = p_wspawn_args->offset + (NT * cluster_wid + tid);
+  int offset = p_wspawn_args->offset + (NT * wid_in_cluster + tid);
   vx_spawn_tasks_cb callback = p_wspawn_args->callback;
   void* arg = p_wspawn_args->arg;
 
@@ -128,6 +129,25 @@ static void __attribute__ ((noinline)) spawn_tasks_rem_stub() {
   (p_wspawn_args->callback)(task_id, p_wspawn_args->arg);
 }
 
+static void __attribute__ ((noinline)) spawn_tasks_cluster_rem_stub() {
+  int NT = vx_num_threads();
+  int cid = vx_core_id();
+  int tid = vx_thread_id();
+  int wid = vx_warp_id();
+
+  const int core_id_in_cluster = cid % CORES_PER_CLUSTER;
+  // round-robin warp_id allocation across cores in cluster
+  const int wid_in_cluster = CORES_PER_CLUSTER * wid + core_id_in_cluster;
+
+  wspawn_tasks_args_t* p_wspawn_args = (wspawn_tasks_args_t*)g_wspawn_args[cid];
+  // FIXME: This assumes that all cores but the last one are working with full
+  // warps, and only the last core has a partially-filled warp.
+  int offset = p_wspawn_args->offset + (NT * wid_in_cluster + tid);
+
+  int task_id = offset;
+  (p_wspawn_args->callback)(task_id, p_wspawn_args->arg);
+}
+
 static void __attribute__ ((noinline)) spawn_tasks_cluster_all_cb() {
   // activate all threads
   vx_tmc(-1);
@@ -224,8 +244,7 @@ void vx_spawn_tasks_cluster(int num_tasks, vx_spawn_tasks_cb callback, void *arg
     vx_tmc(tmask);
 
     // call stub routine
-    // FIXME: unimplemented for cluster!
-    spawn_tasks_rem_stub();
+    spawn_tasks_cluster_rem_stub();
 
     // back to single-threaded
     vx_tmc_one();