diff --git a/tests/regression/sgemm_wg/kernel.cpp b/tests/regression/sgemm_wg/kernel.cpp index 44299934..78f056fa 100644 --- a/tests/regression/sgemm_wg/kernel.cpp +++ b/tests/regression/sgemm_wg/kernel.cpp @@ -12,63 +12,9 @@ #define TM 2 #define TN 2 -#define DEV_BARRIER_MMIO_BASE_ADDR 0xff003f00UL -#define CORES_PER_CLUSTER 2 -#define BARRIER_STRIDE 4 - void threadblock_barrier(unsigned int tid_in_threadblock, unsigned int barrier_id, unsigned int count) { - vx_barrier(barrier_id, count); vx_fence(); - - // vx_printf("========== barrier! barrier_id=%u, count=%u\n", barrier_id, count); - -#if CORES_PER_CLUSTER != 0 - // this code doesn't work without the memory-mapped register implemented in - // hardware, hence the #ifdef. - - if (tid_in_threadblock == 0) { - volatile uint32_t *mmio = (volatile uint32_t *)(DEV_BARRIER_MMIO_BASE_ADDR); - int core_id = vx_core_id(); - // FIXME: hardcoded - const uint32_t barrier_stride = BARRIER_STRIDE; - const uint32_t barrier_offset = barrier_stride * barrier_id; - - // wait for the barrier to be initialized - while (mmio[barrier_offset + 1 + core_id] != 0); - - // signal internal-core synchronization done - mmio[barrier_offset + 1 + core_id] = 1; - - // wait for other cores in the cluster to finish by waiting on the - // all-synced read-only mmio reg - while (mmio[barrier_offset] == 0); - - // need to signal that this core passed the barrier; otherwise, if we - // reset this to 0 right away, the other core still waiting for the - // barrier might never see the all-sync mmio reg as 1. - mmio[barrier_offset + 1 + core_id] = 2; - - // // if this core is the last one passing the barrier, reset all per-core - // // flags to 0 to get ready for the next barrier - // bool all_passed = true; - // for (int i = 0; i < CORES_PER_CLUSTER; i++) { - // // if (i == core_id) continue; - // // NOTE: this requires coherent access of store-to-load to the same - // // address - // if (mmio[barrier_offset + 1 + i] != 2) { - // all_passed = false; - // break; - // } - // } - // if (all_passed) { - // for (int i = 0; i < CORES_PER_CLUSTER; i++) { - // mmio[barrier_offset + 1 + i] = 0; - // } - // } - } - vx_barrier(barrier_id, count); -#endif } void thread_block_gemm(kernel_arg_t *__UNIFORM__ arg,