sgemm_impl: Mark threadblock_barrier convergent

Thank you Chris Lattner
This commit is contained in:
Hansung Kim
2024-09-08 22:49:38 -07:00
parent 714b9f501e
commit 1f51f7f9d4

View File

@@ -536,7 +536,8 @@ wmma_store(const int thread_in_warp, const int warp_col, const int warp_row,
asm volatile ("wmma_store_finish_%=:" :: );
}
-inline void threadblock_barrier(const uint32_t barrier_id, const uint32_t count) {
+__attribute__((convergent)) inline void
+threadblock_barrier(const uint32_t barrier_id, const uint32_t count) {
vx_fence();
vx_barrier(barrier_id, count);
}