sgemm_tcore: Fix smem allocation for non-dma
This commit is contained in:
@@ -542,8 +542,7 @@ void kernel_body(int task_id, kernel_arg_t *__UNIFORM__ arg) {
   // "static" shared memory allocation. This would determine threadblock
   // occupancy of a single cluster
   float *sharedmem_per_threadblock =
-      (float *)DEV_SMEM_START_ADDR + (GEMMINI_DMA ? 2 /*double-buffer*/ : 1) *
-                                         (2 * BM * BK) *
+      (float *)DEV_SMEM_START_ADDR + 2/*overkill for non-dma*/ * (2 * BM * BK) *
                                          threadblock_id_in_cluster;
 
   thread_block_gemm(arg, tid_in_threadblock, threads_per_threadblock,
Reference in New Issue
Block a user