tensor: Change B in-memory layout to column-major

This commit is contained in:
Hansung Kim
2024-08-12 15:20:55 -07:00
parent 07dd9e35a0
commit 95e3e96c6c
6 changed files with 42 additions and 24 deletions

View File

@@ -572,7 +572,7 @@ void kernel_body(int task_id, kernel_arg_t *__UNIFORM__ arg) {
const uint32_t problem_size = (dim_m * dim_n) / (ELEM_PER_THREAD);
const uint32_t num_threadblocks = problem_size / threads_per_threadblock;
using float_type = float;
using float_type = float16_t;
// "static" shared memory allocation. This would determine threadblock
// occupancy of a single cluster