diff --git a/kernel/src/vx_spawn.c b/kernel/src/vx_spawn.c index 1d838c1f..759b915c 100644 --- a/kernel/src/vx_spawn.c +++ b/kernel/src/vx_spawn.c @@ -254,6 +254,7 @@ void vx_spawn_tasks_cluster(int num_tasks, vx_spawn_tasks_cb callback, void *arg vx_wspawn_wait(); } + // TODO: this is incomplete // TODO: Instead of launching an additional wave just to work on remaining // threads, handle this in the last wave amongst other full warps. if (rem_threads_in_last_warp != 0 && core_id_in_cluster == 0) { diff --git a/tests/kernel/tensor/check_correctness.py b/tests/kernel/tensor/check_correctness.py index de0c976a..13e28891 100644 --- a/tests/kernel/tensor/check_correctness.py +++ b/tests/kernel/tensor/check_correctness.py @@ -82,16 +82,23 @@ with open(file) as f: expected = np.load("abc.npz") -expected_A = expected['A_array'] -expected_B = expected['B_array'] -expected_C = expected['C_array'] +# expected_A = expected['A_array'] +# expected_B = expected['B_array'] +# expected_C = expected['C_array'] +expected_A = expected['A_array'][0:8, 0:8] +expected_B = expected['B_array'][0:8, 0:8] +expected_C = expected['C_array'][0:8, 0:8] expected_C = expected_C + expected_A @ expected_B +print('expected A:') +print(expected_A) +print('expected B:') +print(expected_B) print('expected C:') print(expected_C[0:8, 0:8]) print('got C:') print(C_array[0:8, 0:8]) print('diff C:') -print((expected_C - C_array)[0:8, 0:8]) +print(expected_C[0:8, 0:8] - C_array[0:8, 0:8]) expected_C.astype('float32').tofile("c_expected.bin")