diff --git a/hw/unittest/tensor/Makefile b/hw/unittest/tensor/Makefile
index 021b7dcb..a968ab14 100644
--- a/hw/unittest/tensor/Makefile
+++ b/hw/unittest/tensor/Makefile
@@ -35,7 +35,7 @@ SRCS += $(DPI_DIR)/float_dpi.cpp
 SRCS += $(SIM_DIR)/common/rvfloats.cpp
 SRCS += ./main.cpp
 
-RTL_SRCS += $(RTL_DIR)/fpu/VX_tensor_core.sv
+RTL_SRCS += $(RTL_DIR)/fpu/VX_tensor_dpu.sv
 RTL_SRCS += $(RTL_DIR)/fpu/VX_tensor_tb.sv
 
 TOP = VX_tensor_tb
diff --git a/tests/kernel/common.mk b/tests/kernel/common.mk
index 7bf4b520..e276c624 100644
--- a/tests/kernel/common.mk
+++ b/tests/kernel/common.mk
@@ -33,7 +33,7 @@ $(PROJECT).dump: $(PROJECT).elf
 $(PROJECT).bin: $(PROJECT).elf
 	$(CP) -O binary $(PROJECT).elf $(PROJECT).bin
 
-$(PROJECT).elf: $(SRCS)
+$(PROJECT).elf: $(SRCS) $(DEPS)
 	$(CC) $(CFLAGS) $(SRCS) $(LDFLAGS) -o $(PROJECT).elf
 
 run-rtlsim: $(PROJECT).bin
diff --git a/tests/kernel/tensor/check_correctness.py b/tests/kernel/tensor/check_correctness.py
index 84db43d3..c81212d0 100644
--- a/tests/kernel/tensor/check_correctness.py
+++ b/tests/kernel/tensor/check_correctness.py
@@ -86,8 +86,9 @@ expected_A = expected['A_array']
 expected_B = expected['B_array']
 expected_C = expected['C_array']
 expected_C = expected_C + expected_A @ expected_B
-
-print(expected_C - C_array)
+print(expected_C[0:8, 0:8])
+print(C_array[0:8, 0:8])
+print((expected_C - C_array)[0:8, 0:8])
 
 assert np.allclose(expected_A, A_array)
 assert np.allclose(expected_B, B_array)
diff --git a/tests/kernel/tensor/create_test_case.py b/tests/kernel/tensor/create_test_case.py
index 0fbd1583..35ad7d73 100644
--- a/tests/kernel/tensor/create_test_case.py
+++ b/tests/kernel/tensor/create_test_case.py
@@ -1,12 +1,15 @@
 import numpy as np
-# A_array = np.random.rand(16, 8)
-# B_array = np.random.rand(8, 16)
-A_array = np.zeros((16, 8))
-B_array = np.zeros((8, 16))
-A_array[0,:] = 1.0
-B_array[:,0] = 1.0
+A_array = np.random.rand(16, 8)
+B_array = np.random.rand(8, 16)
 C_array = np.random.rand(16, 16)
-
+# A_array = np.zeros((16, 8))
+# B_array = np.zeros((8, 16))
+# A_array[0,:] = 1.0
+# B_array[:,4] = 1.0
+# C_array = np.zeros((16, 16))
+# for i in range(16):
+#     for j in range(16):
+#         C_array[i,j] = i * 16 + j
 
 with open('a_matrix.h', 'w') as f:
     for i in range(A_array.shape[0]):