46 lines
1.2 KiB
C++
46 lines
1.2 KiB
C++
#include "common_wu_min.h"
|
|
|
|
// Barrier id for this case. It is passed to the inline assembly in
// tensor_worker() as the [bar_id] immediate, where it is OR'd with the shifted
// tensor-domain value to form the first source operand of the custom-0
// funct3=4 instruction.
#define CASE05_BARRIER_ID 1u
|
|
|
|
extern "C" void __attribute__((naked, noinline, used)) tensor_worker() {
|
|
asm volatile(
|
|
"csrr x5, %[csr_wid]\n\t"
|
|
"li x1, (%[bar_id] | (%[domain_tensor] << %[domain_shift]))\n\t"
|
|
"li x2, %[num_tensor]\n\t"
|
|
".insn r %[custom0], 4, 0, x0, x1, x2\n\t"
|
|
"slli x6, x5, 2\n\t"
|
|
"la x7, g_seen\n\t"
|
|
"add x7, x7, x6\n\t"
|
|
"li x6, %[tensor_base]\n\t"
|
|
"or x6, x6, x5\n\t"
|
|
"sw x6, 0(x7)\n\t"
|
|
".insn r %[custom0], 0, 0, x0, x0, x0\n\t"
|
|
"1: j 1b\n\t"
|
|
:
|
|
: [csr_wid] "i"(VX_CSR_WARP_ID),
|
|
[custom0] "i"(RISCV_CUSTOM0),
|
|
[bar_id] "i"(CASE05_BARRIER_ID),
|
|
[domain_tensor] "i"(VX_BARRIER_DOMAIN_TENSOR),
|
|
[domain_shift] "i"(VX_BARRIER_DOMAIN_SHIFT),
|
|
[num_tensor] "i"(NUM_TENSOR_WARPS),
|
|
[tensor_base] "i"(WU_CASE_TENSOR_BASE)
|
|
: "memory");
|
|
}
|
|
|
|
extern "C" int wu_main() {
|
|
if (!wu_is_leader()) {
|
|
return 0;
|
|
}
|
|
|
|
wu_case_reset();
|
|
vx_spawn_tensor(vx_tensor_warp_mask(), tensor_worker);
|
|
|
|
if (wu_wait_seen_range(NUM_SCALAR_WARPS, NUM_WARPS, WU_CASE_TENSOR_BASE) != 0) {
|
|
wu_case_fail(0x05u);
|
|
return 1;
|
|
}
|
|
|
|
wu_case_pass();
|
|
return 0;
|
|
}
|