Set TENSOR_HOPPER to 1, add missing markers
This commit is contained in:
@@ -95,6 +95,8 @@ void kernel_body(int task_id, kernel_arg_t *__UNIFORM__ arg) {
|
|||||||
constexpr uint32_t quartile = (128 << 10) >> 2; // 128KB / 4
|
constexpr uint32_t quartile = (128 << 10) >> 2; // 128KB / 4
|
||||||
static_assert((quartile * 4) == SMEM_SIZE, "wrong quartile constant");
|
static_assert((quartile * 4) == SMEM_SIZE, "wrong quartile constant");
|
||||||
|
|
||||||
|
MARK_BEG();
|
||||||
|
|
||||||
constexpr uint32_t smem_a_offset = 0;
|
constexpr uint32_t smem_a_offset = 0;
|
||||||
constexpr uint32_t smem_a_dbuf_offset = 1 * quartile;
|
constexpr uint32_t smem_a_dbuf_offset = 1 * quartile;
|
||||||
constexpr uint32_t smem_b_offset =
|
constexpr uint32_t smem_b_offset =
|
||||||
@@ -119,6 +121,8 @@ void kernel_body(int task_id, kernel_arg_t *__UNIFORM__ arg) {
|
|||||||
threadblocks_per_cluster, threadblock_id_in_cluster,
|
threadblocks_per_cluster, threadblock_id_in_cluster,
|
||||||
sharedmem_per_threadblock);
|
sharedmem_per_threadblock);
|
||||||
|
|
||||||
|
MARK_END();
|
||||||
|
|
||||||
float *gmem_tmp_d0 = reinterpret_cast<float *>(0xd0000000UL);
|
float *gmem_tmp_d0 = reinterpret_cast<float *>(0xd0000000UL);
|
||||||
float *gmem_tmp_d1 = reinterpret_cast<float *>(0xd1000000UL);
|
float *gmem_tmp_d1 = reinterpret_cast<float *>(0xd1000000UL);
|
||||||
float *gmem_tmp_d2 = reinterpret_cast<float *>(0xd2000000UL);
|
float *gmem_tmp_d2 = reinterpret_cast<float *>(0xd2000000UL);
|
||||||
|
|||||||
@@ -19,7 +19,7 @@ using float_type = float16_t;
|
|||||||
|
|
||||||
// Generate kernel for the Hopper-style SMEM-decoupled tensor core. This uses
|
// Generate kernel for the Hopper-style SMEM-decoupled tensor core. This uses
|
||||||
// asynchronous HGMMA and HGMMA_WAIT instructions.
|
// asynchronous HGMMA and HGMMA_WAIT instructions.
|
||||||
#define TENSOR_HOPPER 0
|
#define TENSOR_HOPPER 1
|
||||||
|
|
||||||
// Constraints on parameters:
|
// Constraints on parameters:
|
||||||
// * Memory:
|
// * Memory:
|
||||||
|
|||||||
Reference in New Issue
Block a user