flatmm_32x512x128_1x4x1_16x16x32.hpp File Reference
flatmm_32x512x128_1x4x1_16x16x32.hpp File Reference
#include "ck_tile/core.hpp"
#include "ck_tile/ops/gemm/warp/warp_gemm.hpp"
#include "ck_tile/ops/flatmm/block/flatmm_uk_config.hpp"
#include "uk/flatmm_uk_gfx9_32x512x128_1x1x1_16x16x16.inc"
Go to the source code of this file.
Classes | |
| struct | ck_tile::Flatmm_32x512x128_1x4x1_16x16x32_Base |
| struct | ck_tile::Flatmm_32x512x128_1x4x1_16x16x32_BF16 |
| struct | ck_tile::Flatmm_32x512x128_1x4x1_16x16x32_FP16 |
Namespaces | |
| namespace | ck_tile |
Macros | |
| #define | _EXPAND_ASM_ARGS_OUT_ONE_ACC |
| #define | _EXPAND_ASM_ARGS_OUT_TWO_ACC |
| #define | _EXPAND_ASM_ARGS_IN |
| #define | _EXPAND_ASM_ARGS_CLOBBER |
| #define | CK_TILE_FLATMM_UK_MFMA CK_TILE_FLATMM_UK_MFMA_BF16 |
| #define | CK_TILE_FLATMM_UK_2B 1 |
| #define | _UK_MFMA_ "v_mfma_f32_16x16x16_bf16" |
| #define | _UK_GLD_A0 |
| #define | _UK_GLD_A1 |
| #define | _UK_GLD_A2 |
| #define | _UK_GLD_A3 |
| #define | _UK_GLD_A4 |
| #define | _UK_GLD_A5 |
| #define | _UK_GLD_A6 |
| #define | _UK_GLD_A7_AND_L1 |
| #define | _UK_GLD_A7_AND_L0 |
| #define | _UK_NONE "" |
| #define | _UK_SLD_A0_X "ds_read_b128 v[64:67], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_0]\n" |
| #define | _UK_SLD_A1_X "ds_read_b128 v[68:71], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_1]\n" |
| #define | _UK_SLD_A2_X "ds_read_b128 v[72:75], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_2]\n" |
| #define | _UK_SLD_A3_X "ds_read_b128 v[76:79], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_3]\n" |
| #define | _UK_SLD_A4_X "ds_read_b128 v[80:83], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_4]\n" |
| #define | _UK_SLD_A5_X "ds_read_b128 v[84:87], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_5]\n" |
| #define | _UK_SLD_A6_X "ds_read_b128 v[88:91], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_6]\n" |
| #define | _UK_SLD_A7_X "ds_read_b128 v[92:95], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_7]\n" |
| #define | _UK_SLD_A0_Y "ds_read_b128 v[96 : 99], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_0]\n" |
| #define | _UK_SLD_A1_Y "ds_read_b128 v[100:103], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_1]\n" |
| #define | _UK_SLD_A2_Y "ds_read_b128 v[104:107], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_2]\n" |
| #define | _UK_SLD_A3_Y "ds_read_b128 v[108:111], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_3]\n" |
| #define | _UK_SLD_A4_Y "ds_read_b128 v[112:115], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_4]\n" |
| #define | _UK_SLD_A5_Y "ds_read_b128 v[116:119], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_5]\n" |
| #define | _UK_SLD_A6_Y "ds_read_b128 v[120:123], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_6]\n" |
| #define | _UK_SLD_A7_Y "ds_read_b128 v[124:127], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_7]\n" |
| #define | _UK_PIPELINE_0(gld_a0_, gld_a1_, gld_a2_, gld_a3_, gld_a4_, gld_a5_, gld_a6_, gld_a7_, sld_a0_, sld_a1_, sld_a2_, sld_a3_, sld_a4_, sld_a5_, sld_a6_, sld_a7_, src_a0_, src_a1_, src_a2_, src_a3_, src_a4_, src_a5_, src_a6_, src_a7_, src_a8_, src_a9_, src_a10_, src_a11_, src_a12_, src_a13_, src_a14_, src_a15_, fence_0_, fence_1_, fence_2_, fence_3_, acc_0_, acc_1_, acc_2_, acc_3_, acc_4_, acc_5_, acc_6_, acc_7_, acc_8_, acc_9_, acc_10_, acc_11_, acc_12_, acc_13_, acc_14_, acc_15_, s_base_b_) |
| #define | _UK_PIPELINE_1(gld_a0_, gld_a1_, gld_a2_, gld_a3_, gld_a4_, gld_a5_, gld_a6_, gld_a7_, sld_a0_, sld_a1_, sld_a2_, sld_a3_, sld_a4_, sld_a5_, sld_a6_, sld_a7_, src_a0_, src_a1_, src_a2_, src_a3_, src_a4_, src_a5_, src_a6_, src_a7_, src_a8_, src_a9_, src_a10_, src_a11_, src_a12_, src_a13_, src_a14_, src_a15_, fence_0_, fence_1_, fence_2_, fence_3_, acc_0_, acc_1_, acc_2_, acc_3_, acc_4_, acc_5_, acc_6_, acc_7_, acc_8_, acc_9_, acc_10_, acc_11_, acc_12_, acc_13_, acc_14_, acc_15_, s_base_b_) |
| #define | CK_TILE_FLATMM_UK_MFMA CK_TILE_FLATMM_UK_MFMA_BF16 |
| #define | _UK_MFMA_ "v_mfma_f32_16x16x16_bf16" |
| #define | CK_TILE_FLATMM_UK_2B 0 |
| #define | _UK_GLD_A0 |
| #define | _UK_GLD_A1 |
| #define | _UK_GLD_A2 |
| #define | _UK_GLD_A3 |
| #define | _UK_GLD_A4 |
| #define | _UK_GLD_A5 |
| #define | _UK_GLD_A6 |
| #define | _UK_GLD_A7_AND_L1 |
| #define | _UK_GLD_A7_AND_L0 |
| #define | _UK_NONE "" |
| #define | _UK_SLD_A0_X "ds_read_b128 v[64:67], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_0]\n" |
| #define | _UK_SLD_A1_X "ds_read_b128 v[68:71], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_1]\n" |
| #define | _UK_SLD_A2_X "ds_read_b128 v[72:75], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_2]\n" |
| #define | _UK_SLD_A3_X "ds_read_b128 v[76:79], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_3]\n" |
| #define | _UK_SLD_A4_X "ds_read_b128 v[80:83], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_4]\n" |
| #define | _UK_SLD_A5_X "ds_read_b128 v[84:87], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_5]\n" |
| #define | _UK_SLD_A6_X "ds_read_b128 v[88:91], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_6]\n" |
| #define | _UK_SLD_A7_X "ds_read_b128 v[92:95], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_7]\n" |
| #define | _UK_SLD_A0_Y "ds_read_b128 v[96 : 99], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_0]\n" |
| #define | _UK_SLD_A1_Y "ds_read_b128 v[100:103], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_1]\n" |
| #define | _UK_SLD_A2_Y "ds_read_b128 v[104:107], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_2]\n" |
| #define | _UK_SLD_A3_Y "ds_read_b128 v[108:111], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_3]\n" |
| #define | _UK_SLD_A4_Y "ds_read_b128 v[112:115], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_4]\n" |
| #define | _UK_SLD_A5_Y "ds_read_b128 v[116:119], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_5]\n" |
| #define | _UK_SLD_A6_Y "ds_read_b128 v[120:123], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_6]\n" |
| #define | _UK_SLD_A7_Y "ds_read_b128 v[124:127], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_7]\n" |
| #define | _UK_PIPELINE_0(gld_a0_, gld_a1_, gld_a2_, gld_a3_, gld_a4_, gld_a5_, gld_a6_, gld_a7_, sld_a0_, sld_a1_, sld_a2_, sld_a3_, sld_a4_, sld_a5_, sld_a6_, sld_a7_, src_a0_, src_a1_, src_a2_, src_a3_, src_a4_, src_a5_, src_a6_, src_a7_, src_a8_, src_a9_, src_a10_, src_a11_, src_a12_, src_a13_, src_a14_, src_a15_, fence_0_, fence_1_, fence_2_, fence_3_, acc_0_, acc_1_, acc_2_, acc_3_, acc_4_, acc_5_, acc_6_, acc_7_, acc_8_, acc_9_, acc_10_, acc_11_, acc_12_, acc_13_, acc_14_, acc_15_, s_base_b_) |
| #define | _UK_PIPELINE_1(gld_a0_, gld_a1_, gld_a2_, gld_a3_, gld_a4_, gld_a5_, gld_a6_, gld_a7_, sld_a0_, sld_a1_, sld_a2_, sld_a3_, sld_a4_, sld_a5_, sld_a6_, sld_a7_, src_a0_, src_a1_, src_a2_, src_a3_, src_a4_, src_a5_, src_a6_, src_a7_, src_a8_, src_a9_, src_a10_, src_a11_, src_a12_, src_a13_, src_a14_, src_a15_, fence_0_, fence_1_, fence_2_, fence_3_, acc_0_, acc_1_, acc_2_, acc_3_, acc_4_, acc_5_, acc_6_, acc_7_, acc_8_, acc_9_, acc_10_, acc_11_, acc_12_, acc_13_, acc_14_, acc_15_, s_base_b_) |
| #define | CK_TILE_FLATMM_UK_MFMA CK_TILE_FLATMM_UK_MFMA_FP16 |
| #define | CK_TILE_FLATMM_UK_2B 1 |
| #define | _UK_MFMA_ "v_mfma_f32_16x16x16_bf16" |
| #define | _UK_GLD_A0 |
| #define | _UK_GLD_A1 |
| #define | _UK_GLD_A2 |
| #define | _UK_GLD_A3 |
| #define | _UK_GLD_A4 |
| #define | _UK_GLD_A5 |
| #define | _UK_GLD_A6 |
| #define | _UK_GLD_A7_AND_L1 |
| #define | _UK_GLD_A7_AND_L0 |
| #define | _UK_NONE "" |
| #define | _UK_SLD_A0_X "ds_read_b128 v[64:67], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_0]\n" |
| #define | _UK_SLD_A1_X "ds_read_b128 v[68:71], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_1]\n" |
| #define | _UK_SLD_A2_X "ds_read_b128 v[72:75], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_2]\n" |
| #define | _UK_SLD_A3_X "ds_read_b128 v[76:79], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_3]\n" |
| #define | _UK_SLD_A4_X "ds_read_b128 v[80:83], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_4]\n" |
| #define | _UK_SLD_A5_X "ds_read_b128 v[84:87], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_5]\n" |
| #define | _UK_SLD_A6_X "ds_read_b128 v[88:91], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_6]\n" |
| #define | _UK_SLD_A7_X "ds_read_b128 v[92:95], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_7]\n" |
| #define | _UK_SLD_A0_Y "ds_read_b128 v[96 : 99], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_0]\n" |
| #define | _UK_SLD_A1_Y "ds_read_b128 v[100:103], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_1]\n" |
| #define | _UK_SLD_A2_Y "ds_read_b128 v[104:107], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_2]\n" |
| #define | _UK_SLD_A3_Y "ds_read_b128 v[108:111], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_3]\n" |
| #define | _UK_SLD_A4_Y "ds_read_b128 v[112:115], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_4]\n" |
| #define | _UK_SLD_A5_Y "ds_read_b128 v[116:119], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_5]\n" |
| #define | _UK_SLD_A6_Y "ds_read_b128 v[120:123], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_6]\n" |
| #define | _UK_SLD_A7_Y "ds_read_b128 v[124:127], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_7]\n" |
| #define | _UK_PIPELINE_0(gld_a0_, gld_a1_, gld_a2_, gld_a3_, gld_a4_, gld_a5_, gld_a6_, gld_a7_, sld_a0_, sld_a1_, sld_a2_, sld_a3_, sld_a4_, sld_a5_, sld_a6_, sld_a7_, src_a0_, src_a1_, src_a2_, src_a3_, src_a4_, src_a5_, src_a6_, src_a7_, src_a8_, src_a9_, src_a10_, src_a11_, src_a12_, src_a13_, src_a14_, src_a15_, fence_0_, fence_1_, fence_2_, fence_3_, acc_0_, acc_1_, acc_2_, acc_3_, acc_4_, acc_5_, acc_6_, acc_7_, acc_8_, acc_9_, acc_10_, acc_11_, acc_12_, acc_13_, acc_14_, acc_15_, s_base_b_) |
| #define | _UK_PIPELINE_1(gld_a0_, gld_a1_, gld_a2_, gld_a3_, gld_a4_, gld_a5_, gld_a6_, gld_a7_, sld_a0_, sld_a1_, sld_a2_, sld_a3_, sld_a4_, sld_a5_, sld_a6_, sld_a7_, src_a0_, src_a1_, src_a2_, src_a3_, src_a4_, src_a5_, src_a6_, src_a7_, src_a8_, src_a9_, src_a10_, src_a11_, src_a12_, src_a13_, src_a14_, src_a15_, fence_0_, fence_1_, fence_2_, fence_3_, acc_0_, acc_1_, acc_2_, acc_3_, acc_4_, acc_5_, acc_6_, acc_7_, acc_8_, acc_9_, acc_10_, acc_11_, acc_12_, acc_13_, acc_14_, acc_15_, s_base_b_) |
| #define | CK_TILE_FLATMM_UK_MFMA CK_TILE_FLATMM_UK_MFMA_FP16 |
| #define | _UK_MFMA_ "v_mfma_f32_16x16x16_bf16" |
| #define | CK_TILE_FLATMM_UK_2B 0 |
| #define | _UK_GLD_A0 |
| #define | _UK_GLD_A1 |
| #define | _UK_GLD_A2 |
| #define | _UK_GLD_A3 |
| #define | _UK_GLD_A4 |
| #define | _UK_GLD_A5 |
| #define | _UK_GLD_A6 |
| #define | _UK_GLD_A7_AND_L1 |
| #define | _UK_GLD_A7_AND_L0 |
| #define | _UK_NONE "" |
| #define | _UK_SLD_A0_X "ds_read_b128 v[64:67], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_0]\n" |
| #define | _UK_SLD_A1_X "ds_read_b128 v[68:71], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_1]\n" |
| #define | _UK_SLD_A2_X "ds_read_b128 v[72:75], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_2]\n" |
| #define | _UK_SLD_A3_X "ds_read_b128 v[76:79], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_3]\n" |
| #define | _UK_SLD_A4_X "ds_read_b128 v[80:83], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_4]\n" |
| #define | _UK_SLD_A5_X "ds_read_b128 v[84:87], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_5]\n" |
| #define | _UK_SLD_A6_X "ds_read_b128 v[88:91], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_6]\n" |
| #define | _UK_SLD_A7_X "ds_read_b128 v[92:95], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_7]\n" |
| #define | _UK_SLD_A0_Y "ds_read_b128 v[96 : 99], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_0]\n" |
| #define | _UK_SLD_A1_Y "ds_read_b128 v[100:103], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_1]\n" |
| #define | _UK_SLD_A2_Y "ds_read_b128 v[104:107], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_2]\n" |
| #define | _UK_SLD_A3_Y "ds_read_b128 v[108:111], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_3]\n" |
| #define | _UK_SLD_A4_Y "ds_read_b128 v[112:115], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_4]\n" |
| #define | _UK_SLD_A5_Y "ds_read_b128 v[116:119], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_5]\n" |
| #define | _UK_SLD_A6_Y "ds_read_b128 v[120:123], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_6]\n" |
| #define | _UK_SLD_A7_Y "ds_read_b128 v[124:127], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_7]\n" |
| #define | _UK_PIPELINE_0(gld_a0_, gld_a1_, gld_a2_, gld_a3_, gld_a4_, gld_a5_, gld_a6_, gld_a7_, sld_a0_, sld_a1_, sld_a2_, sld_a3_, sld_a4_, sld_a5_, sld_a6_, sld_a7_, src_a0_, src_a1_, src_a2_, src_a3_, src_a4_, src_a5_, src_a6_, src_a7_, src_a8_, src_a9_, src_a10_, src_a11_, src_a12_, src_a13_, src_a14_, src_a15_, fence_0_, fence_1_, fence_2_, fence_3_, acc_0_, acc_1_, acc_2_, acc_3_, acc_4_, acc_5_, acc_6_, acc_7_, acc_8_, acc_9_, acc_10_, acc_11_, acc_12_, acc_13_, acc_14_, acc_15_, s_base_b_) |
| #define | _UK_PIPELINE_1(gld_a0_, gld_a1_, gld_a2_, gld_a3_, gld_a4_, gld_a5_, gld_a6_, gld_a7_, sld_a0_, sld_a1_, sld_a2_, sld_a3_, sld_a4_, sld_a5_, sld_a6_, sld_a7_, src_a0_, src_a1_, src_a2_, src_a3_, src_a4_, src_a5_, src_a6_, src_a7_, src_a8_, src_a9_, src_a10_, src_a11_, src_a12_, src_a13_, src_a14_, src_a15_, fence_0_, fence_1_, fence_2_, fence_3_, acc_0_, acc_1_, acc_2_, acc_3_, acc_4_, acc_5_, acc_6_, acc_7_, acc_8_, acc_9_, acc_10_, acc_11_, acc_12_, acc_13_, acc_14_, acc_15_, s_base_b_) |
Macro Definition Documentation
◆ _EXPAND_ASM_ARGS_CLOBBER
| #define _EXPAND_ASM_ARGS_CLOBBER |
◆ _EXPAND_ASM_ARGS_IN
| #define _EXPAND_ASM_ARGS_IN |
◆ _EXPAND_ASM_ARGS_OUT_ONE_ACC
| #define _EXPAND_ASM_ARGS_OUT_ONE_ACC |
Value:
[s_loop_cnt]"+s"(loop_cnt), \
[v_acc_0]"+v"(v_acc[0]), \
[v_acc_1]"+v"(v_acc[1]), \
[v_acc_2]"+v"(v_acc[2]), \
[v_acc_3]"+v"(v_acc[3]), \
[v_acc_4]"+v"(v_acc[4]), \
[v_acc_5]"+v"(v_acc[5]), \
[v_acc_6]"+v"(v_acc[6]), \
[v_acc_7]"+v"(v_acc[7]), \
[v_acc_8]"+v"(v_acc[8]), \
[v_acc_9]"+v"(v_acc[9]), \
[v_acc_10]"+v"(v_acc[10]), \
[v_acc_11]"+v"(v_acc[11]), \
[v_acc_12]"+v"(v_acc[12]), \
[v_acc_13]"+v"(v_acc[13]), \
[v_acc_14]"+v"(v_acc[14]), \
[v_acc_15]"+v"(v_acc[15]), \
[s_mem_]"+r"(smem)
◆ _EXPAND_ASM_ARGS_OUT_TWO_ACC
| #define _EXPAND_ASM_ARGS_OUT_TWO_ACC |
◆ _UK_GLD_A0 [1/4]
| #define _UK_GLD_A0 |
Value:
"buffer_load_dword %[v_os_a0], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[s_size_per_issue], m0\n"
◆ _UK_GLD_A0 [2/4]
| #define _UK_GLD_A0 |
Value:
"buffer_load_dword %[v_os_a0], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[s_size_per_issue], m0\n"
◆ _UK_GLD_A0 [3/4]
| #define _UK_GLD_A0 |
Value:
"buffer_load_dword %[v_os_a0], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[s_size_per_issue], m0\n"
◆ _UK_GLD_A0 [4/4]
| #define _UK_GLD_A0 |
Value:
"buffer_load_dword %[v_os_a0], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[s_size_per_issue], m0\n"
◆ _UK_GLD_A1 [1/4]
| #define _UK_GLD_A1 |
Value:
"buffer_load_dword %[v_os_a1], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[s_size_per_issue], m0\n"
◆ _UK_GLD_A1 [2/4]
| #define _UK_GLD_A1 |
Value:
"buffer_load_dword %[v_os_a1], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[s_size_per_issue], m0\n"
◆ _UK_GLD_A1 [3/4]
| #define _UK_GLD_A1 |
Value:
"buffer_load_dword %[v_os_a1], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[s_size_per_issue], m0\n"
◆ _UK_GLD_A1 [4/4]
| #define _UK_GLD_A1 |
Value:
"buffer_load_dword %[v_os_a1], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[s_size_per_issue], m0\n"
◆ _UK_GLD_A2 [1/4]
| #define _UK_GLD_A2 |
Value:
"buffer_load_dword %[v_os_a2], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[s_size_per_issue], m0\n"
◆ _UK_GLD_A2 [2/4]
| #define _UK_GLD_A2 |
Value:
"buffer_load_dword %[v_os_a2], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[s_size_per_issue], m0\n"
◆ _UK_GLD_A2 [3/4]
| #define _UK_GLD_A2 |
Value:
"buffer_load_dword %[v_os_a2], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[s_size_per_issue], m0\n"
◆ _UK_GLD_A2 [4/4]
| #define _UK_GLD_A2 |
Value:
"buffer_load_dword %[v_os_a2], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[s_size_per_issue], m0\n"
◆ _UK_GLD_A3 [1/4]
| #define _UK_GLD_A3 |
Value:
"buffer_load_dword %[v_os_a3], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[s_size_per_issue], m0\n"
◆ _UK_GLD_A3 [2/4]
| #define _UK_GLD_A3 |
Value:
"buffer_load_dword %[v_os_a3], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[s_size_per_issue], m0\n"
◆ _UK_GLD_A3 [3/4]
| #define _UK_GLD_A3 |
Value:
"buffer_load_dword %[v_os_a3], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[s_size_per_issue], m0\n"
◆ _UK_GLD_A3 [4/4]
| #define _UK_GLD_A3 |
Value:
"buffer_load_dword %[v_os_a3], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[s_size_per_issue], m0\n"
◆ _UK_GLD_A4 [1/4]
| #define _UK_GLD_A4 |
Value:
"buffer_load_dword %[v_os_a4], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[s_size_per_issue], m0\n"
◆ _UK_GLD_A4 [2/4]
| #define _UK_GLD_A4 |
Value:
"buffer_load_dword %[v_os_a4], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[s_size_per_issue], m0\n"
◆ _UK_GLD_A4 [3/4]
| #define _UK_GLD_A4 |
Value:
"buffer_load_dword %[v_os_a4], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[s_size_per_issue], m0\n"
◆ _UK_GLD_A4 [4/4]
| #define _UK_GLD_A4 |
Value:
"buffer_load_dword %[v_os_a4], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[s_size_per_issue], m0\n"
◆ _UK_GLD_A5 [1/4]
| #define _UK_GLD_A5 |
Value:
"buffer_load_dword %[v_os_a5], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[s_size_per_issue], m0\n"
◆ _UK_GLD_A5 [2/4]
| #define _UK_GLD_A5 |
Value:
"buffer_load_dword %[v_os_a5], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[s_size_per_issue], m0\n"
◆ _UK_GLD_A5 [3/4]
| #define _UK_GLD_A5 |
Value:
"buffer_load_dword %[v_os_a5], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[s_size_per_issue], m0\n"
◆ _UK_GLD_A5 [4/4]
| #define _UK_GLD_A5 |
Value:
"buffer_load_dword %[v_os_a5], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[s_size_per_issue], m0\n"
◆ _UK_GLD_A6 [1/4]
| #define _UK_GLD_A6 |
Value:
"buffer_load_dword %[v_os_a6], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[s_size_per_issue], m0\n"
◆ _UK_GLD_A6 [2/4]
| #define _UK_GLD_A6 |
Value:
"buffer_load_dword %[v_os_a6], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[s_size_per_issue], m0\n"
◆ _UK_GLD_A6 [3/4]
| #define _UK_GLD_A6 |
Value:
"buffer_load_dword %[v_os_a6], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[s_size_per_issue], m0\n"
◆ _UK_GLD_A6 [4/4]
| #define _UK_GLD_A6 |
Value:
"buffer_load_dword %[v_os_a6], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[s_size_per_issue], m0\n"
◆ _UK_GLD_A7_AND_L0 [1/4]
| #define _UK_GLD_A7_AND_L0 |
Value:
"buffer_load_dword %[v_os_a7], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, 0, %[s_m0_init]\n"
◆ _UK_GLD_A7_AND_L0 [2/4]
| #define _UK_GLD_A7_AND_L0 |
Value:
"buffer_load_dword %[v_os_a7], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, 0, %[s_m0_init]\n"
◆ _UK_GLD_A7_AND_L0 [3/4]
| #define _UK_GLD_A7_AND_L0 |
Value:
"buffer_load_dword %[v_os_a7], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, 0, %[s_m0_init]\n"
◆ _UK_GLD_A7_AND_L0 [4/4]
| #define _UK_GLD_A7_AND_L0 |
Value:
"buffer_load_dword %[v_os_a7], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, 0, %[s_m0_init]\n"
◆ _UK_GLD_A7_AND_L1 [1/4]
| #define _UK_GLD_A7_AND_L1 |
Value:
"buffer_load_dword %[v_os_a7], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[smem_sz], %[s_m0_init]\n"
◆ _UK_GLD_A7_AND_L1 [2/4]
| #define _UK_GLD_A7_AND_L1 |
Value:
"buffer_load_dword %[v_os_a7], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[smem_sz], %[s_m0_init]\n"
◆ _UK_GLD_A7_AND_L1 [3/4]
| #define _UK_GLD_A7_AND_L1 |
Value:
"buffer_load_dword %[v_os_a7], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[smem_sz], %[s_m0_init]\n"
◆ _UK_GLD_A7_AND_L1 [4/4]
| #define _UK_GLD_A7_AND_L1 |
Value:
"buffer_load_dword %[v_os_a7], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[smem_sz], %[s_m0_init]\n"
◆ _UK_MFMA_ [1/4]
| #define _UK_MFMA_ "v_mfma_f32_16x16x16_bf16" |
◆ _UK_MFMA_ [2/4]
| #define _UK_MFMA_ "v_mfma_f32_16x16x16_bf16" |
◆ _UK_MFMA_ [3/4]
| #define _UK_MFMA_ "v_mfma_f32_16x16x16_bf16" |
◆ _UK_MFMA_ [4/4]
| #define _UK_MFMA_ "v_mfma_f32_16x16x16_bf16" |
◆ _UK_NONE [1/4]
| #define _UK_NONE "" |
◆ _UK_NONE [2/4]
| #define _UK_NONE "" |
◆ _UK_NONE [3/4]
| #define _UK_NONE "" |
◆ _UK_NONE [4/4]
| #define _UK_NONE "" |
◆ _UK_PIPELINE_0 [1/4]
| #define _UK_PIPELINE_0 | ( | gld_a0_, | |
| gld_a1_, | |||
| gld_a2_, | |||
| gld_a3_, | |||
| gld_a4_, | |||
| gld_a5_, | |||
| gld_a6_, | |||
| gld_a7_, | |||
| sld_a0_, | |||
| sld_a1_, | |||
| sld_a2_, | |||
| sld_a3_, | |||
| sld_a4_, | |||
| sld_a5_, | |||
| sld_a6_, | |||
| sld_a7_, | |||
| src_a0_, | |||
| src_a1_, | |||
| src_a2_, | |||
| src_a3_, | |||
| src_a4_, | |||
| src_a5_, | |||
| src_a6_, | |||
| src_a7_, | |||
| src_a8_, | |||
| src_a9_, | |||
| src_a10_, | |||
| src_a11_, | |||
| src_a12_, | |||
| src_a13_, | |||
| src_a14_, | |||
| src_a15_, | |||
| fence_0_, | |||
| fence_1_, | |||
| fence_2_, | |||
| fence_3_, | |||
| acc_0_, | |||
| acc_1_, | |||
| acc_2_, | |||
| acc_3_, | |||
| acc_4_, | |||
| acc_5_, | |||
| acc_6_, | |||
| acc_7_, | |||
| acc_8_, | |||
| acc_9_, | |||
| acc_10_, | |||
| acc_11_, | |||
| acc_12_, | |||
| acc_13_, | |||
| acc_14_, | |||
| acc_15_, | |||
| s_base_b_ ) |
◆ _UK_PIPELINE_0 [2/4]
| #define _UK_PIPELINE_0 | ( | gld_a0_, | |
| gld_a1_, | |||
| gld_a2_, | |||
| gld_a3_, | |||
| gld_a4_, | |||
| gld_a5_, | |||
| gld_a6_, | |||
| gld_a7_, | |||
| sld_a0_, | |||
| sld_a1_, | |||
| sld_a2_, | |||
| sld_a3_, | |||
| sld_a4_, | |||
| sld_a5_, | |||
| sld_a6_, | |||
| sld_a7_, | |||
| src_a0_, | |||
| src_a1_, | |||
| src_a2_, | |||
| src_a3_, | |||
| src_a4_, | |||
| src_a5_, | |||
| src_a6_, | |||
| src_a7_, | |||
| src_a8_, | |||
| src_a9_, | |||
| src_a10_, | |||
| src_a11_, | |||
| src_a12_, | |||
| src_a13_, | |||
| src_a14_, | |||
| src_a15_, | |||
| fence_0_, | |||
| fence_1_, | |||
| fence_2_, | |||
| fence_3_, | |||
| acc_0_, | |||
| acc_1_, | |||
| acc_2_, | |||
| acc_3_, | |||
| acc_4_, | |||
| acc_5_, | |||
| acc_6_, | |||
| acc_7_, | |||
| acc_8_, | |||
| acc_9_, | |||
| acc_10_, | |||
| acc_11_, | |||
| acc_12_, | |||
| acc_13_, | |||
| acc_14_, | |||
| acc_15_, | |||
| s_base_b_ ) |
◆ _UK_PIPELINE_0 [3/4]
| #define _UK_PIPELINE_0 | ( | gld_a0_, | |
| gld_a1_, | |||
| gld_a2_, | |||
| gld_a3_, | |||
| gld_a4_, | |||
| gld_a5_, | |||
| gld_a6_, | |||
| gld_a7_, | |||
| sld_a0_, | |||
| sld_a1_, | |||
| sld_a2_, | |||
| sld_a3_, | |||
| sld_a4_, | |||
| sld_a5_, | |||
| sld_a6_, | |||
| sld_a7_, | |||
| src_a0_, | |||
| src_a1_, | |||
| src_a2_, | |||
| src_a3_, | |||
| src_a4_, | |||
| src_a5_, | |||
| src_a6_, | |||
| src_a7_, | |||
| src_a8_, | |||
| src_a9_, | |||
| src_a10_, | |||
| src_a11_, | |||
| src_a12_, | |||
| src_a13_, | |||
| src_a14_, | |||
| src_a15_, | |||
| fence_0_, | |||
| fence_1_, | |||
| fence_2_, | |||
| fence_3_, | |||
| acc_0_, | |||
| acc_1_, | |||
| acc_2_, | |||
| acc_3_, | |||
| acc_4_, | |||
| acc_5_, | |||
| acc_6_, | |||
| acc_7_, | |||
| acc_8_, | |||
| acc_9_, | |||
| acc_10_, | |||
| acc_11_, | |||
| acc_12_, | |||
| acc_13_, | |||
| acc_14_, | |||
| acc_15_, | |||
| s_base_b_ ) |
◆ _UK_PIPELINE_0 [4/4]
| #define _UK_PIPELINE_0 | ( | gld_a0_, | |
| gld_a1_, | |||
| gld_a2_, | |||
| gld_a3_, | |||
| gld_a4_, | |||
| gld_a5_, | |||
| gld_a6_, | |||
| gld_a7_, | |||
| sld_a0_, | |||
| sld_a1_, | |||
| sld_a2_, | |||
| sld_a3_, | |||
| sld_a4_, | |||
| sld_a5_, | |||
| sld_a6_, | |||
| sld_a7_, | |||
| src_a0_, | |||
| src_a1_, | |||
| src_a2_, | |||
| src_a3_, | |||
| src_a4_, | |||
| src_a5_, | |||
| src_a6_, | |||
| src_a7_, | |||
| src_a8_, | |||
| src_a9_, | |||
| src_a10_, | |||
| src_a11_, | |||
| src_a12_, | |||
| src_a13_, | |||
| src_a14_, | |||
| src_a15_, | |||
| fence_0_, | |||
| fence_1_, | |||
| fence_2_, | |||
| fence_3_, | |||
| acc_0_, | |||
| acc_1_, | |||
| acc_2_, | |||
| acc_3_, | |||
| acc_4_, | |||
| acc_5_, | |||
| acc_6_, | |||
| acc_7_, | |||
| acc_8_, | |||
| acc_9_, | |||
| acc_10_, | |||
| acc_11_, | |||
| acc_12_, | |||
| acc_13_, | |||
| acc_14_, | |||
| acc_15_, | |||
| s_base_b_ ) |
◆ _UK_PIPELINE_1 [1/4]
| #define _UK_PIPELINE_1 | ( | gld_a0_, | |
| gld_a1_, | |||
| gld_a2_, | |||
| gld_a3_, | |||
| gld_a4_, | |||
| gld_a5_, | |||
| gld_a6_, | |||
| gld_a7_, | |||
| sld_a0_, | |||
| sld_a1_, | |||
| sld_a2_, | |||
| sld_a3_, | |||
| sld_a4_, | |||
| sld_a5_, | |||
| sld_a6_, | |||
| sld_a7_, | |||
| src_a0_, | |||
| src_a1_, | |||
| src_a2_, | |||
| src_a3_, | |||
| src_a4_, | |||
| src_a5_, | |||
| src_a6_, | |||
| src_a7_, | |||
| src_a8_, | |||
| src_a9_, | |||
| src_a10_, | |||
| src_a11_, | |||
| src_a12_, | |||
| src_a13_, | |||
| src_a14_, | |||
| src_a15_, | |||
| fence_0_, | |||
| fence_1_, | |||
| fence_2_, | |||
| fence_3_, | |||
| acc_0_, | |||
| acc_1_, | |||
| acc_2_, | |||
| acc_3_, | |||
| acc_4_, | |||
| acc_5_, | |||
| acc_6_, | |||
| acc_7_, | |||
| acc_8_, | |||
| acc_9_, | |||
| acc_10_, | |||
| acc_11_, | |||
| acc_12_, | |||
| acc_13_, | |||
| acc_14_, | |||
| acc_15_, | |||
| s_base_b_ ) |
◆ _UK_PIPELINE_1 [2/4]
| #define _UK_PIPELINE_1 | ( | gld_a0_, | |
| gld_a1_, | |||
| gld_a2_, | |||
| gld_a3_, | |||
| gld_a4_, | |||
| gld_a5_, | |||
| gld_a6_, | |||
| gld_a7_, | |||
| sld_a0_, | |||
| sld_a1_, | |||
| sld_a2_, | |||
| sld_a3_, | |||
| sld_a4_, | |||
| sld_a5_, | |||
| sld_a6_, | |||
| sld_a7_, | |||
| src_a0_, | |||
| src_a1_, | |||
| src_a2_, | |||
| src_a3_, | |||
| src_a4_, | |||
| src_a5_, | |||
| src_a6_, | |||
| src_a7_, | |||
| src_a8_, | |||
| src_a9_, | |||
| src_a10_, | |||
| src_a11_, | |||
| src_a12_, | |||
| src_a13_, | |||
| src_a14_, | |||
| src_a15_, | |||
| fence_0_, | |||
| fence_1_, | |||
| fence_2_, | |||
| fence_3_, | |||
| acc_0_, | |||
| acc_1_, | |||
| acc_2_, | |||
| acc_3_, | |||
| acc_4_, | |||
| acc_5_, | |||
| acc_6_, | |||
| acc_7_, | |||
| acc_8_, | |||
| acc_9_, | |||
| acc_10_, | |||
| acc_11_, | |||
| acc_12_, | |||
| acc_13_, | |||
| acc_14_, | |||
| acc_15_, | |||
| s_base_b_ ) |
◆ _UK_PIPELINE_1 [3/4]
| #define _UK_PIPELINE_1 | ( | gld_a0_, | |
| gld_a1_, | |||
| gld_a2_, | |||
| gld_a3_, | |||
| gld_a4_, | |||
| gld_a5_, | |||
| gld_a6_, | |||
| gld_a7_, | |||
| sld_a0_, | |||
| sld_a1_, | |||
| sld_a2_, | |||
| sld_a3_, | |||
| sld_a4_, | |||
| sld_a5_, | |||
| sld_a6_, | |||
| sld_a7_, | |||
| src_a0_, | |||
| src_a1_, | |||
| src_a2_, | |||
| src_a3_, | |||
| src_a4_, | |||
| src_a5_, | |||
| src_a6_, | |||
| src_a7_, | |||
| src_a8_, | |||
| src_a9_, | |||
| src_a10_, | |||
| src_a11_, | |||
| src_a12_, | |||
| src_a13_, | |||
| src_a14_, | |||
| src_a15_, | |||
| fence_0_, | |||
| fence_1_, | |||
| fence_2_, | |||
| fence_3_, | |||
| acc_0_, | |||
| acc_1_, | |||
| acc_2_, | |||
| acc_3_, | |||
| acc_4_, | |||
| acc_5_, | |||
| acc_6_, | |||
| acc_7_, | |||
| acc_8_, | |||
| acc_9_, | |||
| acc_10_, | |||
| acc_11_, | |||
| acc_12_, | |||
| acc_13_, | |||
| acc_14_, | |||
| acc_15_, | |||
| s_base_b_ ) |
◆ _UK_PIPELINE_1 [4/4]
| #define _UK_PIPELINE_1 | ( | gld_a0_, | |
| gld_a1_, | |||
| gld_a2_, | |||
| gld_a3_, | |||
| gld_a4_, | |||
| gld_a5_, | |||
| gld_a6_, | |||
| gld_a7_, | |||
| sld_a0_, | |||
| sld_a1_, | |||
| sld_a2_, | |||
| sld_a3_, | |||
| sld_a4_, | |||
| sld_a5_, | |||
| sld_a6_, | |||
| sld_a7_, | |||
| src_a0_, | |||
| src_a1_, | |||
| src_a2_, | |||
| src_a3_, | |||
| src_a4_, | |||
| src_a5_, | |||
| src_a6_, | |||
| src_a7_, | |||
| src_a8_, | |||
| src_a9_, | |||
| src_a10_, | |||
| src_a11_, | |||
| src_a12_, | |||
| src_a13_, | |||
| src_a14_, | |||
| src_a15_, | |||
| fence_0_, | |||
| fence_1_, | |||
| fence_2_, | |||
| fence_3_, | |||
| acc_0_, | |||
| acc_1_, | |||
| acc_2_, | |||
| acc_3_, | |||
| acc_4_, | |||
| acc_5_, | |||
| acc_6_, | |||
| acc_7_, | |||
| acc_8_, | |||
| acc_9_, | |||
| acc_10_, | |||
| acc_11_, | |||
| acc_12_, | |||
| acc_13_, | |||
| acc_14_, | |||
| acc_15_, | |||
| s_base_b_ ) |
◆ _UK_SLD_A0_X [1/4]
| #define _UK_SLD_A0_X "ds_read_b128 v[64:67], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_0]\n" |
◆ _UK_SLD_A0_X [2/4]
| #define _UK_SLD_A0_X "ds_read_b128 v[64:67], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_0]\n" |
◆ _UK_SLD_A0_X [3/4]
| #define _UK_SLD_A0_X "ds_read_b128 v[64:67], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_0]\n" |
◆ _UK_SLD_A0_X [4/4]
| #define _UK_SLD_A0_X "ds_read_b128 v[64:67], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_0]\n" |
◆ _UK_SLD_A0_Y [1/4]
| #define _UK_SLD_A0_Y "ds_read_b128 v[96 : 99], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_0]\n" |
◆ _UK_SLD_A0_Y [2/4]
| #define _UK_SLD_A0_Y "ds_read_b128 v[96 : 99], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_0]\n" |
◆ _UK_SLD_A0_Y [3/4]
| #define _UK_SLD_A0_Y "ds_read_b128 v[96 : 99], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_0]\n" |
◆ _UK_SLD_A0_Y [4/4]
| #define _UK_SLD_A0_Y "ds_read_b128 v[96 : 99], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_0]\n" |
◆ _UK_SLD_A1_X [1/4]
| #define _UK_SLD_A1_X "ds_read_b128 v[68:71], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_1]\n" |
◆ _UK_SLD_A1_X [2/4]
| #define _UK_SLD_A1_X "ds_read_b128 v[68:71], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_1]\n" |
◆ _UK_SLD_A1_X [3/4]
| #define _UK_SLD_A1_X "ds_read_b128 v[68:71], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_1]\n" |
◆ _UK_SLD_A1_X [4/4]
| #define _UK_SLD_A1_X "ds_read_b128 v[68:71], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_1]\n" |
◆ _UK_SLD_A1_Y [1/4]
| #define _UK_SLD_A1_Y "ds_read_b128 v[100:103], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_1]\n" |
◆ _UK_SLD_A1_Y [2/4]
| #define _UK_SLD_A1_Y "ds_read_b128 v[100:103], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_1]\n" |
◆ _UK_SLD_A1_Y [3/4]
| #define _UK_SLD_A1_Y "ds_read_b128 v[100:103], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_1]\n" |
◆ _UK_SLD_A1_Y [4/4]
| #define _UK_SLD_A1_Y "ds_read_b128 v[100:103], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_1]\n" |
◆ _UK_SLD_A2_X [1/4]
| #define _UK_SLD_A2_X "ds_read_b128 v[72:75], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_2]\n" |
◆ _UK_SLD_A2_X [2/4]
| #define _UK_SLD_A2_X "ds_read_b128 v[72:75], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_2]\n" |
◆ _UK_SLD_A2_X [3/4]
| #define _UK_SLD_A2_X "ds_read_b128 v[72:75], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_2]\n" |
◆ _UK_SLD_A2_X [4/4]
| #define _UK_SLD_A2_X "ds_read_b128 v[72:75], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_2]\n" |
◆ _UK_SLD_A2_Y [1/4]
| #define _UK_SLD_A2_Y "ds_read_b128 v[104:107], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_2]\n" |
◆ _UK_SLD_A2_Y [2/4]
| #define _UK_SLD_A2_Y "ds_read_b128 v[104:107], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_2]\n" |
◆ _UK_SLD_A2_Y [3/4]
| #define _UK_SLD_A2_Y "ds_read_b128 v[104:107], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_2]\n" |
◆ _UK_SLD_A2_Y [4/4]
| #define _UK_SLD_A2_Y "ds_read_b128 v[104:107], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_2]\n" |
◆ _UK_SLD_A3_X [1/4]
| #define _UK_SLD_A3_X "ds_read_b128 v[76:79], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_3]\n" |
◆ _UK_SLD_A3_X [2/4]
| #define _UK_SLD_A3_X "ds_read_b128 v[76:79], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_3]\n" |
◆ _UK_SLD_A3_X [3/4]
| #define _UK_SLD_A3_X "ds_read_b128 v[76:79], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_3]\n" |
◆ _UK_SLD_A3_X [4/4]
| #define _UK_SLD_A3_X "ds_read_b128 v[76:79], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_3]\n" |
◆ _UK_SLD_A3_Y [1/4]
| #define _UK_SLD_A3_Y "ds_read_b128 v[108:111], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_3]\n" |
◆ _UK_SLD_A3_Y [2/4]
| #define _UK_SLD_A3_Y "ds_read_b128 v[108:111], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_3]\n" |
◆ _UK_SLD_A3_Y [3/4]
| #define _UK_SLD_A3_Y "ds_read_b128 v[108:111], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_3]\n" |
◆ _UK_SLD_A3_Y [4/4]
| #define _UK_SLD_A3_Y "ds_read_b128 v[108:111], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_3]\n" |
◆ _UK_SLD_A4_X [1/4]
| #define _UK_SLD_A4_X "ds_read_b128 v[80:83], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_4]\n" |
◆ _UK_SLD_A4_X [2/4]
| #define _UK_SLD_A4_X "ds_read_b128 v[80:83], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_4]\n" |
◆ _UK_SLD_A4_X [3/4]
| #define _UK_SLD_A4_X "ds_read_b128 v[80:83], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_4]\n" |
◆ _UK_SLD_A4_X [4/4]
| #define _UK_SLD_A4_X "ds_read_b128 v[80:83], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_4]\n" |
◆ _UK_SLD_A4_Y [1/4]
| #define _UK_SLD_A4_Y "ds_read_b128 v[112:115], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_4]\n" |
◆ _UK_SLD_A4_Y [2/4]
| #define _UK_SLD_A4_Y "ds_read_b128 v[112:115], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_4]\n" |
◆ _UK_SLD_A4_Y [3/4]
| #define _UK_SLD_A4_Y "ds_read_b128 v[112:115], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_4]\n" |
◆ _UK_SLD_A4_Y [4/4]
| #define _UK_SLD_A4_Y "ds_read_b128 v[112:115], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_4]\n" |
◆ _UK_SLD_A5_X [1/4]
| #define _UK_SLD_A5_X "ds_read_b128 v[84:87], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_5]\n" |
◆ _UK_SLD_A5_X [2/4]
| #define _UK_SLD_A5_X "ds_read_b128 v[84:87], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_5]\n" |
◆ _UK_SLD_A5_X [3/4]
| #define _UK_SLD_A5_X "ds_read_b128 v[84:87], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_5]\n" |
◆ _UK_SLD_A5_X [4/4]
| #define _UK_SLD_A5_X "ds_read_b128 v[84:87], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_5]\n" |
◆ _UK_SLD_A5_Y [1/4]
| #define _UK_SLD_A5_Y "ds_read_b128 v[116:119], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_5]\n" |
◆ _UK_SLD_A5_Y [2/4]
| #define _UK_SLD_A5_Y "ds_read_b128 v[116:119], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_5]\n" |
◆ _UK_SLD_A5_Y [3/4]
| #define _UK_SLD_A5_Y "ds_read_b128 v[116:119], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_5]\n" |
◆ _UK_SLD_A5_Y [4/4]
| #define _UK_SLD_A5_Y "ds_read_b128 v[116:119], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_5]\n" |
◆ _UK_SLD_A6_X [1/4]
| #define _UK_SLD_A6_X "ds_read_b128 v[88:91], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_6]\n" |
◆ _UK_SLD_A6_X [2/4]
| #define _UK_SLD_A6_X "ds_read_b128 v[88:91], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_6]\n" |
◆ _UK_SLD_A6_X [3/4]
| #define _UK_SLD_A6_X "ds_read_b128 v[88:91], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_6]\n" |
◆ _UK_SLD_A6_X [4/4]
| #define _UK_SLD_A6_X "ds_read_b128 v[88:91], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_6]\n" |
◆ _UK_SLD_A6_Y [1/4]
| #define _UK_SLD_A6_Y "ds_read_b128 v[120:123], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_6]\n" |
◆ _UK_SLD_A6_Y [2/4]
| #define _UK_SLD_A6_Y "ds_read_b128 v[120:123], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_6]\n" |
◆ _UK_SLD_A6_Y [3/4]
| #define _UK_SLD_A6_Y "ds_read_b128 v[120:123], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_6]\n" |
◆ _UK_SLD_A6_Y [4/4]
| #define _UK_SLD_A6_Y "ds_read_b128 v[120:123], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_6]\n" |
◆ _UK_SLD_A7_X [1/4]
| #define _UK_SLD_A7_X "ds_read_b128 v[92:95], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_7]\n" |
◆ _UK_SLD_A7_X [2/4]
| #define _UK_SLD_A7_X "ds_read_b128 v[92:95], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_7]\n" |
◆ _UK_SLD_A7_X [3/4]
| #define _UK_SLD_A7_X "ds_read_b128 v[92:95], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_7]\n" |
◆ _UK_SLD_A7_X [4/4]
| #define _UK_SLD_A7_X "ds_read_b128 v[92:95], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_7]\n" |
◆ _UK_SLD_A7_Y [1/4]
| #define _UK_SLD_A7_Y "ds_read_b128 v[124:127], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_7]\n" |
◆ _UK_SLD_A7_Y [2/4]
| #define _UK_SLD_A7_Y "ds_read_b128 v[124:127], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_7]\n" |
◆ _UK_SLD_A7_Y [3/4]
| #define _UK_SLD_A7_Y "ds_read_b128 v[124:127], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_7]\n" |
◆ _UK_SLD_A7_Y [4/4]
| #define _UK_SLD_A7_Y "ds_read_b128 v[124:127], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_7]\n" |
◆ CK_TILE_FLATMM_UK_2B [1/4]
| #define CK_TILE_FLATMM_UK_2B 1 |
◆ CK_TILE_FLATMM_UK_2B [2/4]
| #define CK_TILE_FLATMM_UK_2B 1 |
◆ CK_TILE_FLATMM_UK_2B [3/4]
| #define CK_TILE_FLATMM_UK_2B 0 |
◆ CK_TILE_FLATMM_UK_2B [4/4]
| #define CK_TILE_FLATMM_UK_2B 0 |
◆ CK_TILE_FLATMM_UK_MFMA [1/4]
| #define CK_TILE_FLATMM_UK_MFMA CK_TILE_FLATMM_UK_MFMA_FP16 |
◆ CK_TILE_FLATMM_UK_MFMA [2/4]
| #define CK_TILE_FLATMM_UK_MFMA CK_TILE_FLATMM_UK_MFMA_FP16 |
◆ CK_TILE_FLATMM_UK_MFMA [3/4]
| #define CK_TILE_FLATMM_UK_MFMA CK_TILE_FLATMM_UK_MFMA_BF16 |
◆ CK_TILE_FLATMM_UK_MFMA [4/4]
| #define CK_TILE_FLATMM_UK_MFMA CK_TILE_FLATMM_UK_MFMA_BF16 |