warp_gemm_dispatcher.hpp Source File

warp_gemm_dispatcher.hpp Source File

Composable Kernel: warp_gemm_dispatcher.hpp Source File
warp_gemm_dispatcher.hpp
Go to the documentation of this file.
1// SPDX-License-Identifier: MIT
2// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
3
4#pragma once
5
6#include "ck_tile/core.hpp"
9
10namespace ck_tile {
11
12namespace impl {
// Primary template of the warp-GEMM dispatcher. It is only declared; each supported
// (AType, BType, AccType, MPerWave, NPerWave, KPerWave, TransposeC, ...) combination is
// provided by a specialization below that exposes the selected warp GEMM as a nested `Type`.
// SwizzleA and UseStructuredSparsity default to false, so most specializations spell out only
// the first seven arguments.
// NOTE(review): listing lines 22-23 were missing from this view. They are reconstructed from
// the ten-argument alias signature shown in the cross-reference index; confirm the
// AttrNumAccess default (Single) against the upstream header.
13template <typename AType,
14 typename BType,
15 typename AccType,
16 index_t MPerWave,
17 index_t NPerWave,
18 index_t KPerWave,
19 bool TransposeC,
20 bool SwizzleA = false,
21 bool UseStructuredSparsity = false,
22 WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
23struct WarpGemmDispatcher;
24
25// clang-format off
26// fp32
27// ADataType, BDataType, AccDataType, MPerWave, NPerWave, KPerWave, TransposeC, SwizzleA, UseStructuredSparsity
// Explicit specializations for float A/B operands accumulating into float. Only the first
// seven arguments are written out; the remaining parameters take the primary template's
// defaults (SwizzleA = false, UseStructuredSparsity = false).
// The 16x16x16 shape has an entry for both TransposeC values; 16x16x4 only for false.
28template<> struct WarpGemmDispatcher<float, float, float, 16, 16, 4, false> { using Type = WarpGemmMfmaF32F32F32M16N16K4; };
29template<> struct WarpGemmDispatcher<float, float, float, 16, 16, 16, false> { using Type = WarpGemmMfmaF32F32F32M16N16K16<>; };
30template<> struct WarpGemmDispatcher<float, float, float, 16, 16, 16, true> { using Type = WarpGemmMfmaF32F32F32M16N16K16TransposedCDistribution<>; };
31// fp16
32// ADataType, BDataType, AccDataType, MPerWave, NPerWave, KPerWave, TransposeC, SwizzleA, UseStructuredSparsity
// Explicit specializations for half_t A/B operands accumulating into float.
// NOTE(review): several listing lines in this group (e.g. 34, 36-40, 42-46) are absent from
// this view, so the entries shown here may not be the complete fp16 table.
33template<> struct WarpGemmDispatcher<ck_tile::half_t, ck_tile::half_t, float, 32, 32, 8, false> { using Type = WarpGemmMfmaF16F16F32M32N32K8; };
35template<> struct WarpGemmDispatcher<ck_tile::half_t, ck_tile::half_t, float, 32, 32, 16, false> { using Type = WarpGemmMfmaF16F16F32M32N32K16<>; };
41template<> struct WarpGemmDispatcher<ck_tile::half_t, ck_tile::half_t, float, 16, 16, 32, false> { using Type = WarpGemmMfmaF16F16F32M16N16K32<>; };
47template<> struct WarpGemmDispatcher<ck_tile::half_t, ck_tile::half_t, float, 4, 64, 16, false> { using Type = WarpGemmMfmaF16F16F32M4N64K16; };
48template<> struct WarpGemmDispatcher<ck_tile::half_t, ck_tile::half_t, float, 64, 4, 16, false> { using Type = WarpGemmMfmaF16F16F32M64N4K16; };
49// WMMA cases
// The 16x16x16 shape is chosen by preprocessor: when __gfx11__ or __gfx12__ is defined, a
// partial specialization (any TransposeC, SwizzleA = false) selects the WMMA type and forwards
// TransposeC to it; otherwise the MFMA type is used.
50#if defined(__gfx11__) || defined(__gfx12__)
51template<bool TransposeC> struct WarpGemmDispatcher<ck_tile::half_t, ck_tile::half_t, float, 16, 16, 16, TransposeC, false> { using Type = WarpGemmWmma_f32_16x16x16_f16_f16<TransposeC>;};
52#else
53template<> struct WarpGemmDispatcher<ck_tile::half_t, ck_tile::half_t, float, 16, 16, 16, false> { using Type = WarpGemmMfmaF16F16F32M16N16K16; };
55#endif
56
// SwizzleA = true variants (TransposeC = false) for the 32x32x8 and 32x32x16 shapes.
57template<> struct WarpGemmDispatcher<ck_tile::half_t, ck_tile::half_t, float, 32, 32, 8, false, true> { using Type = WarpGemmMfmaF16F16F32M32N32K8SwizzleA; };
58template<> struct WarpGemmDispatcher<ck_tile::half_t, ck_tile::half_t, float, 32, 32, 16, false, true> { using Type = WarpGemmMfmaF16F16F32M32N32K16SwizzleA; };
61
62// fp16 2:4 structural sparsity
63// ADataType, BDataType, AccDataType, MPerWave, NPerWave, KPerWave, TransposeC, SwizzleA, UseStructuredSparsity
// Specializations with UseStructuredSparsity = true (TransposeC and SwizzleA both false).
// These select the Smfmac warp GEMM types instead of the Mfma ones used by the dense entries.
64template<> struct WarpGemmDispatcher<ck_tile::half_t, ck_tile::half_t, float, 32, 32, 16, false, false, true> { using Type = WarpGemmSmfmacF16F16F32M32N32K16; };
65template<> struct WarpGemmDispatcher<ck_tile::half_t, ck_tile::half_t, float, 16, 16, 32, false, false, true> { using Type = WarpGemmSmfmacF16F16F32M16N16K32; };
66
67// bf16
68// ADataType, BDataType, AccDataType, MPerWave, NPerWave, KPerWave, TransposeC, SwizzleA, UseStructuredSparsity
// Explicit specializations for bf16_t A/B operands accumulating into float. The visible
// entries use the same shapes and SwizzleA variants as the fp16 table.
// NOTE(review): several listing lines in this group (e.g. 70, 72-76, 78-82) are absent from
// this view, so the entries shown here may not be the complete bf16 table.
69template<> struct WarpGemmDispatcher<ck_tile::bf16_t, ck_tile::bf16_t, float, 32, 32, 8, false> { using Type = WarpGemmMfmaBf16Bf16F32M32N32K8; };
71template<> struct WarpGemmDispatcher<ck_tile::bf16_t, ck_tile::bf16_t, float, 32, 32, 16, false> { using Type = WarpGemmMfmaBf16Bf16F32M32N32K16<>; };
77template<> struct WarpGemmDispatcher<ck_tile::bf16_t, ck_tile::bf16_t, float, 16, 16, 32, false> { using Type = WarpGemmMfmaBf16Bf16F32M16N16K32<>; };
83template<> struct WarpGemmDispatcher<ck_tile::bf16_t, ck_tile::bf16_t, float, 4, 64, 16, false> { using Type = WarpGemmMfmaBf16Bf16F32M4N64K16; };
84template<> struct WarpGemmDispatcher<ck_tile::bf16_t, ck_tile::bf16_t, float, 64, 4, 16, false> { using Type = WarpGemmMfmaBf16Bf16F32M64N4K16; };
85// WMMA cases
// The 16x16x16 shape is chosen by preprocessor: when __gfx11__ or __gfx12__ is defined, a
// partial specialization (any TransposeC, SwizzleA = false) selects the WMMA type and forwards
// TransposeC to it; otherwise the MFMA type is used.
86#if defined(__gfx11__) || defined(__gfx12__)
87template<bool TransposeC> struct WarpGemmDispatcher<ck_tile::bf16_t, ck_tile::bf16_t, float, 16, 16, 16, TransposeC, false> { using Type = WarpGemmWmma_f32_16x16x16_bf16_bf16<TransposeC>; };
88#else
89template<> struct WarpGemmDispatcher<ck_tile::bf16_t, ck_tile::bf16_t, float, 16, 16, 16, false> { using Type = WarpGemmMfmaBf16Bf16F32M16N16K16; };
91#endif
92
// SwizzleA = true variants (TransposeC = false) for the 32x32x8 and 32x32x16 shapes.
93template<> struct WarpGemmDispatcher<ck_tile::bf16_t, ck_tile::bf16_t, float, 32, 32, 8, false, true> { using Type = WarpGemmMfmaBf16Bf16F32M32N32K8SwizzleA; };
94template<> struct WarpGemmDispatcher<ck_tile::bf16_t, ck_tile::bf16_t, float, 32, 32, 16, false, true> { using Type = WarpGemmMfmaBf16Bf16F32M32N32K16SwizzleA; };
97
98// fp8
99// ADataType, BDataType, AccDataType, MPerWave, NPerWave, KPerWave, TransposeC, SwizzleA, UseStructuredSparsity
// Explicit specializations for 8-bit float operands accumulating into float. A and B may each
// be fp8_t or bf8_t. Not every operand pair / shape / TransposeC combination has an entry;
// TransposeC = true entries map to the *_CTransposed types.
100template<> struct WarpGemmDispatcher<ck_tile::fp8_t, ck_tile::fp8_t, float, 32, 32, 16, false> { using Type = WarpGemmMfma_f32_32x32x16_fp8_fp8; };
101template<> struct WarpGemmDispatcher<ck_tile::fp8_t, ck_tile::fp8_t, float, 32, 32, 32, false> { using Type = WarpGemmMfma_f32_32x32x32_fp8_fp8; };
102template<> struct WarpGemmDispatcher<ck_tile::fp8_t, ck_tile::fp8_t, float, 16, 16, 32, false> { using Type = WarpGemmMfma_f32_16x16x32_fp8_fp8; };
103template<> struct WarpGemmDispatcher<ck_tile::fp8_t, ck_tile::fp8_t, float, 16, 16, 64, false> { using Type = WarpGemmMfma_f32_16x16x64_fp8_fp8; };
104template<> struct WarpGemmDispatcher<ck_tile::fp8_t, ck_tile::fp8_t, float, 32, 32, 16, true> { using Type = WarpGemmMfma_f32_32x32x16_fp8_fp8_CTransposed; };
105template<> struct WarpGemmDispatcher<ck_tile::fp8_t, ck_tile::fp8_t, float, 16, 16, 32, true> { using Type = WarpGemmMfma_f32_16x16x32_fp8_fp8_CTransposed; };
106template<> struct WarpGemmDispatcher<ck_tile::fp8_t, ck_tile::bf8_t, float, 32, 32, 16, false> { using Type = WarpGemmMfma_f32_32x32x16_fp8_bf8; };
107template<> struct WarpGemmDispatcher<ck_tile::fp8_t, ck_tile::bf8_t, float, 32, 32, 16, true> { using Type = WarpGemmMfma_f32_32x32x16_fp8_bf8_CTransposed; };
108template<> struct WarpGemmDispatcher<ck_tile::fp8_t, ck_tile::bf8_t, float, 16, 16, 32, false> { using Type = WarpGemmMfma_f32_16x16x32_fp8_bf8; };
109template<> struct WarpGemmDispatcher<ck_tile::fp8_t, ck_tile::bf8_t, float, 32, 32, 32, false> { using Type = WarpGemmMfma_f32_32x32x32_fp8_bf8; };
110template<> struct WarpGemmDispatcher<ck_tile::bf8_t, ck_tile::fp8_t, float, 32, 32, 16, false> { using Type = WarpGemmMfma_f32_32x32x16_bf8_fp8; };
111template<> struct WarpGemmDispatcher<ck_tile::bf8_t, ck_tile::fp8_t, float, 32, 32, 16, true> { using Type = WarpGemmMfma_f32_32x32x16_bf8_fp8_CTransposed; };
112template<> struct WarpGemmDispatcher<ck_tile::bf8_t, ck_tile::bf8_t, float, 32, 32, 16, false> { using Type = WarpGemmMfma_f32_32x32x16_bf8_bf8; };
113template<> struct WarpGemmDispatcher<ck_tile::bf8_t, ck_tile::bf8_t, float, 32, 32, 32, false> { using Type = WarpGemmMfma_f32_32x32x32_bf8_bf8; };
114template<> struct WarpGemmDispatcher<ck_tile::bf8_t, ck_tile::bf8_t, float, 16, 16, 32, false> { using Type = WarpGemmMfma_f32_16x16x32_bf8_bf8; };
115template<> struct WarpGemmDispatcher<ck_tile::bf8_t, ck_tile::bf8_t, float, 16, 16, 32, true> { using Type = WarpGemmMfma_f32_16x16x32_bf8_bf8_CTransposed; };
116template<> struct WarpGemmDispatcher<ck_tile::bf8_t, ck_tile::bf8_t, float, 16, 16, 64, false> { using Type = WarpGemmMfma_f32_16x16x64_bf8_bf8; };
117template<> struct WarpGemmDispatcher<ck_tile::bf8_t, ck_tile::bf8_t, float, 32, 32, 16, true> { using Type = WarpGemmMfma_f32_32x32x16_bf8_bf8_CTransposed; };
// K = 128 shapes: the selected types are alias templates instantiated with `<>`, i.e. with
// their own default template argument.
118template<> struct WarpGemmDispatcher<ck_tile::fp8_t, ck_tile::fp8_t, float, 16, 16, 128, false> { using Type = WarpGemmMfma_f32_16x16x128_fp8_fp8<>; };
119template<> struct WarpGemmDispatcher<ck_tile::fp8_t, ck_tile::bf8_t, float, 16, 16, 128, false> { using Type = WarpGemmMfma_f32_16x16x128_fp8_bf8<>; };
120template<> struct WarpGemmDispatcher<ck_tile::bf8_t, ck_tile::fp8_t, float, 16, 16, 128, false> { using Type = WarpGemmMfma_f32_16x16x128_bf8_fp8<>; };
121template<> struct WarpGemmDispatcher<ck_tile::bf8_t, ck_tile::bf8_t, float, 16, 16, 128, false> { using Type = WarpGemmMfma_f32_16x16x128_bf8_bf8<>; };
126
// Packed fp4 operands (pk_fp4_t for both A and B), 16x16x128 only.
127template<> struct WarpGemmDispatcher<ck_tile::pk_fp4_t, ck_tile::pk_fp4_t, float, 16, 16, 128, false> { using Type = WarpGemmMfma_f32_16x16x128_fp4<>; };
128
// 32x32x64 shapes for all four fp8/bf8 operand pairings, again using `<>` defaults.
129template<> struct WarpGemmDispatcher<ck_tile::fp8_t, ck_tile::fp8_t, float, 32, 32, 64, false> { using Type = WarpGemmMfma_f32_32x32x64_fp8_fp8<>; };
130template<> struct WarpGemmDispatcher<ck_tile::fp8_t, ck_tile::bf8_t, float, 32, 32, 64, false> { using Type = WarpGemmMfma_f32_32x32x64_fp8_bf8<>; };
131template<> struct WarpGemmDispatcher<ck_tile::bf8_t, ck_tile::fp8_t, float, 32, 32, 64, false> { using Type = WarpGemmMfma_f32_32x32x64_bf8_fp8<>; };
132template<> struct WarpGemmDispatcher<ck_tile::bf8_t, ck_tile::bf8_t, float, 32, 32, 64, false> { using Type = WarpGemmMfma_f32_32x32x64_bf8_bf8<>; };
141
150
153
154//WMMA cases
// Partial specializations for the 16x16x16 shape with fp8/bf8 operands: TransposeC stays a
// template parameter and is forwarded to the WMMA type; SwizzleA is fixed to false.
// NOTE(review): in this listing these entries are not wrapped in a __gfx11__/__gfx12__
// preprocessor guard the way the fp16 and bf16 WMMA entries are - confirm that is intended.
155template<bool TransposeC> struct WarpGemmDispatcher<ck_tile::fp8_t, ck_tile::fp8_t, float, 16, 16, 16, TransposeC, false> { using Type =WarpGemmWmma_f32_16x16x16_f8_f8<TransposeC>; };
156template<bool TransposeC> struct WarpGemmDispatcher<ck_tile::bf8_t, ck_tile::bf8_t, float, 16, 16, 16, TransposeC, false> { using Type =WarpGemmWmma_f32_16x16x16_bf8_bf8<TransposeC>; };
157template<bool TransposeC> struct WarpGemmDispatcher<ck_tile::fp8_t, ck_tile::bf8_t, float, 16, 16, 16, TransposeC, false> { using Type =WarpGemmWmma_f32_16x16x16_f8_bf8<TransposeC>; };
158template<bool TransposeC> struct WarpGemmDispatcher<ck_tile::bf8_t, ck_tile::fp8_t, float, 16, 16, 16, TransposeC, false> { using Type =WarpGemmWmma_f32_16x16x16_bf8_f8<TransposeC>; };
159
160// int8
161// ADataType, BDataType, AccDataType, MPerWave, NPerWave, KPerWave, TransposeC, SwizzleA, UseStructuredSparsity
// int8_t A/B operands accumulate into int32_t rather than float.
// NOTE(review): listing lines 162-165 are absent from this view, so only the WMMA entry is
// shown here.
166// WMMA cases
// Partial specialization for 16x16x16: TransposeC is forwarded to the WMMA type and SwizzleA
// is fixed to false.
167template<bool TransposeC> struct WarpGemmDispatcher<ck_tile::int8_t, ck_tile::int8_t, int32_t, 16, 16, 16, TransposeC, false> { using Type = WarpGemmWmma_i32_16x16x16_i8_i8<TransposeC>;};
168
169// clang-format on
170} // namespace impl
171
// Public entry point: WarpGemmDispatcher<...> is an alias for the nested `Type` of the matching
// impl::WarpGemmDispatcher specialization. All ten template arguments are forwarded unchanged,
// so a combination with no matching specialization fails to compile when the alias is used.
// NOTE(review): listing lines 181-182 were missing from this view. They are reconstructed from
// the alias signature shown in the cross-reference index; confirm the AttrNumAccess default
// (Single) against the upstream header.
172template <typename AType,
173 typename BType,
174 typename AccType,
175 index_t MPerWave,
176 index_t NPerWave,
177 index_t KPerWave,
178 bool TransposeC,
179 bool SwizzleA = false,
180 bool UseStructuredSparsity = false,
181 WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
182using WarpGemmDispatcher = typename impl::WarpGemmDispatcher<AType,
183 BType,
184 AccType,
185 MPerWave,
186 NPerWave,
187 KPerWave,
188 TransposeC,
189 SwizzleA,
190 UseStructuredSparsity,
191 AttrNumAccess>::Type;
192
193} // namespace ck_tile
Definition tile/core/arch/amd_buffer_addressing.hpp:110
Definition tile/core/algorithm/cluster_descriptor.hpp:13
WGAttrNumAccessEnum
Definition warp_gemm_attribute_mfma.hpp:13
@ Single
Definition warp_gemm_attribute_mfma.hpp:14
@ Double
Definition warp_gemm_attribute_mfma.hpp:15
@ Quad
Definition warp_gemm_attribute_mfma.hpp:16
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImpl_f32_16x16x128_bf8_fp8< WGAttrCtlEnum::Default_ >, AttrNumAccess > > WarpGemmMfma_f32_16x16x128_bf8_fp8
Definition warp_gemm.hpp:324
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution< WarpGemmAttributeMfmaImpl_f32_32x32x16_bf8_bf8< WGAttrCtlEnum::Default_ > > > WarpGemmMfma_f32_32x32x16_bf8_bf8_CTransposed
Definition warp_gemm.hpp:389
typename impl::WarpGemmDispatcher< AType, BType, AccType, MPerWave, NPerWave, KPerWave, TransposeC, SwizzleA, UseStructuredSparsity, AttrNumAccess >::Type WarpGemmDispatcher
Definition warp_gemm_dispatcher.hpp:182
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImpl_f32_16x16x32_bf8_bf8< WGAttrCtlEnum::Default_ > > > WarpGemmMfma_f32_16x16x32_bf8_bf8
Definition warp_gemm.hpp:294
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImpl_f32_32x32x16_fp8_fp8< WGAttrCtlEnum::Default_ > > > WarpGemmMfma_f32_32x32x16_fp8_fp8
Definition warp_gemm.hpp:260
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImplBf16Bf16F32M16N16K16< WGAttrCtlEnum::Default_ > > > WarpGemmMfmaBf16Bf16F32M16N16K16
Definition warp_gemm.hpp:159
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImpl_f32_16x16x128_fp8_fp8< WGAttrCtlEnum::Default_ >, AttrNumAccess > > WarpGemmMfma_f32_16x16x128_fp8_fp8
Definition warp_gemm.hpp:314
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImpl_f32_32x32x64_fp8_bf8< WGAttrCtlEnum::Default_ >, AttrNumAccess > > WarpGemmMfma_f32_32x32x64_fp8_bf8
Definition warp_gemm.hpp:363
WarpGemmImpl< WarpGemmAttributeMfmaIterateK< WarpGemmAttributeMfmaImpl_f32_32x32x16_fp8_fp8< WGAttrCtlEnum::Default_ >, 2 > > WarpGemmMfma_f32_32x32x32_fp8_fp8
Definition warp_gemm.hpp:275
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImpl_i32_16x16x32_i8< WGAttrCtlEnum::Default_ > > > WarpGemmMfma_i32_16x16x32_i8_i8
Definition warp_gemm.hpp:408
WarpGemmImpl< WarpGemmAttributeMfmaIterateK< WarpGemmAttributeMfmaImplBf16Bf16F32M16N16K16< WGAttrCtlEnum::Default_ >, 2, AttrNumAccess > > WarpGemmMfmaBf16Bf16F32M16N16K32
Definition warp_gemm.hpp:182
WarpGemmSmfmacImpl< WarpGemmAttributeSmfmac< WarpGemmAttributeSmfmacImplF16F16F32M16N16K32< WGAttrCtlEnum::Default_ > > > WarpGemmSmfmacF16F16F32M16N16K32
Definition warp_gemm.hpp:152
WarpGemmImpl< WarpGemmAttributeMfmaIterateK< WarpGemmAttributeMfmaImplF16F16F32M4N64K4< WGAttrCtlEnum::Default_ >, 4 > > WarpGemmMfmaF16F16F32M4N64K16
Definition warp_gemm.hpp:140
WarpGemmImpl< WarpGemmAttributeWmma< WarpGemmAttributeWmmaImpl_f32_16x16x16_bf16_bf16, kTransC > > WarpGemmWmma_f32_16x16x16_bf16_bf16
Definition warp_wmma_gemm.hpp:19
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution< WarpGemmAttributeMfmaImpl_i32_16x16x32_i8< WGAttrCtlEnum::Default_ > > > WarpGemmMfma_i32_16x16x32_i8_i8_CTransposed
Definition warp_gemm.hpp:411
_Float16 half_t
Definition half.hpp:111
WarpGemmImpl< WarpGemmAttributeWmma< WarpGemmAttributeWmmaImpl_f32_16x16x16_f8_f8, kTransC > > WarpGemmWmma_f32_16x16x16_f8_f8
Definition warp_wmma_gemm.hpp:27
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution< WarpGemmAttributeMfmaImpl_i32_32x32x16_i8< WGAttrCtlEnum::Default_ > > > WarpGemmMfma_i32_32x32x16_i8_i8_CTransposed
Definition warp_gemm.hpp:404
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution< WarpGemmAttributeMfmaImpl_f32_16x16x32_fp8_fp8< WGAttrCtlEnum::Default_ > > > WarpGemmMfma_f32_16x16x32_fp8_fp8_CTransposed
Definition warp_gemm.hpp:290
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution< WarpGemmAttributeMfmaImplBf16Bf16F32M32N32K8< WGAttrCtlEnum::Default_ > > > WarpGemmMfmaBf16Bf16F32M32N32K8TransposedCDistribution
Definition warp_gemm.hpp:197
WarpGemmImpl< WarpGemmAttributeMfmaIterateK< WarpGemmAttributeMfmaImplBf16Bf16F32M4N64K4< WGAttrCtlEnum::Default_ >, 4 > > WarpGemmMfmaBf16Bf16F32M4N64K16
Definition warp_gemm.hpp:250
int8_t int8_t
Definition int8.hpp:20
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaScaleImpl_f32_16x16x128_fp4< WGAttrCtlEnum::Default_ >, AttrNumAccess > > WarpGemmMfma_f32_16x16x128_fp4
Definition warp_gemm.hpp:310
WarpGemmImpl< WarpGemmAttributeWmma< WarpGemmAttributeWmmaImpl_f32_16x16x16_bf8_bf8, kTransC > > WarpGemmWmma_f32_16x16x16_bf8_bf8
Definition warp_wmma_gemm.hpp:31
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution< WarpGemmAttributeMfmaImpl_f32_16x16x32_bf8_bf8< WGAttrCtlEnum::Default_ > > > WarpGemmMfma_f32_16x16x32_bf8_bf8_CTransposed
Definition warp_gemm.hpp:297
bfloat16_t bf16_t
Definition bfloat16.hpp:113
_BitInt(8) fp8_t
Definition float8.hpp:204
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImplF16F16F32M16N16K16< WGAttrCtlEnum::Default_ > > > WarpGemmMfmaF16F16F32M16N16K16
Definition warp_gemm.hpp:38
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution_SwizzleB< WarpGemmAttributeMfmaImplBf16Bf16F32M32N32K8< WGAttrCtlEnum::Default_ > > > WarpGemmMfmaBf16Bf16F32M32N32K8SwizzleBTransposedCDistribution
Definition warp_gemm.hpp:235
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImpl_f32_32x32x64_fp8_fp8< WGAttrCtlEnum::Default_ >, AttrNumAccess > > WarpGemmMfma_f32_32x32x64_fp8_fp8
Definition warp_gemm.hpp:358
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution< WarpGemmAttributeMfmaImpl_f32_32x32x16_fp8_fp8< WGAttrCtlEnum::Default_ > > > WarpGemmMfma_f32_32x32x16_fp8_fp8_CTransposed
Definition warp_gemm.hpp:377
WarpGemmImpl< WarpGemmAttributeMfmaIterateK< WarpGemmAttributeMfmaImpl_f32_32x32x16_fp8_bf8< WGAttrCtlEnum::Default_ >, 2 > > WarpGemmMfma_f32_32x32x32_fp8_bf8
Definition warp_gemm.hpp:283
WarpGemmImpl< WarpGemmAttributeMfmaIterateK< WarpGemmAttributeMfmaImplF16F16F32M64N4K4< WGAttrCtlEnum::Default_ >, 4 > > WarpGemmMfmaF16F16F32M64N4K16
Definition warp_gemm.hpp:144
WarpGemmImpl< WarpGemmAttributeMfmaIterateK< WarpGemmAttributeMfmaImplBf16Bf16F32M64N4K4< WGAttrCtlEnum::Default_ >, 4 > > WarpGemmMfmaBf16Bf16F32M64N4K16
Definition warp_gemm.hpp:254
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImpl_f32_16x16x32_fp8_fp8< WGAttrCtlEnum::Default_ > > > WarpGemmMfma_f32_16x16x32_fp8_fp8
Definition warp_gemm.hpp:287
WarpGemmImpl< WarpGemmAttributeMfmaIterateK< WarpGemmAttributeMfmaImplF16F16F32M32N32K8< WGAttrCtlEnum::Default_ >, 2, AttrNumAccess > > WarpGemmMfmaF16F16F32M32N32K16
Definition warp_gemm.hpp:48
WarpGemmImpl< WarpGemmAttributeMfmaIterateK< WarpGemmAttributeMfmaImplF16F16F32M16N16K16< WGAttrCtlEnum::Default_ >, 2, AttrNumAccess > > WarpGemmMfmaF16F16F32M16N16K32
Definition warp_gemm.hpp:61
WarpGemmImpl< WarpGemmAttributeMfmaIterateK< WarpGemmAttributeMfmaImplBf16Bf16F32M32N32K8< WGAttrCtlEnum::Default_ >, 2, AttrNumAccess > > WarpGemmMfmaBf16Bf16F32M32N32K16
Definition warp_gemm.hpp:169
WarpGemmImpl< WarpGemmAttributeMfmaIterateKAndTransposedCDistribution_SwizzleB< WarpGemmAttributeMfmaImplF16F16F32M32N32K8< WGAttrCtlEnum::Default_ >, 2 > > WarpGemmMfmaF16F16F32M32N32K16SwizzleBTransposedCDistribution
Definition warp_gemm.hpp:134
pk_float4_e2m1_t pk_fp4_t
Definition pk_fp4.hpp:151
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImpl_f32_16x16x128_bf8_bf8< WGAttrCtlEnum::Default_ >, AttrNumAccess > > WarpGemmMfma_f32_16x16x128_bf8_bf8
Definition warp_gemm.hpp:329
WarpGemmImpl< WarpGemmAttributeWmma< WarpGemmAttributeWmmaImpl_i32_16x16x16_i8_i8, kTransC > > WarpGemmWmma_i32_16x16x16_i8_i8
Definition warp_wmma_gemm.hpp:23
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution< WarpGemmAttributeMfmaImplBf16Bf16F32M16N16K16< WGAttrCtlEnum::Default_ > > > WarpGemmMfmaBf16Bf16F32M16N16K16TransposedCDistribution
Definition warp_gemm.hpp:201
WarpGemmImpl< WarpGemmAttributeMfmaIterateK_SwizzleA< WarpGemmAttributeMfmaImplBf16Bf16F32M32N32K8< WGAttrCtlEnum::Default_ >, 2 > > WarpGemmMfmaBf16Bf16F32M32N32K16SwizzleA
Definition warp_gemm.hpp:192
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution< WarpGemmAttributeMfmaImpl_f32_16x16x128_fp8_fp8< WGAttrCtlEnum::Default_ >, AttrNumAccess > > WarpGemmMfma_f32_16x16x128_fp8_fp8_CTransposed
Definition warp_gemm.hpp:334
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImpl_f32_32x32x64_bf8_fp8< WGAttrCtlEnum::Default_ >, AttrNumAccess > > WarpGemmMfma_f32_32x32x64_bf8_fp8
Definition warp_gemm.hpp:368
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution< WarpGemmAttributeMfmaImplF16F16F32M16N16K16< WGAttrCtlEnum::Default_ > > > WarpGemmMfmaF16F16F32M16N16K16TransposedCDistribution
Definition warp_gemm.hpp:79
WarpGemmImpl< WarpGemmAttributeMfmaIterateKAndTransposedCDistribution< WarpGemmAttributeMfmaImplF16F16F32M32N32K8< WGAttrCtlEnum::Default_ >, 2, AttrNumAccess > > WarpGemmMfmaF16F16F32M32N32K16TransposedCDistribution
Definition warp_gemm.hpp:91
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImplBf16Bf16F32M32N32K8< WGAttrCtlEnum::Default_ > > > WarpGemmMfmaBf16Bf16F32M32N32K8
Definition warp_gemm.hpp:156
WarpGemmImpl< WarpGemmAttributeMfmaIterateK_SwizzleA< WarpGemmAttributeMfmaImplF16F16F32M32N32K8< WGAttrCtlEnum::Default_ >, 2 > > WarpGemmMfmaF16F16F32M32N32K16SwizzleA
Definition warp_gemm.hpp:71
WarpGemmImpl< WarpGemmAttributeMfmaIterateK< WarpGemmAttributeMfmaImpl_f32_32x32x16_bf8_bf8< WGAttrCtlEnum::Default_ >, 2 > > WarpGemmMfma_f32_32x32x32_bf8_bf8
Definition warp_gemm.hpp:279
WarpGemmImpl< WarpGemmAttributeMfmaIterateK< WarpGemmAttributeMfmaImpl_f32_16x16x32_bf8_bf8< WGAttrCtlEnum::Default_ >, 2 > > WarpGemmMfma_f32_16x16x64_bf8_bf8
Definition warp_gemm.hpp:305
WarpGemmImpl< WarpGemmAttributeWmma< WarpGemmAttributeWmmaImpl_f32_16x16x16_f16_f16, kTransC > > WarpGemmWmma_f32_16x16x16_f16_f16
Definition warp_wmma_gemm.hpp:15
WarpGemmImpl< WarpGemmAttributeMfmaIterateKAndTransposedCDistribution_SwizzleB< WarpGemmAttributeMfmaImplBf16Bf16F32M32N32K8< WGAttrCtlEnum::Default_ >, 2 > > WarpGemmMfmaBf16Bf16F32M32N32K16SwizzleBTransposedCDistribution
Definition warp_gemm.hpp:244
WarpGemmImpl< WarpGemmAttributeMfmaIterateK_SwizzleA< WarpGemmAttributeMfmaImplF16F16F32M32N32K8< WGAttrCtlEnum::Default_ >, 1 > > WarpGemmMfmaF16F16F32M32N32K8SwizzleA
Definition warp_gemm.hpp:67
int32_t int32_t
Definition integer.hpp:10
WarpGemmImpl< WarpGemmAttributeMfmaIterateKAndTransposedCDistribution< WarpGemmAttributeMfmaImplBf16Bf16F32M16N16K16< WGAttrCtlEnum::Default_ >, 2, AttrNumAccess > > WarpGemmMfmaBf16Bf16F32M16N16K32TransposedCDistribution
Definition warp_gemm.hpp:228
WarpGemmSmfmacImpl< WarpGemmAttributeSmfmac< WarpGemmAttributeSmfmacImplF16F16F32M32N32K16< WGAttrCtlEnum::Default_ > > > WarpGemmSmfmacF16F16F32M32N32K16
Definition warp_gemm.hpp:149
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImpl_f32_32x32x64_bf8_bf8< WGAttrCtlEnum::Default_ >, AttrNumAccess > > WarpGemmMfma_f32_32x32x64_bf8_bf8
Definition warp_gemm.hpp:373
WarpGemmImpl< WarpGemmAttributeWmma< WarpGemmAttributeWmmaImpl_f32_16x16x16_f8_bf8, kTransC > > WarpGemmWmma_f32_16x16x16_f8_bf8
Definition warp_wmma_gemm.hpp:35
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImpl_f32_16x16x32_fp8_bf8< WGAttrCtlEnum::Default_ > > > WarpGemmMfma_f32_16x16x32_fp8_bf8
Definition warp_gemm.hpp:266
unsigned _BitInt(8) bf8_t
Definition float8.hpp:206
WarpGemmImpl< WarpGemmAttributeMfmaIterateKAndTransposedCDistribution< WarpGemmAttributeMfmaImplF32F32F32M16N16K4< WGAttrCtlEnum::Default_ >, 4, AttrNumAccess > > WarpGemmMfmaF32F32F32M16N16K16TransposedCDistribution
Definition warp_gemm.hpp:27
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImplF16F16F32M32N32K8< WGAttrCtlEnum::Default_ > > > WarpGemmMfmaF16F16F32M32N32K8
Definition warp_gemm.hpp:35
WarpGemmImpl< WarpGemmAttributeMfmaIterateKAndTransposedCDistribution< WarpGemmAttributeMfmaImplBf16Bf16F32M32N32K8< WGAttrCtlEnum::Default_ >, 2, AttrNumAccess > > WarpGemmMfmaBf16Bf16F32M32N32K16TransposedCDistribution
Definition warp_gemm.hpp:213
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution_SwizzleB< WarpGemmAttributeMfmaImplF16F16F32M32N32K8< WGAttrCtlEnum::Default_ > > > WarpGemmMfmaF16F16F32M32N32K8SwizzleBTransposedCDistribution
Definition warp_gemm.hpp:125
WarpGemmImpl< WarpGemmAttributeMfmaIterateKAndTransposedCDistribution< WarpGemmAttributeMfmaImplF16F16F32M16N16K16< WGAttrCtlEnum::Default_ >, 2, AttrNumAccess > > WarpGemmMfmaF16F16F32M16N16K32TransposedCDistribution
Definition warp_gemm.hpp:106
WarpGemmImpl< WarpGemmAttributeWmma< WarpGemmAttributeWmmaImpl_f32_16x16x16_bf8_f8, kTransC > > WarpGemmWmma_f32_16x16x16_bf8_f8
Definition warp_wmma_gemm.hpp:39
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImplF32F32F32M16N16K4< WGAttrCtlEnum::Default_ > > > WarpGemmMfmaF32F32F32M16N16K4
Definition warp_gemm.hpp:17
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImpl_f32_16x16x128_fp8_bf8< WGAttrCtlEnum::Default_ >, AttrNumAccess > > WarpGemmMfma_f32_16x16x128_fp8_bf8
Definition warp_gemm.hpp:319
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution< WarpGemmAttributeMfmaImpl_f32_16x16x128_bf8_fp8< WGAttrCtlEnum::Default_ >, AttrNumAccess > > WarpGemmMfma_f32_16x16x128_bf8_fp8_CTransposed
Definition warp_gemm.hpp:346
int32_t index_t
Definition integer.hpp:9
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution< WarpGemmAttributeMfmaImpl_f32_16x16x128_fp8_bf8< WGAttrCtlEnum::Default_ >, AttrNumAccess > > WarpGemmMfma_f32_16x16x128_fp8_bf8_CTransposed
Definition warp_gemm.hpp:340
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution< WarpGemmAttributeMfmaImpl_f32_32x32x16_fp8_bf8< WGAttrCtlEnum::Default_ > > > WarpGemmMfma_f32_32x32x16_fp8_bf8_CTransposed
Definition warp_gemm.hpp:381
WarpGemmImpl< WarpGemmAttributeMfmaIterateK_SwizzleA< WarpGemmAttributeMfmaImplBf16Bf16F32M32N32K8< WGAttrCtlEnum::Default_ >, 1 > > WarpGemmMfmaBf16Bf16F32M32N32K8SwizzleA
Definition warp_gemm.hpp:188
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution< WarpGemmAttributeMfmaImplF16F16F32M32N32K8< WGAttrCtlEnum::Default_ > > > WarpGemmMfmaF16F16F32M32N32K8TransposedCDistribution
Definition warp_gemm.hpp:75
WarpGemmImpl< WarpGemmAttributeMfmaIterateK< WarpGemmAttributeMfmaImplF32F32F32M16N16K4< WGAttrCtlEnum::Default_ >, 4, AttrNumAccess > > WarpGemmMfmaF32F32F32M16N16K16
Definition warp_gemm.hpp:21
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution< WarpGemmAttributeMfmaImpl_f32_32x32x16_bf8_fp8< WGAttrCtlEnum::Default_ > > > WarpGemmMfma_f32_32x32x16_bf8_fp8_CTransposed
Definition warp_gemm.hpp:385
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImpl_i32_32x32x16_i8< WGAttrCtlEnum::Default_ > > > WarpGemmMfma_i32_32x32x16_i8_i8
Definition warp_gemm.hpp:401
WarpGemmImpl< WarpGemmAttributeMfmaIterateK< WarpGemmAttributeMfmaImpl_f32_16x16x32_fp8_fp8< WGAttrCtlEnum::Default_ >, 2 > > WarpGemmMfma_f32_16x16x64_fp8_fp8
Definition warp_gemm.hpp:301
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution< WarpGemmAttributeMfmaImpl_f32_16x16x128_bf8_bf8< WGAttrCtlEnum::Default_ >, AttrNumAccess > > WarpGemmMfma_f32_16x16x128_bf8_bf8_CTransposed
Definition warp_gemm.hpp:352
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImpl_f32_32x32x16_fp8_bf8< WGAttrCtlEnum::Default_ > > > WarpGemmMfma_f32_32x32x16_fp8_bf8
Definition warp_gemm.hpp:263
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImpl_f32_32x32x16_bf8_bf8< WGAttrCtlEnum::Default_ > > > WarpGemmMfma_f32_32x32x16_bf8_bf8
Definition warp_gemm.hpp:272
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImpl_f32_32x32x16_bf8_fp8< WGAttrCtlEnum::Default_ > > > WarpGemmMfma_f32_32x32x16_bf8_fp8
Definition warp_gemm.hpp:269
Type
Type of JSON value.
Definition rapidjson.h:760
WarpGemmMfmaBf16Bf16F32M32N32K8TransposedCDistribution Type
Definition warp_gemm_dispatcher.hpp:70
WarpGemmMfmaBf16Bf16F32M16N16K16TransposedCDistribution Type
Definition warp_gemm_dispatcher.hpp:90
WarpGemmMfmaBf16Bf16F32M16N16K16 Type
Definition warp_gemm_dispatcher.hpp:89
WarpGemmMfmaBf16Bf16F32M32N32K16TransposedCDistribution< WGAttrNumAccessEnum::Double > Type
Definition warp_gemm_dispatcher.hpp:76
WarpGemmMfmaBf16Bf16F32M32N32K8SwizzleA Type
Definition warp_gemm_dispatcher.hpp:93
WarpGemmMfmaBf16Bf16F32M16N16K32TransposedCDistribution<> Type
Definition warp_gemm_dispatcher.hpp:78
WarpGemmMfmaBf16Bf16F32M32N32K8 Type
Definition warp_gemm_dispatcher.hpp:69
WarpGemmMfmaBf16Bf16F32M32N32K16< WGAttrNumAccessEnum::Double > Type
Definition warp_gemm_dispatcher.hpp:74
WarpGemmMfmaBf16Bf16F32M32N32K8SwizzleBTransposedCDistribution Type
Definition warp_gemm_dispatcher.hpp:95
WarpGemmMfmaBf16Bf16F32M4N64K16 Type
Definition warp_gemm_dispatcher.hpp:83
WarpGemmMfmaBf16Bf16F32M32N32K16SwizzleA Type
Definition warp_gemm_dispatcher.hpp:94
WarpGemmMfmaBf16Bf16F32M32N32K16<> Type
Definition warp_gemm_dispatcher.hpp:71
WarpGemmMfmaBf16Bf16F32M32N32K16SwizzleBTransposedCDistribution Type
Definition warp_gemm_dispatcher.hpp:96
WarpGemmMfmaBf16Bf16F32M16N16K32TransposedCDistribution< WGAttrNumAccessEnum::Double > Type
Definition warp_gemm_dispatcher.hpp:82
WarpGemmMfmaBf16Bf16F32M64N4K16 Type
Definition warp_gemm_dispatcher.hpp:84
WarpGemmMfmaBf16Bf16F32M32N32K16TransposedCDistribution<> Type
Definition warp_gemm_dispatcher.hpp:72
WarpGemmMfmaBf16Bf16F32M16N16K32< WGAttrNumAccessEnum::Double > Type
Definition warp_gemm_dispatcher.hpp:80
WarpGemmMfmaBf16Bf16F32M16N16K32<> Type
Definition warp_gemm_dispatcher.hpp:77
WarpGemmMfma_f32_32x32x32_bf8_bf8 Type
Definition warp_gemm_dispatcher.hpp:113
WarpGemmMfma_f32_32x32x64_bf8_bf8<> Type
Definition warp_gemm_dispatcher.hpp:132
WarpGemmMfma_f32_16x16x128_bf8_bf8<> Type
Definition warp_gemm_dispatcher.hpp:121
WarpGemmMfma_f32_16x16x32_bf8_bf8_CTransposed Type
Definition warp_gemm_dispatcher.hpp:115
WarpGemmMfma_f32_16x16x64_bf8_bf8 Type
Definition warp_gemm_dispatcher.hpp:116
WarpGemmMfma_f32_16x16x32_bf8_bf8 Type
Definition warp_gemm_dispatcher.hpp:114
WarpGemmMfma_f32_32x32x16_bf8_bf8 Type
Definition warp_gemm_dispatcher.hpp:112
WarpGemmWmma_f32_16x16x16_bf8_bf8< TransposeC > Type
Definition warp_gemm_dispatcher.hpp:156
WarpGemmMfma_f32_32x32x64_bf8_bf8< WGAttrNumAccessEnum::Quad > Type
Definition warp_gemm_dispatcher.hpp:140
WarpGemmMfma_f32_16x16x128_bf8_bf8_CTransposed<> Type
Definition warp_gemm_dispatcher.hpp:125
WarpGemmMfma_f32_16x16x128_bf8_bf8< WGAttrNumAccessEnum::Quad > Type
Definition warp_gemm_dispatcher.hpp:149
WarpGemmMfma_f32_32x32x16_bf8_bf8_CTransposed Type
Definition warp_gemm_dispatcher.hpp:117
WarpGemmMfma_f32_32x32x16_bf8_fp8_CTransposed Type
Definition warp_gemm_dispatcher.hpp:111
WarpGemmWmma_f32_16x16x16_bf8_f8< TransposeC > Type
Definition warp_gemm_dispatcher.hpp:158
WarpGemmMfma_f32_16x16x128_bf8_fp8_CTransposed<> Type
Definition warp_gemm_dispatcher.hpp:124
WarpGemmMfma_f32_32x32x64_bf8_fp8<> Type
Definition warp_gemm_dispatcher.hpp:131
WarpGemmMfma_f32_16x16x128_bf8_fp8< WGAttrNumAccessEnum::Quad > Type
Definition warp_gemm_dispatcher.hpp:147
WarpGemmMfma_f32_16x16x128_bf8_fp8<> Type
Definition warp_gemm_dispatcher.hpp:120
WarpGemmMfma_f32_32x32x16_bf8_fp8 Type
Definition warp_gemm_dispatcher.hpp:110
WarpGemmMfma_f32_32x32x64_bf8_fp8< WGAttrNumAccessEnum::Quad > Type
Definition warp_gemm_dispatcher.hpp:138
WarpGemmWmma_f32_16x16x16_f8_bf8< TransposeC > Type
Definition warp_gemm_dispatcher.hpp:157
WarpGemmMfma_f32_16x16x128_fp8_bf8<> Type
Definition warp_gemm_dispatcher.hpp:119
WarpGemmMfma_f32_32x32x16_fp8_bf8 Type
Definition warp_gemm_dispatcher.hpp:106
WarpGemmMfma_f32_32x32x64_fp8_bf8< WGAttrNumAccessEnum::Quad > Type
Definition warp_gemm_dispatcher.hpp:136
WarpGemmMfma_f32_16x16x32_fp8_bf8 Type
Definition warp_gemm_dispatcher.hpp:108
WarpGemmMfma_f32_32x32x16_fp8_bf8_CTransposed Type
Definition warp_gemm_dispatcher.hpp:107
WarpGemmMfma_f32_16x16x128_fp8_bf8_CTransposed<> Type
Definition warp_gemm_dispatcher.hpp:123
WarpGemmMfma_f32_32x32x64_fp8_bf8<> Type
Definition warp_gemm_dispatcher.hpp:130
WarpGemmMfma_f32_16x16x128_fp8_bf8< WGAttrNumAccessEnum::Quad > Type
Definition warp_gemm_dispatcher.hpp:145
WarpGemmMfma_f32_32x32x32_fp8_bf8 Type
Definition warp_gemm_dispatcher.hpp:109
WarpGemmMfma_f32_32x32x64_fp8_fp8<> Type
Definition warp_gemm_dispatcher.hpp:129
WarpGemmMfma_f32_32x32x16_fp8_fp8 Type
Definition warp_gemm_dispatcher.hpp:100
WarpGemmMfma_f32_16x16x128_fp8_fp8_CTransposed<> Type
Definition warp_gemm_dispatcher.hpp:122
WarpGemmMfma_f32_16x16x128_fp8_fp8<> Type
Definition warp_gemm_dispatcher.hpp:118
WarpGemmMfma_f32_16x16x32_fp8_fp8 Type
Definition warp_gemm_dispatcher.hpp:102
WarpGemmWmma_f32_16x16x16_f8_f8< TransposeC > Type
Definition warp_gemm_dispatcher.hpp:155
WarpGemmMfma_f32_16x16x64_fp8_fp8 Type
Definition warp_gemm_dispatcher.hpp:103
WarpGemmMfma_f32_32x32x32_fp8_fp8 Type
Definition warp_gemm_dispatcher.hpp:101
WarpGemmMfma_f32_32x32x16_fp8_fp8_CTransposed Type
Definition warp_gemm_dispatcher.hpp:104
WarpGemmMfma_f32_16x16x128_fp8_fp8< WGAttrNumAccessEnum::Quad > Type
Definition warp_gemm_dispatcher.hpp:143
WarpGemmMfma_f32_32x32x64_fp8_fp8< WGAttrNumAccessEnum::Quad > Type
Definition warp_gemm_dispatcher.hpp:134
WarpGemmMfma_f32_16x16x32_fp8_fp8_CTransposed Type
Definition warp_gemm_dispatcher.hpp:105
WarpGemmMfmaF16F16F32M64N4K16 Type
Definition warp_gemm_dispatcher.hpp:48
WarpGemmMfmaF16F16F32M16N16K32< WGAttrNumAccessEnum::Double > Type
Definition warp_gemm_dispatcher.hpp:44
WarpGemmMfmaF16F16F32M16N16K32TransposedCDistribution< WGAttrNumAccessEnum::Double > Type
Definition warp_gemm_dispatcher.hpp:46
WarpGemmMfmaF16F16F32M32N32K8SwizzleBTransposedCDistribution Type
Definition warp_gemm_dispatcher.hpp:59
WarpGemmMfmaF16F16F32M32N32K16SwizzleBTransposedCDistribution Type
Definition warp_gemm_dispatcher.hpp:60
WarpGemmMfmaF16F16F32M32N32K8TransposedCDistribution Type
Definition warp_gemm_dispatcher.hpp:34
WarpGemmMfmaF16F16F32M32N32K16< WGAttrNumAccessEnum::Double > Type
Definition warp_gemm_dispatcher.hpp:38
WarpGemmMfmaF16F16F32M32N32K16TransposedCDistribution< WGAttrNumAccessEnum::Double > Type
Definition warp_gemm_dispatcher.hpp:40
WarpGemmMfmaF16F16F32M16N16K32TransposedCDistribution<> Type
Definition warp_gemm_dispatcher.hpp:42
WarpGemmMfmaF16F16F32M32N32K16SwizzleA Type
Definition warp_gemm_dispatcher.hpp:58
WarpGemmMfmaF16F16F32M16N16K16TransposedCDistribution Type
Definition warp_gemm_dispatcher.hpp:54
WarpGemmMfmaF16F16F32M16N16K32<> Type
Definition warp_gemm_dispatcher.hpp:41
WarpGemmMfmaF16F16F32M16N16K16 Type
Definition warp_gemm_dispatcher.hpp:53
WarpGemmMfmaF16F16F32M32N32K8 Type
Definition warp_gemm_dispatcher.hpp:33
WarpGemmMfmaF16F16F32M32N32K16<> Type
Definition warp_gemm_dispatcher.hpp:35
WarpGemmMfmaF16F16F32M4N64K16 Type
Definition warp_gemm_dispatcher.hpp:47
WarpGemmMfmaF16F16F32M32N32K16TransposedCDistribution<> Type
Definition warp_gemm_dispatcher.hpp:36
WarpGemmMfmaF16F16F32M32N32K8SwizzleA Type
Definition warp_gemm_dispatcher.hpp:57
WarpGemmWmma_i32_16x16x16_i8_i8< TransposeC > Type
Definition warp_gemm_dispatcher.hpp:167
WarpGemmMfma_i32_16x16x32_i8_i8_CTransposed Type
Definition warp_gemm_dispatcher.hpp:165
WarpGemmMfma_i32_32x32x16_i8_i8 Type
Definition warp_gemm_dispatcher.hpp:162
WarpGemmMfma_i32_16x16x32_i8_i8 Type
Definition warp_gemm_dispatcher.hpp:164
WarpGemmMfma_i32_32x32x16_i8_i8_CTransposed Type
Definition warp_gemm_dispatcher.hpp:163
WarpGemmMfma_f32_16x16x128_fp4<> Type
Definition warp_gemm_dispatcher.hpp:127
WarpGemmMfma_f32_16x16x128_fp4< WGAttrNumAccessEnum::Quad > Type
Definition warp_gemm_dispatcher.hpp:152
WarpGemmMfmaF32F32F32M16N16K16<> Type
Definition warp_gemm_dispatcher.hpp:29
WarpGemmMfmaF32F32F32M16N16K16TransposedCDistribution<> Type
Definition warp_gemm_dispatcher.hpp:30
WarpGemmMfmaF32F32F32M16N16K4 Type
Definition warp_gemm_dispatcher.hpp:28
Definition warp_gemm_dispatcher.hpp:23