BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB > Struct Template Reference

BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB > Struct Template Reference

Composable Kernel: ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB > Struct Template Reference
ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB > Struct Template Reference

#include <blockwise_gemm_smfmac_xdlops.hpp>

Public Types

using ThisThreadBlock = ThisThreadBlock<BlockSize>
using ThisThreadBlock = ThisThreadBlock<BlockSize>
using ElementDataTypeA
using ElementDataTypeB

Public Member Functions

__host__ __device__ constexpr auto & GetCThreadBuffer ()
__host__ __device__ BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1 ()
template<typename AThreadBuf, typename IdxBuf, int32_t num_elems>
__device__ void SetIdxSqueezeA (AThreadBuf &a_thread_buf, IdxBuf &idx_buf)
template<typename ABlockBuffer, typename BBlockBuffer, typename CThreadBuffer>
__device__ void Run (const ABlockBuffer &a_block_buf, const BBlockBuffer &b_block_buf, CThreadBuffer &c_thread_buf) const
__host__ __device__ constexpr auto & GetCThreadBuffer ()
__host__ __device__ BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1 ()
template<typename ABlockBuffer, typename BBlockBuffer, typename CThreadBuffer>
__device__ void Run (const ABlockBuffer &a_block_buf, const BBlockBuffer &b_block_buf, CThreadBuffer &c_thread_buf) const

Static Public Member Functions

static __device__ auto GetWaveIdx ()
static __device__ auto CalculateAThreadOriginDataIndex ()
static __device__ auto CalculateBThreadOriginDataIndex ()
template<index_t m0, index_t n0, index_t xdlops_i, index_t blk_i>
static __device__ auto CalculateCThreadOriginDataIndex (Number< m0 >, Number< n0 >, Number< xdlops_i >, Number< blk_i >)
template<index_t m0, index_t n0, index_t xdlops_i, index_t blk_i>
static __device__ auto CalculateCThreadOriginDataIndex8D (Number< m0 >, Number< n0 >, Number< xdlops_i >, Number< blk_i >)
__host__ static __device__ constexpr auto GetCThreadDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 ()
__host__ static __device__ constexpr auto GetCThreadDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 ()
__host__ static __device__ constexpr auto GetCBlockDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 ()
__host__ static __device__ constexpr auto GetCBlockDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 ()
template<typename CGridDesc_M_N>
__host__ static __device__ constexpr auto MakeCGridDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 (const CGridDesc_M_N &c_grid_desc_m_n)
template<typename CGridDesc_G_M_N>
__host__ static __device__ constexpr auto MakeCGridDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 (const CGridDesc_G_M_N &c_grid_desc_g_m_n)
__host__ static __device__ constexpr auto MakeABlockDescriptor_M0_M1_M2_K ()
__host__ static __device__ constexpr auto MakeBBlockDescriptor_N0_N1_N2_K ()
static __device__ auto GetWaveIdx ()
static __device__ auto CalculateAThreadOriginDataIndex ()
static __device__ auto CalculateBThreadOriginDataIndex ()
template<index_t m0, index_t n0, index_t xdlops_i, index_t blk_i>
static __device__ auto CalculateCThreadOriginDataIndex (Number< m0 >, Number< n0 >, Number< xdlops_i >, Number< blk_i >)
template<index_t m0, index_t n0, index_t xdlops_i, index_t blk_i>
static __device__ auto CalculateCThreadOriginDataIndex8D (Number< m0 >, Number< n0 >, Number< xdlops_i >, Number< blk_i >)
__host__ static __device__ constexpr auto GetCThreadDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 ()
__host__ static __device__ constexpr auto GetCThreadDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 ()
__host__ static __device__ constexpr auto GetCBlockDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 ()
__host__ static __device__ constexpr auto GetCBlockDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 ()
template<typename CGridDesc_M_N>
__host__ static __device__ constexpr auto MakeCGridDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 (const CGridDesc_M_N &c_grid_desc_m_n)
template<typename CGridDesc_G_M_N>
__host__ static __device__ constexpr auto MakeCGridDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 (const CGridDesc_G_M_N &c_grid_desc_g_m_n)
__host__ static __device__ constexpr auto MakeABlockDescriptor_M0_M1_M2_K ()
__host__ static __device__ constexpr auto MakeBBlockDescriptor_N0_N1_N2_K ()

Public Attributes

StaticBufferTupleOfVector< AddressSpaceEnum::Vgpr, FloatAcc, MRepeat *NRepeat, xdlops_gemm.GetRegSizePerXdlops(), true > c_thread_buf_

Static Public Attributes

static constexpr auto I0 = Number<0>{}
static constexpr auto I1 = Number<1>{}
static constexpr auto I2 = Number<2>{}
static constexpr auto I3 = Number<3>{}
static constexpr index_t MWaves = MPerBlock / (MRepeat * MPerXDL)
static constexpr index_t NWaves = NPerBlock / (NRepeat * NPerXDL)
static constexpr index_t WaveSize = BlockSize / MWaves / NWaves
static constexpr index_t MPerBlock = AK0MK1BlockDesc{}.GetLength(I1)
static constexpr index_t NPerBlock = BK0NK1BlockDesc{}.GetLength(I1)
static constexpr index_t KPerBlock
static constexpr index_t A_K0 = AK0MK1BlockDesc{}.GetLength(I0)
static constexpr index_t B_K0 = BK0NK1BlockDesc{}.GetLength(I0)
static constexpr index_t A_K1 = AK0MK1BlockDesc{}.GetLength(I2)
static constexpr index_t B_K1 = BK0NK1BlockDesc{}.GetLength(I2)
static constexpr auto xdlops_gemm
static constexpr index_t KPerThread = KPerBlock / xdlops_gemm.K0PerXdlops
static constexpr auto a_block_desc_m0_m1_m2_k = MakeABlockDescriptor_M0_M1_M2_K()
static constexpr auto b_block_desc_n0_n1_n2_k = MakeBBlockDescriptor_N0_N1_N2_K()

Protected Types

using AThreadCopy
using BThreadCopy
using AThreadCopy
using BThreadCopy

Protected Attributes

AThreadCopy a_thread_copy_ {CalculateAThreadOriginDataIndex()}
BThreadCopy b_thread_copy_ {CalculateBThreadOriginDataIndex()}

Static Protected Attributes

static constexpr auto a_thread_desc_
static constexpr auto b_thread_desc_
static constexpr auto c_thread_desc_

Member Typedef Documentation

◆ AThreadCopy [1/2]

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
using ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::AThreadCopy
protected
Initial value:
decltype(a_thread_desc_),
3,
static constexpr auto a_block_desc_m0_m1_m2_k
Definition blockwise_gemm_dpp.hpp:254
static constexpr auto a_thread_desc_
Definition blockwise_gemm_dpp.hpp:312
static constexpr index_t A_K1
Definition blockwise_gemm_dpp.hpp:52
ADataType ComputeTypeA
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:34
Definition utility/sequence.hpp:43
Definition threadwise_tensor_slice_transfer.hpp:1260

◆ AThreadCopy [2/2]

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
using ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::AThreadCopy
protected
Initial value:
decltype(a_thread_desc_),
3,
conditional_t< is_same_v< ComputeTypeA, ck::tf32_t >, float, ComputeTypeA > ElementDataTypeA
Definition blockwise_gemm_xdlops.hpp:52

◆ BThreadCopy [1/2]

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
using ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::BThreadCopy
protected
Initial value:
decltype(b_thread_desc_),
3,
static constexpr index_t B_K1
Definition blockwise_gemm_dpp.hpp:53
static constexpr auto b_thread_desc_
Definition blockwise_gemm_dpp.hpp:316
static constexpr auto b_block_desc_n0_n1_n2_k
Definition blockwise_gemm_dpp.hpp:255
BDataType ComputeTypeB
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:35

◆ BThreadCopy [2/2]

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
using ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::BThreadCopy
protected
Initial value:
decltype(b_thread_desc_),
3,
conditional_t< is_same_v< ComputeTypeB, ck::tf32_t >, float, ComputeTypeB > ElementDataTypeB
Definition blockwise_gemm_xdlops.hpp:54

◆ ElementDataTypeA

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
using ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::ElementDataTypeA
Initial value:
conditional_t< is_same_v< ComputeTypeA, ck::tf32_t >, float, ComputeTypeA >
typename conditional< predicate, X, Y >::type conditional_t
Definition utility/functional.hpp:115

◆ ElementDataTypeB

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
using ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::ElementDataTypeB
Initial value:
conditional_t< is_same_v< ComputeTypeB, ck::tf32_t >, float, ComputeTypeB >

◆ ThisThreadBlock [1/2]

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
using ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::ThisThreadBlock = ThisThreadBlock<BlockSize>

◆ ThisThreadBlock [2/2]

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
using ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::ThisThreadBlock = ThisThreadBlock<BlockSize>

Constructor & Destructor Documentation

◆ BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1() [1/2]

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
__host__ __device__ ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1 ( )
inline

◆ BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1() [2/2]

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
__host__ __device__ ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1 ( )
inline

Member Function Documentation

◆ CalculateAThreadOriginDataIndex() [1/2]

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
__device__ auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::CalculateAThreadOriginDataIndex ( )
inline static

◆ CalculateAThreadOriginDataIndex() [2/2]

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
__device__ auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::CalculateAThreadOriginDataIndex ( )
inline static

◆ CalculateBThreadOriginDataIndex() [1/2]

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
__device__ auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::CalculateBThreadOriginDataIndex ( )
inline static

◆ CalculateBThreadOriginDataIndex() [2/2]

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
__device__ auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::CalculateBThreadOriginDataIndex ( )
inline static

◆ CalculateCThreadOriginDataIndex() [1/2]

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
template<index_t m0, index_t n0, index_t xdlops_i, index_t blk_i>
__device__ auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::CalculateCThreadOriginDataIndex ( Number< m0 > ,
Number< n0 > ,
Number< xdlops_i > ,
Number< blk_i >  )
inline static

◆ CalculateCThreadOriginDataIndex() [2/2]

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
template<index_t m0, index_t n0, index_t xdlops_i, index_t blk_i>
__device__ auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::CalculateCThreadOriginDataIndex ( Number< m0 > ,
Number< n0 > ,
Number< xdlops_i > ,
Number< blk_i >  )
inline static

◆ CalculateCThreadOriginDataIndex8D() [1/2]

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
template<index_t m0, index_t n0, index_t xdlops_i, index_t blk_i>
__device__ auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::CalculateCThreadOriginDataIndex8D ( Number< m0 > ,
Number< n0 > ,
Number< xdlops_i > ,
Number< blk_i >  )
inline static

◆ CalculateCThreadOriginDataIndex8D() [2/2]

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
template<index_t m0, index_t n0, index_t xdlops_i, index_t blk_i>
__device__ auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::CalculateCThreadOriginDataIndex8D ( Number< m0 > ,
Number< n0 > ,
Number< xdlops_i > ,
Number< blk_i >  )
inline static

◆ GetCBlockDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2() [1/2]

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
__host__ static __device__ constexpr auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::GetCBlockDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 ( )
inline static constexpr

◆ GetCBlockDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2() [2/2]

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
__host__ static __device__ constexpr auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::GetCBlockDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 ( )
inline static constexpr

◆ GetCBlockDescriptor_M0_N0_M1_N1_M2_M3_M4_N2() [1/2]

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
__host__ static __device__ constexpr auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::GetCBlockDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 ( )
inline static constexpr

◆ GetCBlockDescriptor_M0_N0_M1_N1_M2_M3_M4_N2() [2/2]

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
__host__ static __device__ constexpr auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::GetCBlockDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 ( )
inline static constexpr

◆ GetCThreadBuffer() [1/2]

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
__host__ __device__ constexpr auto & ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::GetCThreadBuffer ( )
inline constexpr

◆ GetCThreadBuffer() [2/2]

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
__host__ __device__ constexpr auto & ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::GetCThreadBuffer ( )
inline constexpr

◆ GetCThreadDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2() [1/2]

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
__host__ static __device__ constexpr auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::GetCThreadDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 ( )
inline static constexpr

◆ GetCThreadDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2() [2/2]

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
__host__ static __device__ constexpr auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::GetCThreadDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 ( )
inline static constexpr

◆ GetCThreadDescriptor_M0_N0_M1_N1_M2_M3_M4_N2() [1/2]

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
__host__ static __device__ constexpr auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::GetCThreadDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 ( )
inline static constexpr

◆ GetCThreadDescriptor_M0_N0_M1_N1_M2_M3_M4_N2() [2/2]

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
__host__ static __device__ constexpr auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::GetCThreadDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 ( )
inline static constexpr

◆ GetWaveIdx() [1/2]

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
__device__ auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::GetWaveIdx ( )
inline static

◆ GetWaveIdx() [2/2]

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
__device__ auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::GetWaveIdx ( )
inline static

◆ MakeABlockDescriptor_M0_M1_M2_K() [1/2]

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
__host__ static __device__ constexpr auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::MakeABlockDescriptor_M0_M1_M2_K ( )
inline static constexpr

◆ MakeABlockDescriptor_M0_M1_M2_K() [2/2]

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
__host__ static __device__ constexpr auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::MakeABlockDescriptor_M0_M1_M2_K ( )
inline static constexpr

◆ MakeBBlockDescriptor_N0_N1_N2_K() [1/2]

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
__host__ static __device__ constexpr auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::MakeBBlockDescriptor_N0_N1_N2_K ( )
inline static constexpr

◆ MakeBBlockDescriptor_N0_N1_N2_K() [2/2]

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
__host__ static __device__ constexpr auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::MakeBBlockDescriptor_N0_N1_N2_K ( )
inline static constexpr

◆ MakeCGridDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2() [1/2]

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
template<typename CGridDesc_G_M_N>
__host__ static __device__ constexpr auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::MakeCGridDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 ( const CGridDesc_G_M_N & c_grid_desc_g_m_n)
inline static constexpr

◆ MakeCGridDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2() [2/2]

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
template<typename CGridDesc_G_M_N>
__host__ static __device__ constexpr auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::MakeCGridDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 ( const CGridDesc_G_M_N & c_grid_desc_g_m_n)
inline static constexpr

◆ MakeCGridDescriptor_M0_N0_M1_N1_M2_M3_M4_N2() [1/2]

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
template<typename CGridDesc_M_N>
__host__ static __device__ constexpr auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::MakeCGridDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 ( const CGridDesc_M_N & c_grid_desc_m_n)
inline static constexpr

◆ MakeCGridDescriptor_M0_N0_M1_N1_M2_M3_M4_N2() [2/2]

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
template<typename CGridDesc_M_N>
__host__ static __device__ constexpr auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::MakeCGridDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 ( const CGridDesc_M_N & c_grid_desc_m_n)
inline static constexpr

◆ Run() [1/2]

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
template<typename ABlockBuffer, typename BBlockBuffer, typename CThreadBuffer>
__device__ void ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::Run ( const ABlockBuffer & a_block_buf,
const BBlockBuffer & b_block_buf,
CThreadBuffer & c_thread_buf ) const
inline

◆ Run() [2/2]

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
template<typename ABlockBuffer, typename BBlockBuffer, typename CThreadBuffer>
__device__ void ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::Run ( const ABlockBuffer & a_block_buf,
const BBlockBuffer & b_block_buf,
CThreadBuffer & c_thread_buf ) const
inline

◆ SetIdxSqueezeA()

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
template<typename AThreadBuf, typename IdxBuf, int32_t num_elems>
__device__ void ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::SetIdxSqueezeA ( AThreadBuf & a_thread_buf,
IdxBuf & idx_buf )
inline

Member Data Documentation

◆ a_block_desc_m0_m1_m2_k

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
constexpr auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::a_block_desc_m0_m1_m2_k = MakeABlockDescriptor_M0_M1_M2_K()
static constexpr

◆ A_K0

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
constexpr index_t ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::A_K0 = AK0MK1BlockDesc{}.GetLength(I0)
static constexpr

◆ A_K1

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
constexpr index_t ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::A_K1 = AK0MK1BlockDesc{}.GetLength(I2)
static constexpr

◆ a_thread_copy_

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
AThreadCopy ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::a_thread_copy_ {CalculateAThreadOriginDataIndex()}
protected

◆ a_thread_desc_

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
constexpr auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::a_thread_desc_
static constexpr protected
Initial value:
=
integral_constant< index_t, N > Number
Definition number.hpp:12
__host__ __device__ constexpr auto make_naive_tensor_descriptor_packed(const Tuple< Lengths... > &lengths)
Definition tensor_descriptor_helper.hpp:101
__host__ __device__ constexpr auto make_tuple(Xs &&... xs)
Definition utility/tuple.hpp:211
static constexpr auto I1
Definition blockwise_gemm_dpp.hpp:35

◆ b_block_desc_n0_n1_n2_k

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
constexpr auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::b_block_desc_n0_n1_n2_k = MakeBBlockDescriptor_N0_N1_N2_K()
static constexpr

◆ B_K0

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
constexpr index_t ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::B_K0 = BK0NK1BlockDesc{}.GetLength(I0)
static constexpr

◆ B_K1

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
constexpr index_t ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::B_K1 = BK0NK1BlockDesc{}.GetLength(I2)
static constexpr

◆ b_thread_copy_

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
BThreadCopy ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::b_thread_copy_ {CalculateBThreadOriginDataIndex()}
protected

◆ b_thread_desc_

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
constexpr auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::b_thread_desc_
static constexpr protected

◆ c_thread_buf_

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
StaticBufferTupleOfVector< AddressSpaceEnum::Vgpr, FloatAcc, MRepeat * NRepeat, xdlops_gemm.GetRegSizePerXdlops(), true > ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::c_thread_buf_

◆ c_thread_desc_

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
constexpr auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::c_thread_desc_
static constexpr protected

◆ I0

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
constexpr auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::I0 = Number<0>{}
static constexpr

◆ I1

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
constexpr auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::I1 = Number<1>{}
static constexpr

◆ I2

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
constexpr auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::I2 = Number<2>{}
static constexpr

◆ I3

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
constexpr auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::I3 = Number<3>{}
static constexpr

◆ KPerBlock

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
constexpr index_t ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::KPerBlock
static constexpr
Initial value:
=
BK0NK1BlockDesc{}.GetLength(I0) * BK0NK1BlockDesc{}.GetLength(I2)
static constexpr auto I0
Definition blockwise_gemm_smfmac_xdlops.hpp:45

◆ KPerThread

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
constexpr index_t ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::KPerThread = KPerBlock / xdlops_gemm.K0PerXdlops
static constexpr

◆ MPerBlock

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
constexpr index_t ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::MPerBlock = AK0MK1BlockDesc{}.GetLength(I1)
static constexpr

◆ MWaves

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
constexpr index_t ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::MWaves = MPerBlock / (MRepeat * MPerXDL)
static constexpr

◆ NPerBlock

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
constexpr index_t ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::NPerBlock = BK0NK1BlockDesc{}.GetLength(I1)
static constexpr

◆ NWaves

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
constexpr index_t ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::NWaves = NPerBlock / (NRepeat * NPerXDL)
static constexpr

◆ WaveSize

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
constexpr index_t ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::WaveSize = BlockSize / MWaves / NWaves
static constexpr

◆ xdlops_gemm

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
constexpr auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::xdlops_gemm
static constexpr

The documentation for this struct was generated from the following files: