#include <smfmac_xdlops_gemm.hpp>
|
| __host__ __device__ constexpr | SparseXdlopsGemm () |
| template<class FloatA, class FloatB, class Idx, class FloatC> |
| __device__ void | Run (const FloatA &p_a_wave, const FloatB &p_b_wave, const Idx &idx, FloatC &p_c_thread) const |
◆ CIndex
template<typename base_type,
index_t MPerXdlops,
index_t NPerXdlops,
index_t KPack, typename additional_type = base_type>
◆ CIndex4D
template<typename base_type,
index_t MPerXdlops,
index_t NPerXdlops,
index_t KPack, typename additional_type = base_type>
◆ SparseXdlopsGemm()
template<typename base_type,
index_t MPerXdlops,
index_t NPerXdlops,
index_t KPack, typename additional_type = base_type>
| __host__ __device__ constexpr ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::SparseXdlopsGemm |
( |
| ) |
|
|
inline constexpr |
◆ CalculateAThreadOriginDataIndex()
template<typename base_type,
index_t MPerXdlops,
index_t NPerXdlops,
index_t KPack, typename additional_type = base_type>
| __host__ static __device__ auto ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::CalculateAThreadOriginDataIndex |
( |
| ) |
|
|
inline static |
◆ CalculateBThreadOriginDataIndex()
template<typename base_type,
index_t MPerXdlops,
index_t NPerXdlops,
index_t KPack, typename additional_type = base_type>
| __host__ static __device__ auto ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::CalculateBThreadOriginDataIndex |
( |
| ) |
|
|
inline static |
◆ GetBeginOfThreadBlk()
template<typename base_type,
index_t MPerXdlops,
index_t NPerXdlops,
index_t KPack, typename additional_type = base_type>
◆ GetBeginOfThreadBlk4D()
template<typename base_type,
index_t MPerXdlops,
index_t NPerXdlops,
index_t KPack, typename additional_type = base_type>
◆ GetBlkIdx()
template<typename base_type,
index_t MPerXdlops,
index_t NPerXdlops,
index_t KPack, typename additional_type = base_type>
| __device__ auto ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::GetBlkIdx |
( |
| ) |
|
|
inline static |
◆ GetCM0M1M2NThreadBlkLengths()
template<typename base_type,
index_t MPerXdlops,
index_t NPerXdlops,
index_t KPack, typename additional_type = base_type>
| __host__ static __device__ constexpr auto ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::GetCM0M1M2NThreadBlkLengths |
( |
| ) |
|
|
inline static constexpr |
◆ GetLaneId()
template<typename base_type,
index_t MPerXdlops,
index_t NPerXdlops,
index_t KPack, typename additional_type = base_type>
| __device__ auto ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::GetLaneId |
( |
| ) |
|
|
inline static |
◆ GetNumBlks()
template<typename base_type,
index_t MPerXdlops,
index_t NPerXdlops,
index_t KPack, typename additional_type = base_type>
◆ GetNumXdlops()
template<typename base_type,
index_t MPerXdlops,
index_t NPerXdlops,
index_t KPack, typename additional_type = base_type>
◆ GetRegSizePerXdlops()
template<typename base_type,
index_t MPerXdlops,
index_t NPerXdlops,
index_t KPack, typename additional_type = base_type>
| __device__ constexpr index_t ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::GetRegSizePerXdlops |
( |
| ) |
|
|
inline static constexpr |
◆ GetWaveSize()
template<typename base_type,
index_t MPerXdlops,
index_t NPerXdlops,
index_t KPack, typename additional_type = base_type>
◆ MakeCDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2()
template<typename base_type,
index_t MPerXdlops,
index_t NPerXdlops,
index_t KPack, typename additional_type = base_type>
template<typename CDesc_G_M0_N0_M1_N1_M2_N2>
| __host__ static __device__ constexpr auto ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::MakeCDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 |
( |
const CDesc_G_M0_N0_M1_N1_M2_N2 & | c_desc_g_m0_n0_m1_n1_m2_n2 | ) |
|
|
inline static constexpr |
◆ MakeCDescriptor_M0_N0_M1_N1_M2_M3_M4_N2()
template<typename base_type,
index_t MPerXdlops,
index_t NPerXdlops,
index_t KPack, typename additional_type = base_type>
template<typename CDesc_M0_N0_M1_N1_M2_N2>
| __host__ static __device__ constexpr auto ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::MakeCDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 |
( |
const CDesc_M0_N0_M1_N1_M2_N2 & | c_desc_m0_n0_m1_n1_m2_n2 | ) |
|
|
inline static constexpr |
◆ Run()
template<typename base_type,
index_t MPerXdlops,
index_t NPerXdlops,
index_t KPack, typename additional_type = base_type>
template<class FloatA, class FloatB, class Idx, class FloatC>
| __device__ void ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::Run |
( |
const FloatA & | p_a_wave, |
|
|
const FloatB & | p_b_wave, |
|
|
const Idx & | idx, |
|
|
FloatC & | p_c_thread ) const |
|
inline |
◆ I0
template<typename base_type,
index_t MPerXdlops,
index_t NPerXdlops,
index_t KPack, typename additional_type = base_type>
◆ I1
template<typename base_type,
index_t MPerXdlops,
index_t NPerXdlops,
index_t KPack, typename additional_type = base_type>
◆ I2
template<typename base_type,
index_t MPerXdlops,
index_t NPerXdlops,
index_t KPack, typename additional_type = base_type>
◆ I3
template<typename base_type,
index_t MPerXdlops,
index_t NPerXdlops,
index_t KPack, typename additional_type = base_type>
◆ I4
template<typename base_type,
index_t MPerXdlops,
index_t NPerXdlops,
index_t KPack, typename additional_type = base_type>
◆ I5
template<typename base_type,
index_t MPerXdlops,
index_t NPerXdlops,
index_t KPack, typename additional_type = base_type>
◆ K0PerXdlops
template<typename base_type,
index_t MPerXdlops,
index_t NPerXdlops,
index_t KPack, typename additional_type = base_type>
◆ K1PerXdlops
template<typename base_type,
index_t MPerXdlops,
index_t NPerXdlops,
index_t KPack, typename additional_type = base_type>
| auto ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::K1PerXdlops = smfmac.GetK1PerXdlops() |
|
static constexpr |
◆ KPerXdlops
template<typename base_type,
index_t MPerXdlops,
index_t NPerXdlops,
index_t KPack, typename additional_type = base_type>
| auto ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::KPerXdlops = smfmac.GetKPerXdlops() |
|
static constexpr |
◆ smfmac
template<typename base_type,
index_t MPerXdlops,
index_t NPerXdlops,
index_t KPack, typename additional_type = base_type>
Initial value:=
Definition smfmac_xdlops_gemm.hpp:140
◆ smfmac_instr
template<typename base_type,
index_t MPerXdlops,
index_t NPerXdlops,
index_t KPack, typename additional_type = base_type>
| auto ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::smfmac_instr = smfmac.selected_smfmac |
|
static constexpr |
The documentation for this struct was generated from the following file: