SharedMemTrait Struct Reference#
ck::GridwiseBatchedGemmGemm_Xdl_CShuffle< FloatAB, FloatGemmAcc, FloatCShuffle, FloatC, AElementwiseOperation, BElementwiseOperation, AccElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation, CGlobalMemoryDataOperation, AGridDesc_AK0_M_AK1, BGridDesc_BK0_N_BK1, B1GridDesc_BK0_N_BK1, CGridDesc_M_N, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, Gemm1NPerBlock, Gemm1KPerBlock, AK1Value, BK1Value, B1K1Value, MPerXdl, NPerXdl, MXdlPerWave, NXdlPerWave, Gemm1NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BThreadTransferSrcResetCoordinateAfterRun, BBlockLdsExtraN, B1BlockTransferThreadClusterLengths_BK0_N_BK1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_BK1, B1ThreadTransferSrcResetCoordinateAfterRun, B1BlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, LoopSched, PipelineVer >::SharedMemTrait Struct Reference
#include <gridwise_batched_gemm_gemm_xdl_cshuffle_v1.hpp>
Static Public Attributes | |
| static constexpr auto | a_block_desc_ak0_m_ak1 |
| static constexpr auto | b_block_desc_bk0_n_bk1 |
| static constexpr auto | b1_block_desc_bk0_n_bk1 |
| static constexpr auto | max_lds_align = math::lcm(math::lcm(AK1, BK1), B1K1) |
| static constexpr auto | a_block_space_size_aligned |
| static constexpr auto | b_block_space_size_aligned |
| static constexpr auto | b1_block_space_size_aligned |
| static constexpr auto | a_block_space_offset = 0 |
| static constexpr auto | b_block_space_offset = a_block_space_size_aligned.value |
| static constexpr auto | b1_block_space_offset = 0 |
| static constexpr auto | c_shuffle_block_desc_mblock_mperblock_nblock_nperblock |
| static constexpr auto | c_block_space_size |
Member Data Documentation
◆ a_block_desc_ak0_m_ak1
template<typename FloatAB, typename FloatGemmAcc, typename FloatCShuffle, typename FloatC, typename AElementwiseOperation, typename BElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_AK0_M_AK1, typename BGridDesc_BK0_N_BK1, typename B1GridDesc_BK0_N_BK1, typename CGridDesc_M_N, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t Gemm1NPerBlock, index_t Gemm1KPerBlock, index_t AK1Value, index_t BK1Value, index_t B1K1Value, index_t MPerXdl, index_t NPerXdl, index_t MXdlPerWave, index_t NXdlPerWave, index_t Gemm1NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, typename B1BlockTransferThreadClusterLengths_BK0_N_BK1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_BK1, bool B1ThreadTransferSrcResetCoordinateAfterRun, index_t B1BlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched, PipelineVersion PipelineVer = PipelineVersion::v1>
|
staticconstexpr |
Initial value:
=
__host__ static __device__ constexpr auto GetABlockDescriptor_AK0PerBlock_MPerBlock_AK1()
Definition gridwise_batched_gemm_gemm_xdl_cshuffle_v1.hpp:144
◆ a_block_space_offset
template<typename FloatAB, typename FloatGemmAcc, typename FloatCShuffle, typename FloatC, typename AElementwiseOperation, typename BElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_AK0_M_AK1, typename BGridDesc_BK0_N_BK1, typename B1GridDesc_BK0_N_BK1, typename CGridDesc_M_N, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t Gemm1NPerBlock, index_t Gemm1KPerBlock, index_t AK1Value, index_t BK1Value, index_t B1K1Value, index_t MPerXdl, index_t NPerXdl, index_t MXdlPerWave, index_t NXdlPerWave, index_t Gemm1NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, typename B1BlockTransferThreadClusterLengths_BK0_N_BK1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_BK1, bool B1ThreadTransferSrcResetCoordinateAfterRun, index_t B1BlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched, PipelineVersion PipelineVer = PipelineVersion::v1>
|
staticconstexpr |
◆ a_block_space_size_aligned
template<typename FloatAB, typename FloatGemmAcc, typename FloatCShuffle, typename FloatC, typename AElementwiseOperation, typename BElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_AK0_M_AK1, typename BGridDesc_BK0_N_BK1, typename B1GridDesc_BK0_N_BK1, typename CGridDesc_M_N, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t Gemm1NPerBlock, index_t Gemm1KPerBlock, index_t AK1Value, index_t BK1Value, index_t B1K1Value, index_t MPerXdl, index_t NPerXdl, index_t MXdlPerWave, index_t NXdlPerWave, index_t Gemm1NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, typename B1BlockTransferThreadClusterLengths_BK0_N_BK1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_BK1, bool B1ThreadTransferSrcResetCoordinateAfterRun, index_t B1BlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched, PipelineVersion PipelineVer = PipelineVersion::v1>
|
staticconstexpr |
Initial value:
a_block_desc_ak0_m_ak1.GetElementSpaceSize(), max_lds_align)
__host__ __device__ constexpr auto integer_least_multiple(X x, Y y)
Definition utility/math.hpp:78
static constexpr auto a_block_desc_ak0_m_ak1
Definition gridwise_batched_gemm_gemm_xdl_cshuffle_v1.hpp:321
static constexpr auto max_lds_align
Definition gridwise_batched_gemm_gemm_wmma_cshuffle_v3.hpp:504
◆ b1_block_desc_bk0_n_bk1
template<typename FloatAB, typename FloatGemmAcc, typename FloatCShuffle, typename FloatC, typename AElementwiseOperation, typename BElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_AK0_M_AK1, typename BGridDesc_BK0_N_BK1, typename B1GridDesc_BK0_N_BK1, typename CGridDesc_M_N, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t Gemm1NPerBlock, index_t Gemm1KPerBlock, index_t AK1Value, index_t BK1Value, index_t B1K1Value, index_t MPerXdl, index_t NPerXdl, index_t MXdlPerWave, index_t NXdlPerWave, index_t Gemm1NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, typename B1BlockTransferThreadClusterLengths_BK0_N_BK1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_BK1, bool B1ThreadTransferSrcResetCoordinateAfterRun, index_t B1BlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched, PipelineVersion PipelineVer = PipelineVersion::v1>
|
staticconstexpr |
Initial value:
=
__host__ static __device__ constexpr auto GetB1BlockDescriptor_BK0PerBlock_NPerBlock_BK1()
Definition gridwise_batched_gemm_gemm_xdl_cshuffle_v1.hpp:160
◆ b1_block_space_offset
template<typename FloatAB, typename FloatGemmAcc, typename FloatCShuffle, typename FloatC, typename AElementwiseOperation, typename BElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_AK0_M_AK1, typename BGridDesc_BK0_N_BK1, typename B1GridDesc_BK0_N_BK1, typename CGridDesc_M_N, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t Gemm1NPerBlock, index_t Gemm1KPerBlock, index_t AK1Value, index_t BK1Value, index_t B1K1Value, index_t MPerXdl, index_t NPerXdl, index_t MXdlPerWave, index_t NXdlPerWave, index_t Gemm1NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, typename B1BlockTransferThreadClusterLengths_BK0_N_BK1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_BK1, bool B1ThreadTransferSrcResetCoordinateAfterRun, index_t B1BlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched, PipelineVersion PipelineVer = PipelineVersion::v1>
|
staticconstexpr |
◆ b1_block_space_size_aligned
template<typename FloatAB, typename FloatGemmAcc, typename FloatCShuffle, typename FloatC, typename AElementwiseOperation, typename BElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_AK0_M_AK1, typename BGridDesc_BK0_N_BK1, typename B1GridDesc_BK0_N_BK1, typename CGridDesc_M_N, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t Gemm1NPerBlock, index_t Gemm1KPerBlock, index_t AK1Value, index_t BK1Value, index_t B1K1Value, index_t MPerXdl, index_t NPerXdl, index_t MXdlPerWave, index_t NXdlPerWave, index_t Gemm1NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, typename B1BlockTransferThreadClusterLengths_BK0_N_BK1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_BK1, bool B1ThreadTransferSrcResetCoordinateAfterRun, index_t B1BlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched, PipelineVersion PipelineVer = PipelineVersion::v1>
|
staticconstexpr |
Initial value:
b1_block_desc_bk0_n_bk1.GetElementSpaceSize(), max_lds_align)
static constexpr auto b1_block_desc_bk0_n_bk1
Definition gridwise_batched_gemm_gemm_xdl_cshuffle_v1.hpp:325
◆ b_block_desc_bk0_n_bk1
template<typename FloatAB, typename FloatGemmAcc, typename FloatCShuffle, typename FloatC, typename AElementwiseOperation, typename BElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_AK0_M_AK1, typename BGridDesc_BK0_N_BK1, typename B1GridDesc_BK0_N_BK1, typename CGridDesc_M_N, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t Gemm1NPerBlock, index_t Gemm1KPerBlock, index_t AK1Value, index_t BK1Value, index_t B1K1Value, index_t MPerXdl, index_t NPerXdl, index_t MXdlPerWave, index_t NXdlPerWave, index_t Gemm1NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, typename B1BlockTransferThreadClusterLengths_BK0_N_BK1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_BK1, bool B1ThreadTransferSrcResetCoordinateAfterRun, index_t B1BlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched, PipelineVersion PipelineVer = PipelineVersion::v1>
|
staticconstexpr |
Initial value:
=
__host__ static __device__ constexpr auto GetBBlockDescriptor_BK0PerBlock_NPerBlock_BK1()
Definition gridwise_batched_gemm_gemm_xdl_cshuffle_v1.hpp:152
◆ b_block_space_offset
template<typename FloatAB, typename FloatGemmAcc, typename FloatCShuffle, typename FloatC, typename AElementwiseOperation, typename BElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_AK0_M_AK1, typename BGridDesc_BK0_N_BK1, typename B1GridDesc_BK0_N_BK1, typename CGridDesc_M_N, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t Gemm1NPerBlock, index_t Gemm1KPerBlock, index_t AK1Value, index_t BK1Value, index_t B1K1Value, index_t MPerXdl, index_t NPerXdl, index_t MXdlPerWave, index_t NXdlPerWave, index_t Gemm1NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, typename B1BlockTransferThreadClusterLengths_BK0_N_BK1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_BK1, bool B1ThreadTransferSrcResetCoordinateAfterRun, index_t B1BlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched, PipelineVersion PipelineVer = PipelineVersion::v1>
|
staticconstexpr |
◆ b_block_space_size_aligned
template<typename FloatAB, typename FloatGemmAcc, typename FloatCShuffle, typename FloatC, typename AElementwiseOperation, typename BElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_AK0_M_AK1, typename BGridDesc_BK0_N_BK1, typename B1GridDesc_BK0_N_BK1, typename CGridDesc_M_N, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t Gemm1NPerBlock, index_t Gemm1KPerBlock, index_t AK1Value, index_t BK1Value, index_t B1K1Value, index_t MPerXdl, index_t NPerXdl, index_t MXdlPerWave, index_t NXdlPerWave, index_t Gemm1NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, typename B1BlockTransferThreadClusterLengths_BK0_N_BK1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_BK1, bool B1ThreadTransferSrcResetCoordinateAfterRun, index_t B1BlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched, PipelineVersion PipelineVer = PipelineVersion::v1>
|
staticconstexpr |
Initial value:
b_block_desc_bk0_n_bk1.GetElementSpaceSize(), max_lds_align)
static constexpr auto b_block_desc_bk0_n_bk1
Definition gridwise_batched_gemm_gemm_xdl_cshuffle_v1.hpp:323
static constexpr auto max_lds_align
Definition gridwise_batched_gemm_gemm_xdl_cshuffle_v1.hpp:328
◆ c_block_space_size
template<typename FloatAB, typename FloatGemmAcc, typename FloatCShuffle, typename FloatC, typename AElementwiseOperation, typename BElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_AK0_M_AK1, typename BGridDesc_BK0_N_BK1, typename B1GridDesc_BK0_N_BK1, typename CGridDesc_M_N, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t Gemm1NPerBlock, index_t Gemm1KPerBlock, index_t AK1Value, index_t BK1Value, index_t B1K1Value, index_t MPerXdl, index_t NPerXdl, index_t MXdlPerWave, index_t NXdlPerWave, index_t Gemm1NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, typename B1BlockTransferThreadClusterLengths_BK0_N_BK1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_BK1, bool B1ThreadTransferSrcResetCoordinateAfterRun, index_t B1BlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched, PipelineVersion PipelineVer = PipelineVersion::v1>
|
staticconstexpr |
Initial value:
=
c_shuffle_block_desc_mblock_mperblock_nblock_nperblock.GetElementSpaceSize()
static constexpr auto c_shuffle_block_desc_mblock_mperblock_nblock_nperblock
Definition gridwise_batched_gemm_gemm_xdl_cshuffle_v1.hpp:342
◆ c_shuffle_block_desc_mblock_mperblock_nblock_nperblock
template<typename FloatAB, typename FloatGemmAcc, typename FloatCShuffle, typename FloatC, typename AElementwiseOperation, typename BElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_AK0_M_AK1, typename BGridDesc_BK0_N_BK1, typename B1GridDesc_BK0_N_BK1, typename CGridDesc_M_N, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t Gemm1NPerBlock, index_t Gemm1KPerBlock, index_t AK1Value, index_t BK1Value, index_t B1K1Value, index_t MPerXdl, index_t NPerXdl, index_t MXdlPerWave, index_t NXdlPerWave, index_t Gemm1NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, typename B1BlockTransferThreadClusterLengths_BK0_N_BK1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_BK1, bool B1ThreadTransferSrcResetCoordinateAfterRun, index_t B1BlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched, PipelineVersion PipelineVer = PipelineVersion::v1>
|
staticconstexpr |
Initial value:
=
__host__ static __device__ constexpr auto GetCShuffleBlockDescriptor_MBlock_MPerBlock_NBlock_NPerBlock()
Definition gridwise_batched_gemm_gemm_xdl_cshuffle_v1.hpp:169
◆ max_lds_align
template<typename FloatAB, typename FloatGemmAcc, typename FloatCShuffle, typename FloatC, typename AElementwiseOperation, typename BElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_AK0_M_AK1, typename BGridDesc_BK0_N_BK1, typename B1GridDesc_BK0_N_BK1, typename CGridDesc_M_N, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t Gemm1NPerBlock, index_t Gemm1KPerBlock, index_t AK1Value, index_t BK1Value, index_t B1K1Value, index_t MPerXdl, index_t NPerXdl, index_t MXdlPerWave, index_t NXdlPerWave, index_t Gemm1NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, typename B1BlockTransferThreadClusterLengths_BK0_N_BK1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_BK1, bool B1ThreadTransferSrcResetCoordinateAfterRun, index_t B1BlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched, PipelineVersion PipelineVer = PipelineVersion::v1>
|
staticconstexpr |
The documentation for this struct was generated from the following file: