GetReduceCountPerThreadForMultiblockWelford< K_BlockTileSize, KThreadSliceSize > Struct Template Reference

GetReduceCountPerThreadForMultiblockWelford< K_BlockTileSize, KThreadSliceSize > Struct Template Reference

Composable Kernel: ck::tensor_operation::device::GetReduceCountPerThreadForMultiblockWelford< K_BlockTileSize, KThreadSliceSize > Struct Template Reference
ck::tensor_operation::device::GetReduceCountPerThreadForMultiblockWelford< K_BlockTileSize, KThreadSliceSize > Struct Template Reference

#include <welford_helper.hpp>

Public Member Functions

 GetReduceCountPerThreadForMultiblockWelford (index_t blkGroupSize, index_t numBlockTileIteration, long_index_t reduce_length)
__device__ index_t operator() (index_t block_local_id, index_t thread_k_cluster_id) const

Public Attributes

index_t blkGroupSize_
index_t numBlockTileIteration_
index_t last_block_reduce_length_
index_t numBlockTileIterationByLastBlock_

Constructor & Destructor Documentation

◆ GetReduceCountPerThreadForMultiblockWelford()

template<index_t K_BlockTileSize, index_t KThreadSliceSize>
ck::tensor_operation::device::GetReduceCountPerThreadForMultiblockWelford< K_BlockTileSize, KThreadSliceSize >::GetReduceCountPerThreadForMultiblockWelford ( index_t blkGroupSize, index_t numBlockTileIteration, long_index_t reduce_length ) [inline]

Member Function Documentation

◆ operator()()

template<index_t K_BlockTileSize, index_t KThreadSliceSize>
__device__ index_t ck::tensor_operation::device::GetReduceCountPerThreadForMultiblockWelford< K_BlockTileSize, KThreadSliceSize >::operator() ( index_t block_local_id, index_t thread_k_cluster_id ) const [inline]

Member Data Documentation

◆ blkGroupSize_

template<index_t K_BlockTileSize, index_t KThreadSliceSize>
index_t ck::tensor_operation::device::GetReduceCountPerThreadForMultiblockWelford< K_BlockTileSize, KThreadSliceSize >::blkGroupSize_

◆ last_block_reduce_length_

template<index_t K_BlockTileSize, index_t KThreadSliceSize>
index_t ck::tensor_operation::device::GetReduceCountPerThreadForMultiblockWelford< K_BlockTileSize, KThreadSliceSize >::last_block_reduce_length_

◆ numBlockTileIteration_

template<index_t K_BlockTileSize, index_t KThreadSliceSize>
index_t ck::tensor_operation::device::GetReduceCountPerThreadForMultiblockWelford< K_BlockTileSize, KThreadSliceSize >::numBlockTileIteration_

◆ numBlockTileIterationByLastBlock_

template<index_t K_BlockTileSize, index_t KThreadSliceSize>
index_t ck::tensor_operation::device::GetReduceCountPerThreadForMultiblockWelford< K_BlockTileSize, KThreadSliceSize >::numBlockTileIterationByLastBlock_

The documentation for this struct was generated from the following file: