GridwiseReduceSecondHalfBatchNormBackwardFinal< XDataType, DyDataType, DxDataType, AccDataType, ScaleDataType, DscaleDbiasDataType, MeanVarDataType, DyElementwiseOp, XYGridDesc_M_K, DscaleDbiasGridDesc_M_K, MeanVarGridDesc_M, ScaleBiasGridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XDyDxVectorDim, XSrcVectorSize, DySrcVectorSize, DxDstVectorSize, ScaleSrcVectorSize, DscaleDbiasDstVectorSize, MeanVarSrcVectorSize > Struct Template Reference

GridwiseReduceSecondHalfBatchNormBackwardFinal< XDataType, DyDataType, DxDataType, AccDataType, ScaleDataType, DscaleDbiasDataType, MeanVarDataType, DyElementwiseOp, XYGridDesc_M_K, DscaleDbiasGridDesc_M_K, MeanVarGridDesc_M, ScaleBiasGridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XDyDxVectorDim, XSrcVectorSize, DySrcVectorSize, DxDstVectorSize, ScaleSrcVectorSize, DscaleDbiasDstVectorSize, MeanVarSrcVectorSize > Struct Template Reference

Composable Kernel: ck::GridwiseReduceSecondHalfBatchNormBackwardFinal< XDataType, DyDataType, DxDataType, AccDataType, ScaleDataType, DscaleDbiasDataType, MeanVarDataType, DyElementwiseOp, XYGridDesc_M_K, DscaleDbiasGridDesc_M_K, MeanVarGridDesc_M, ScaleBiasGridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XDyDxVectorDim, XSrcVectorSize, DySrcVectorSize, DxDstVectorSize, ScaleSrcVectorSize, DscaleDbiasDstVectorSize, MeanVarSrcVectorSize > Struct Template Reference
ck::GridwiseReduceSecondHalfBatchNormBackwardFinal< XDataType, DyDataType, DxDataType, AccDataType, ScaleDataType, DscaleDbiasDataType, MeanVarDataType, DyElementwiseOp, XYGridDesc_M_K, DscaleDbiasGridDesc_M_K, MeanVarGridDesc_M, ScaleBiasGridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XDyDxVectorDim, XSrcVectorSize, DySrcVectorSize, DxDstVectorSize, ScaleSrcVectorSize, DscaleDbiasDstVectorSize, MeanVarSrcVectorSize > Struct Template Reference

#include <gridwise_multiblock_reduce_second_half_batchnorm_backward_final.hpp>

Public Types

using ThreadClusterLengths_M_K = Sequence<MThreadClusterSize, KThreadClusterSize>
using ThreadBufferDimAccessOrder
using ThreadClusterArrangeOrder
using ThreadReduceSrcDesc_M_1
using ThreadReduceDstDesc_M
using BlockwiseReduce
using ThreadwiseReduce
using PassThroughOp = tensor_operation::element_wise::PassThrough

Static Public Member Functions

static __device__ void Run (const XYGridDesc_M_K &x_grid_desc_m_k, const XYGridDesc_M_K &dy_grid_desc_m_k, const XYGridDesc_M_K &dx_grid_desc_m_k, const DscaleDbiasGridDesc_M_K &dscale_dbias_grid_desc_m_k, const MeanVarGridDesc_M &mean_var_grid_desc_m, const ScaleBiasGridDesc_M &scale_grid_desc_m, const ScaleBiasGridDesc_M &dscale_dbias_grid_desc_m, index_t blkgroup_size, long_index_t reduce_size, index_t num_xy_k_block_tile_iteration, index_t num_dscale_dbias_k_block_tile_iteration, const DscaleDbiasDataType *const __restrict__ p_reduce_dscale, const DscaleDbiasDataType *const __restrict__ p_reduce_dbias, const MeanVarDataType *const __restrict__ p_mean, const MeanVarDataType *const __restrict__ p_inv_var, const XDataType *const __restrict__ p_x, const DyDataType *const __restrict__ p_dy, const ScaleDataType *const __restrict__ p_scale, const DyElementwiseOp dy_elementwise_op, DxDataType *const __restrict__ p_dx, DscaleDbiasDataType *const __restrict__ p_dscale, DscaleDbiasDataType *const __restrict__ p_dbias)

Static Public Attributes

static constexpr bool reorder_thread_cluster = (XDyDxVectorDim == 0)
static constexpr auto thread_cluster_desc
static constexpr auto I0 = Number<0>{}
static constexpr auto I1 = Number<1>{}
static constexpr index_t M_BlockTileSize = MThreadClusterSize * MThreadSliceSize
static constexpr index_t K_BlockTileSize = KThreadClusterSize * KThreadSliceSize

Member Typedef Documentation

◆ BlockwiseReduce

template<typename XDataType, typename DyDataType, typename DxDataType, typename AccDataType, typename ScaleDataType, typename DscaleDbiasDataType, typename MeanVarDataType, typename DyElementwiseOp, typename XYGridDesc_M_K, typename DscaleDbiasGridDesc_M_K, typename MeanVarGridDesc_M, typename ScaleBiasGridDesc_M, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XDyDxVectorDim, index_t XSrcVectorSize, index_t DySrcVectorSize, index_t DxDstVectorSize, index_t ScaleSrcVectorSize, index_t DscaleDbiasDstVectorSize, index_t MeanVarSrcVectorSize>
using ck::GridwiseReduceSecondHalfBatchNormBackwardFinal< XDataType, DyDataType, DxDataType, AccDataType, ScaleDataType, DscaleDbiasDataType, MeanVarDataType, DyElementwiseOp, XYGridDesc_M_K, DscaleDbiasGridDesc_M_K, MeanVarGridDesc_M, ScaleBiasGridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XDyDxVectorDim, XSrcVectorSize, DySrcVectorSize, DxDstVectorSize, ScaleSrcVectorSize, DscaleDbiasDstVectorSize, MeanVarSrcVectorSize >::BlockwiseReduce
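Initial value:
PartitionedBlockwiseReduction<AccDataType,
                              BlockSize,
                              ThreadClusterLengths_M_K,
                              ThreadClusterArrangeOrder,
                              ck::reduce::Add,
                              false>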
Sequence< MThreadClusterSize, KThreadClusterSize > ThreadClusterLengths_M_K
Definition gridwise_multiblock_reduce_second_half_batchnorm_backward_final.hpp:110
typename conditional< reorder_thread_cluster, Sequence< 1, 0 >, Sequence< 0, 1 > >::type ThreadClusterArrangeOrder
Definition gridwise_multiblock_reduce_second_half_batchnorm_backward_final.hpp:115
Definition reduction_functions_blockwise.hpp:28
Definition reduction_operator.hpp:37

◆ PassThroughOp

template<typename XDataType, typename DyDataType, typename DxDataType, typename AccDataType, typename ScaleDataType, typename DscaleDbiasDataType, typename MeanVarDataType, typename DyElementwiseOp, typename XYGridDesc_M_K, typename DscaleDbiasGridDesc_M_K, typename MeanVarGridDesc_M, typename ScaleBiasGridDesc_M, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XDyDxVectorDim, index_t XSrcVectorSize, index_t DySrcVectorSize, index_t DxDstVectorSize, index_t ScaleSrcVectorSize, index_t DscaleDbiasDstVectorSize, index_t MeanVarSrcVectorSize>
using ck::GridwiseReduceSecondHalfBatchNormBackwardFinal< XDataType, DyDataType, DxDataType, AccDataType, ScaleDataType, DscaleDbiasDataType, MeanVarDataType, DyElementwiseOp, XYGridDesc_M_K, DscaleDbiasGridDesc_M_K, MeanVarGridDesc_M, ScaleBiasGridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XDyDxVectorDim, XSrcVectorSize, DySrcVectorSize, DxDstVectorSize, ScaleSrcVectorSize, DscaleDbiasDstVectorSize, MeanVarSrcVectorSize >::PassThroughOp = tensor_operation::element_wise::PassThrough

◆ ThreadBufferDimAccessOrder

template<typename XDataType, typename DyDataType, typename DxDataType, typename AccDataType, typename ScaleDataType, typename DscaleDbiasDataType, typename MeanVarDataType, typename DyElementwiseOp, typename XYGridDesc_M_K, typename DscaleDbiasGridDesc_M_K, typename MeanVarGridDesc_M, typename ScaleBiasGridDesc_M, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XDyDxVectorDim, index_t XSrcVectorSize, index_t DySrcVectorSize, index_t DxDstVectorSize, index_t ScaleSrcVectorSize, index_t DscaleDbiasDstVectorSize, index_t MeanVarSrcVectorSize>
using ck::GridwiseReduceSecondHalfBatchNormBackwardFinal< XDataType, DyDataType, DxDataType, AccDataType, ScaleDataType, DscaleDbiasDataType, MeanVarDataType, DyElementwiseOp, XYGridDesc_M_K, DscaleDbiasGridDesc_M_K, MeanVarGridDesc_M, ScaleBiasGridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XDyDxVectorDim, XSrcVectorSize, DySrcVectorSize, DxDstVectorSize, ScaleSrcVectorSize, DscaleDbiasDstVectorSize, MeanVarSrcVectorSize >::ThreadBufferDimAccessOrder
Initial value:
typename conditional<reorder_thread_cluster, Sequence<1, 0>, Sequence<0, 1>>::type
Definition utility/sequence.hpp:43
Definition utility/functional.hpp:100

◆ ThreadClusterArrangeOrder

template<typename XDataType, typename DyDataType, typename DxDataType, typename AccDataType, typename ScaleDataType, typename DscaleDbiasDataType, typename MeanVarDataType, typename DyElementwiseOp, typename XYGridDesc_M_K, typename DscaleDbiasGridDesc_M_K, typename MeanVarGridDesc_M, typename ScaleBiasGridDesc_M, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XDyDxVectorDim, index_t XSrcVectorSize, index_t DySrcVectorSize, index_t DxDstVectorSize, index_t ScaleSrcVectorSize, index_t DscaleDbiasDstVectorSize, index_t MeanVarSrcVectorSize>
using ck::GridwiseReduceSecondHalfBatchNormBackwardFinal< XDataType, DyDataType, DxDataType, AccDataType, ScaleDataType, DscaleDbiasDataType, MeanVarDataType, DyElementwiseOp, XYGridDesc_M_K, DscaleDbiasGridDesc_M_K, MeanVarGridDesc_M, ScaleBiasGridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XDyDxVectorDim, XSrcVectorSize, DySrcVectorSize, DxDstVectorSize, ScaleSrcVectorSize, DscaleDbiasDstVectorSize, MeanVarSrcVectorSize >::ThreadClusterArrangeOrder

◆ ThreadClusterLengths_M_K

template<typename XDataType, typename DyDataType, typename DxDataType, typename AccDataType, typename ScaleDataType, typename DscaleDbiasDataType, typename MeanVarDataType, typename DyElementwiseOp, typename XYGridDesc_M_K, typename DscaleDbiasGridDesc_M_K, typename MeanVarGridDesc_M, typename ScaleBiasGridDesc_M, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XDyDxVectorDim, index_t XSrcVectorSize, index_t DySrcVectorSize, index_t DxDstVectorSize, index_t ScaleSrcVectorSize, index_t DscaleDbiasDstVectorSize, index_t MeanVarSrcVectorSize>
using ck::GridwiseReduceSecondHalfBatchNormBackwardFinal< XDataType, DyDataType, DxDataType, AccDataType, ScaleDataType, DscaleDbiasDataType, MeanVarDataType, DyElementwiseOp, XYGridDesc_M_K, DscaleDbiasGridDesc_M_K, MeanVarGridDesc_M, ScaleBiasGridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XDyDxVectorDim, XSrcVectorSize, DySrcVectorSize, DxDstVectorSize, ScaleSrcVectorSize, DscaleDbiasDstVectorSize, MeanVarSrcVectorSize >::ThreadClusterLengths_M_K = Sequence<MThreadClusterSize, KThreadClusterSize>

◆ ThreadReduceDstDesc_M

template<typename XDataType, typename DyDataType, typename DxDataType, typename AccDataType, typename ScaleDataType, typename DscaleDbiasDataType, typename MeanVarDataType, typename DyElementwiseOp, typename XYGridDesc_M_K, typename DscaleDbiasGridDesc_M_K, typename MeanVarGridDesc_M, typename ScaleBiasGridDesc_M, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XDyDxVectorDim, index_t XSrcVectorSize, index_t DySrcVectorSize, index_t DxDstVectorSize, index_t ScaleSrcVectorSize, index_t DscaleDbiasDstVectorSize, index_t MeanVarSrcVectorSize>
using ck::GridwiseReduceSecondHalfBatchNormBackwardFinal< XDataType, DyDataType, DxDataType, AccDataType, ScaleDataType, DscaleDbiasDataType, MeanVarDataType, DyElementwiseOp, XYGridDesc_M_K, DscaleDbiasGridDesc_M_K, MeanVarGridDesc_M, ScaleBiasGridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XDyDxVectorDim, XSrcVectorSize, DySrcVectorSize, DxDstVectorSize, ScaleSrcVectorSize, DscaleDbiasDstVectorSize, MeanVarSrcVectorSize >::ThreadReduceDstDesc_M
Initial value:
decltype(make_naive_tensor_descriptor_packed(make_tuple(Number<MThreadSliceSize>{})))
integral_constant< index_t, N > Number
Definition number.hpp:12
__host__ __device__ constexpr auto make_naive_tensor_descriptor_packed(const Tuple< Lengths... > &lengths)
Definition tensor_descriptor_helper.hpp:101
__host__ __device__ constexpr auto make_tuple(Xs &&... xs)
Definition utility/tuple.hpp:211

◆ ThreadReduceSrcDesc_M_1

template<typename XDataType, typename DyDataType, typename DxDataType, typename AccDataType, typename ScaleDataType, typename DscaleDbiasDataType, typename MeanVarDataType, typename DyElementwiseOp, typename XYGridDesc_M_K, typename DscaleDbiasGridDesc_M_K, typename MeanVarGridDesc_M, typename ScaleBiasGridDesc_M, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XDyDxVectorDim, index_t XSrcVectorSize, index_t DySrcVectorSize, index_t DxDstVectorSize, index_t ScaleSrcVectorSize, index_t DscaleDbiasDstVectorSize, index_t MeanVarSrcVectorSize>
using ck::GridwiseReduceSecondHalfBatchNormBackwardFinal< XDataType, DyDataType, DxDataType, AccDataType, ScaleDataType, DscaleDbiasDataType, MeanVarDataType, DyElementwiseOp, XYGridDesc_M_K, DscaleDbiasGridDesc_M_K, MeanVarGridDesc_M, ScaleBiasGridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XDyDxVectorDim, XSrcVectorSize, DySrcVectorSize, DxDstVectorSize, ScaleSrcVectorSize, DscaleDbiasDstVectorSize, MeanVarSrcVectorSize >::ThreadReduceSrcDesc_M_1

◆ ThreadwiseReduce

template<typename XDataType, typename DyDataType, typename DxDataType, typename AccDataType, typename ScaleDataType, typename DscaleDbiasDataType, typename MeanVarDataType, typename DyElementwiseOp, typename XYGridDesc_M_K, typename DscaleDbiasGridDesc_M_K, typename MeanVarGridDesc_M, typename ScaleBiasGridDesc_M, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XDyDxVectorDim, index_t XSrcVectorSize, index_t DySrcVectorSize, index_t DxDstVectorSize, index_t ScaleSrcVectorSize, index_t DscaleDbiasDstVectorSize, index_t MeanVarSrcVectorSize>
using ck::GridwiseReduceSecondHalfBatchNormBackwardFinal< XDataType, DyDataType, DxDataType, AccDataType, ScaleDataType, DscaleDbiasDataType, MeanVarDataType, DyElementwiseOp, XYGridDesc_M_K, DscaleDbiasGridDesc_M_K, MeanVarGridDesc_M, ScaleBiasGridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XDyDxVectorDim, XSrcVectorSize, DySrcVectorSize, DxDstVectorSize, ScaleSrcVectorSize, DscaleDbiasDstVectorSize, MeanVarSrcVectorSize >::ThreadwiseReduce
Initial value:
ThreadwiseReduction<AccDataType,
                    ThreadReduceSrcDesc_M_1,
                    ThreadReduceDstDesc_M,
                    ck::reduce::Add,
                    false>
decltype(make_naive_tensor_descriptor_packed( make_tuple(Number< MThreadSliceSize >{}, Number< 1 >{}))) ThreadReduceSrcDesc_M_1
Definition gridwise_multiblock_reduce_second_half_batchnorm_backward_final.hpp:121
decltype(make_naive_tensor_descriptor_packed(make_tuple(Number< MThreadSliceSize >{}))) ThreadReduceDstDesc_M
Definition gridwise_multiblock_reduce_second_half_batchnorm_backward_final.hpp:123
Definition reduction_functions_threadwise.hpp:23

Member Function Documentation

◆ Run()

template<typename XDataType, typename DyDataType, typename DxDataType, typename AccDataType, typename ScaleDataType, typename DscaleDbiasDataType, typename MeanVarDataType, typename DyElementwiseOp, typename XYGridDesc_M_K, typename DscaleDbiasGridDesc_M_K, typename MeanVarGridDesc_M, typename ScaleBiasGridDesc_M, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XDyDxVectorDim, index_t XSrcVectorSize, index_t DySrcVectorSize, index_t DxDstVectorSize, index_t ScaleSrcVectorSize, index_t DscaleDbiasDstVectorSize, index_t MeanVarSrcVectorSize>
__device__ void ck::GridwiseReduceSecondHalfBatchNormBackwardFinal< XDataType, DyDataType, DxDataType, AccDataType, ScaleDataType, DscaleDbiasDataType, MeanVarDataType, DyElementwiseOp, XYGridDesc_M_K, DscaleDbiasGridDesc_M_K, MeanVarGridDesc_M, ScaleBiasGridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XDyDxVectorDim, XSrcVectorSize, DySrcVectorSize, DxDstVectorSize, ScaleSrcVectorSize, DscaleDbiasDstVectorSize, MeanVarSrcVectorSize >::Run ( const XYGridDesc_M_K & x_grid_desc_m_k,
const XYGridDesc_M_K & dy_grid_desc_m_k,
const XYGridDesc_M_K & dx_grid_desc_m_k,
const DscaleDbiasGridDesc_M_K & dscale_dbias_grid_desc_m_k,
const MeanVarGridDesc_M & mean_var_grid_desc_m,
const ScaleBiasGridDesc_M & scale_grid_desc_m,
const ScaleBiasGridDesc_M & dscale_dbias_grid_desc_m,
index_t blkgroup_size,
long_index_t reduce_size,
index_t num_xy_k_block_tile_iteration,
index_t num_dscale_dbias_k_block_tile_iteration,
const DscaleDbiasDataType *const __restrict__ p_reduce_dscale,
const DscaleDbiasDataType *const __restrict__ p_reduce_dbias,
const MeanVarDataType *const __restrict__ p_mean,
const MeanVarDataType *const __restrict__ p_inv_var,
const XDataType *const __restrict__ p_x,
const DyDataType *const __restrict__ p_dy,
const ScaleDataType *const __restrict__ p_scale,
const DyElementwiseOp dy_elementwise_op,
DxDataType *const __restrict__ p_dx,
DscaleDbiasDataType *const __restrict__ p_dscale,
DscaleDbiasDataType *const __restrict__ p_dbias )
inline static

Member Data Documentation

◆ I0

template<typename XDataType, typename DyDataType, typename DxDataType, typename AccDataType, typename ScaleDataType, typename DscaleDbiasDataType, typename MeanVarDataType, typename DyElementwiseOp, typename XYGridDesc_M_K, typename DscaleDbiasGridDesc_M_K, typename MeanVarGridDesc_M, typename ScaleBiasGridDesc_M, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XDyDxVectorDim, index_t XSrcVectorSize, index_t DySrcVectorSize, index_t DxDstVectorSize, index_t ScaleSrcVectorSize, index_t DscaleDbiasDstVectorSize, index_t MeanVarSrcVectorSize>
auto ck::GridwiseReduceSecondHalfBatchNormBackwardFinal< XDataType, DyDataType, DxDataType, AccDataType, ScaleDataType, DscaleDbiasDataType, MeanVarDataType, DyElementwiseOp, XYGridDesc_M_K, DscaleDbiasGridDesc_M_K, MeanVarGridDesc_M, ScaleBiasGridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XDyDxVectorDim, XSrcVectorSize, DySrcVectorSize, DxDstVectorSize, ScaleSrcVectorSize, DscaleDbiasDstVectorSize, MeanVarSrcVectorSize >::I0 = Number<0>{}
static constexpr

◆ I1

template<typename XDataType, typename DyDataType, typename DxDataType, typename AccDataType, typename ScaleDataType, typename DscaleDbiasDataType, typename MeanVarDataType, typename DyElementwiseOp, typename XYGridDesc_M_K, typename DscaleDbiasGridDesc_M_K, typename MeanVarGridDesc_M, typename ScaleBiasGridDesc_M, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XDyDxVectorDim, index_t XSrcVectorSize, index_t DySrcVectorSize, index_t DxDstVectorSize, index_t ScaleSrcVectorSize, index_t DscaleDbiasDstVectorSize, index_t MeanVarSrcVectorSize>
auto ck::GridwiseReduceSecondHalfBatchNormBackwardFinal< XDataType, DyDataType, DxDataType, AccDataType, ScaleDataType, DscaleDbiasDataType, MeanVarDataType, DyElementwiseOp, XYGridDesc_M_K, DscaleDbiasGridDesc_M_K, MeanVarGridDesc_M, ScaleBiasGridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XDyDxVectorDim, XSrcVectorSize, DySrcVectorSize, DxDstVectorSize, ScaleSrcVectorSize, DscaleDbiasDstVectorSize, MeanVarSrcVectorSize >::I1 = Number<1>{}
static constexpr

◆ K_BlockTileSize

template<typename XDataType, typename DyDataType, typename DxDataType, typename AccDataType, typename ScaleDataType, typename DscaleDbiasDataType, typename MeanVarDataType, typename DyElementwiseOp, typename XYGridDesc_M_K, typename DscaleDbiasGridDesc_M_K, typename MeanVarGridDesc_M, typename ScaleBiasGridDesc_M, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XDyDxVectorDim, index_t XSrcVectorSize, index_t DySrcVectorSize, index_t DxDstVectorSize, index_t ScaleSrcVectorSize, index_t DscaleDbiasDstVectorSize, index_t MeanVarSrcVectorSize>
index_t ck::GridwiseReduceSecondHalfBatchNormBackwardFinal< XDataType, DyDataType, DxDataType, AccDataType, ScaleDataType, DscaleDbiasDataType, MeanVarDataType, DyElementwiseOp, XYGridDesc_M_K, DscaleDbiasGridDesc_M_K, MeanVarGridDesc_M, ScaleBiasGridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XDyDxVectorDim, XSrcVectorSize, DySrcVectorSize, DxDstVectorSize, ScaleSrcVectorSize, DscaleDbiasDstVectorSize, MeanVarSrcVectorSize >::K_BlockTileSize = KThreadClusterSize * KThreadSliceSize
static constexpr

◆ M_BlockTileSize

template<typename XDataType, typename DyDataType, typename DxDataType, typename AccDataType, typename ScaleDataType, typename DscaleDbiasDataType, typename MeanVarDataType, typename DyElementwiseOp, typename XYGridDesc_M_K, typename DscaleDbiasGridDesc_M_K, typename MeanVarGridDesc_M, typename ScaleBiasGridDesc_M, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XDyDxVectorDim, index_t XSrcVectorSize, index_t DySrcVectorSize, index_t DxDstVectorSize, index_t ScaleSrcVectorSize, index_t DscaleDbiasDstVectorSize, index_t MeanVarSrcVectorSize>
index_t ck::GridwiseReduceSecondHalfBatchNormBackwardFinal< XDataType, DyDataType, DxDataType, AccDataType, ScaleDataType, DscaleDbiasDataType, MeanVarDataType, DyElementwiseOp, XYGridDesc_M_K, DscaleDbiasGridDesc_M_K, MeanVarGridDesc_M, ScaleBiasGridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XDyDxVectorDim, XSrcVectorSize, DySrcVectorSize, DxDstVectorSize, ScaleSrcVectorSize, DscaleDbiasDstVectorSize, MeanVarSrcVectorSize >::M_BlockTileSize = MThreadClusterSize * MThreadSliceSize
static constexpr

◆ reorder_thread_cluster

template<typename XDataType, typename DyDataType, typename DxDataType, typename AccDataType, typename ScaleDataType, typename DscaleDbiasDataType, typename MeanVarDataType, typename DyElementwiseOp, typename XYGridDesc_M_K, typename DscaleDbiasGridDesc_M_K, typename MeanVarGridDesc_M, typename ScaleBiasGridDesc_M, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XDyDxVectorDim, index_t XSrcVectorSize, index_t DySrcVectorSize, index_t DxDstVectorSize, index_t ScaleSrcVectorSize, index_t DscaleDbiasDstVectorSize, index_t MeanVarSrcVectorSize>
bool ck::GridwiseReduceSecondHalfBatchNormBackwardFinal< XDataType, DyDataType, DxDataType, AccDataType, ScaleDataType, DscaleDbiasDataType, MeanVarDataType, DyElementwiseOp, XYGridDesc_M_K, DscaleDbiasGridDesc_M_K, MeanVarGridDesc_M, ScaleBiasGridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XDyDxVectorDim, XSrcVectorSize, DySrcVectorSize, DxDstVectorSize, ScaleSrcVectorSize, DscaleDbiasDstVectorSize, MeanVarSrcVectorSize >::reorder_thread_cluster = (XDyDxVectorDim == 0)
static constexpr

◆ thread_cluster_desc

template<typename XDataType, typename DyDataType, typename DxDataType, typename AccDataType, typename ScaleDataType, typename DscaleDbiasDataType, typename MeanVarDataType, typename DyElementwiseOp, typename XYGridDesc_M_K, typename DscaleDbiasGridDesc_M_K, typename MeanVarGridDesc_M, typename ScaleBiasGridDesc_M, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XDyDxVectorDim, index_t XSrcVectorSize, index_t DySrcVectorSize, index_t DxDstVectorSize, index_t ScaleSrcVectorSize, index_t DscaleDbiasDstVectorSize, index_t MeanVarSrcVectorSize>
auto ck::GridwiseReduceSecondHalfBatchNormBackwardFinal< XDataType, DyDataType, DxDataType, AccDataType, ScaleDataType, DscaleDbiasDataType, MeanVarDataType, DyElementwiseOp, XYGridDesc_M_K, DscaleDbiasGridDesc_M_K, MeanVarGridDesc_M, ScaleBiasGridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XDyDxVectorDim, XSrcVectorSize, DySrcVectorSize, DxDstVectorSize, ScaleSrcVectorSize, DscaleDbiasDstVectorSize, MeanVarSrcVectorSize >::thread_cluster_desc
static constexpr
Initial value:
= make_cluster_descriptor(ThreadClusterLengths_M_K{}, ThreadClusterArrangeOrder{})
__host__ __device__ constexpr auto make_cluster_descriptor(const Lengths &lengths, ArrangeOrder order=typename arithmetic_sequence_gen< 0, Lengths::Size(), 1 >::type{})
Definition tensor_description/cluster_descriptor.hpp:13

The documentation for this struct was generated from the following file: