DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type > Struct Template Reference

DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type > Struct Template Reference

Composable Kernel: ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type > Struct Template Reference
ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type > Struct Template Reference

#include <device_grouped_conv_bwd_weight_wmma_cshuffle.hpp>

Inheritance diagram for ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >:
ck::tensor_operation::device::DeviceGroupedConvBwdWeight< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ComputeTypeA, ComputeTypeB > ck::tensor_operation::device::BaseOperator

Classes

struct  Argument
struct  Invoker

Public Types

using DeviceOp = DeviceGroupedConvBwdWeight_Wmma_CShuffle
using ADataType = OutDataType
using BDataType = InDataType
using CDataType = WeiDataType
using AElementwiseOperation = OutElementwiseOperation
using BElementwiseOperation = InElementwiseOperation
using CElementwiseOperation = WeiElementwiseOperation
using ABDataType = InDataType
using ABCGridDescs = decltype(GetABCGridDesc<NDimSpatial>())
using AGridDesc_K0_M_K1 = remove_cvref_t<decltype(ABCGridDescs{}[I0])>
using BGridDesc_K0_N_K1 = remove_cvref_t<decltype(ABCGridDescs{}[I1])>
using CGridDesc_M_N = remove_cvref_t<decltype(ABCGridDescs{}[I2])>
using CShuffleDataType = AccDataType
using GridwiseGemm
using DsGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
using CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
using Block2CTileMap

Public Member Functions

bool IsSupportedArgument (const BaseArgument *p_arg) override
std::unique_ptr< BaseArgument > MakeArgumentPointer (const void *p_in_grid, void *p_wei_grid, const void *p_out_grid, const std::array< index_t, NDimSpatial+3 > &a_g_n_c_wis_lengths, const std::array< index_t, NDimSpatial+3 > &a_g_n_c_wis_strides, const std::array< index_t, NDimSpatial+3 > &b_g_k_c_xs_lengths, const std::array< index_t, NDimSpatial+3 > &b_g_k_c_xs_strides, const std::array< index_t, NDimSpatial+3 > &e_g_n_k_wos_lengths, const std::array< index_t, NDimSpatial+3 > &e_g_n_k_wos_strides, const std::array< index_t, NDimSpatial > &conv_filter_strides, const std::array< index_t, NDimSpatial > &conv_filter_dilations, const std::array< index_t, NDimSpatial > &input_left_pads, const std::array< index_t, NDimSpatial > &input_right_pads, InElementwiseOperation in_element_op, WeiElementwiseOperation wei_element_op, OutElementwiseOperation out_element_op, const index_t split_k) override
std::unique_ptr< BaseInvoker > MakeInvokerPointer () override
std::string GetTypeString () const override
Public Member Functions inherited from ck::tensor_operation::device::BaseOperator
 BaseOperator ()=default
 BaseOperator (const BaseOperator &)=default
BaseOperator & operator= (const BaseOperator &)=default
virtual std::string GetInstanceString () const
virtual std::string GetTypeIdName () const
virtual std::optional< std::string > GetObjectName () const
virtual std::optional< std::string > GetTemplateInfo () const
virtual std::string GetTypeIdHashCode () const
virtual size_t GetWorkSpaceSize (const BaseArgument *) const
virtual void SetWorkSpacePointer (BaseArgument *p_arg, void *p_workspace, const StreamConfig &=StreamConfig{}) const
virtual ~BaseOperator ()

Static Public Member Functions

template<index_t NDim, typename ck::enable_if< NDim==3, bool >::type = false>
static constexpr auto make_out_grid_desc (const index_t N, const index_t Do, const index_t Ho, const index_t Wo, const index_t K, const std::array< index_t, NDimSpatial+3 > &output_strides)
template<index_t NDim, typename ck::enable_if< NDim==3, bool >::type = false>
static constexpr auto make_in_grid_desc (const index_t N, const index_t Di, const index_t Hi, const index_t Wi, const index_t C, const std::array< index_t, NDimSpatial+3 > &input_strides)
template<index_t NDim, typename ck::enable_if< NDim==3, bool >::type = false>
static constexpr auto make_wei_grid_desc (const index_t K, const index_t Z, const index_t Y, const index_t X, const index_t C, const std::array< index_t, NDimSpatial+3 > &weights_strides)
template<index_t NDim, typename ck::enable_if< NDim==3, bool >::type = false>
static auto MakeABCGridDescriptor_A_K0_M_K1_B_K0_N_K1_C_M_N (const index_t N, const index_t K, const index_t C, const std::array< index_t, NDimSpatial > &input_spatial_lengths, const std::array< index_t, NDimSpatial > &filter_spatial_lengths, const std::array< index_t, NDimSpatial > &output_spatial_lengths, const std::array< index_t, NDimSpatial+3 > &input_strides, const std::array< index_t, NDimSpatial+3 > &weights_strides, const std::array< index_t, NDimSpatial+3 > &output_strides, const std::array< index_t, NDimSpatial > &conv_filter_strides, const std::array< index_t, NDimSpatial > &conv_filter_dilations, const std::array< index_t, NDimSpatial > &input_left_pads, const std::array< index_t, NDimSpatial > &input_right_pads)
template<index_t NDim, typename ck::enable_if< NDim==3, bool >::type = false>
static auto GetABCGridDesc ()
static constexpr bool IsValidCompilationParameter ()
static bool IsSupportedArgument (const Argument &arg)
static auto MakeArgument (const InDataType *p_in_grid, WeiDataType *p_wei_grid, const OutDataType *p_out_grid, const std::array< index_t, NDimSpatial+3 > &a_g_n_c_wis_lengths, const std::array< index_t, NDimSpatial+3 > &a_g_n_c_wis_strides, const std::array< index_t, NDimSpatial+3 > &b_g_k_c_xs_lengths, const std::array< index_t, NDimSpatial+3 > &b_g_k_c_xs_strides, const std::array< index_t, NDimSpatial+3 > &e_g_n_k_wos_lengths, const std::array< index_t, NDimSpatial+3 > &e_g_n_k_wos_strides, const std::array< index_t, NDimSpatial > &conv_filter_strides, const std::array< index_t, NDimSpatial > &conv_filter_dilations, const std::array< index_t, NDimSpatial > &input_left_pads, const std::array< index_t, NDimSpatial > &input_right_pads, InElementwiseOperation in_element_op, WeiElementwiseOperation wei_element_op, OutElementwiseOperation out_element_op, const index_t split_k)
static auto MakeInvoker ()

Static Public Attributes

static constexpr auto I0 = Number<0>{}
static constexpr auto I1 = Number<1>{}
static constexpr auto I2 = Number<2>{}
static constexpr auto I3 = Number<3>{}
static constexpr auto I4 = Number<4>{}
static constexpr auto I5 = Number<5>{}
static constexpr auto GemmK1Number = Number<K1>{}
static constexpr index_t KPerBlock = K0PerBlock * GemmK1Number

Member Typedef Documentation

◆ ABCGridDescs

template<index_t NDimSpatial, typename InLayout, typename WeiLayout, typename OutLayout, typename InDataType, typename WeiDataType, typename OutDataType, typename AccDataType, typename InElementwiseOperation, typename WeiElementwiseOperation, typename OutElementwiseOperation, ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
using ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::ABCGridDescs = decltype(GetABCGridDesc<NDimSpatial>())

◆ ABDataType

template<index_t NDimSpatial, typename InLayout, typename WeiLayout, typename OutLayout, typename InDataType, typename WeiDataType, typename OutDataType, typename AccDataType, typename InElementwiseOperation, typename WeiElementwiseOperation, typename OutElementwiseOperation, ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
using ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::ABDataType = InDataType

◆ ADataType

template<index_t NDimSpatial, typename InLayout, typename WeiLayout, typename OutLayout, typename InDataType, typename WeiDataType, typename OutDataType, typename AccDataType, typename InElementwiseOperation, typename WeiElementwiseOperation, typename OutElementwiseOperation, ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
using ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::ADataType = OutDataType

◆ AElementwiseOperation

template<index_t NDimSpatial, typename InLayout, typename WeiLayout, typename OutLayout, typename InDataType, typename WeiDataType, typename OutDataType, typename AccDataType, typename InElementwiseOperation, typename WeiElementwiseOperation, typename OutElementwiseOperation, ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
using ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::AElementwiseOperation = OutElementwiseOperation

◆ AGridDesc_K0_M_K1

template<index_t NDimSpatial, typename InLayout, typename WeiLayout, typename OutLayout, typename InDataType, typename WeiDataType, typename OutDataType, typename AccDataType, typename InElementwiseOperation, typename WeiElementwiseOperation, typename OutElementwiseOperation, ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
using ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::AGridDesc_K0_M_K1 = remove_cvref_t<decltype(ABCGridDescs{}[I0])>

◆ BDataType

template<index_t NDimSpatial, typename InLayout, typename WeiLayout, typename OutLayout, typename InDataType, typename WeiDataType, typename OutDataType, typename AccDataType, typename InElementwiseOperation, typename WeiElementwiseOperation, typename OutElementwiseOperation, ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
using ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::BDataType = InDataType

◆ BElementwiseOperation

template<index_t NDimSpatial, typename InLayout, typename WeiLayout, typename OutLayout, typename InDataType, typename WeiDataType, typename OutDataType, typename AccDataType, typename InElementwiseOperation, typename WeiElementwiseOperation, typename OutElementwiseOperation, ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
using ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::BElementwiseOperation = InElementwiseOperation

◆ BGridDesc_K0_N_K1

template<index_t NDimSpatial, typename InLayout, typename WeiLayout, typename OutLayout, typename InDataType, typename WeiDataType, typename OutDataType, typename AccDataType, typename InElementwiseOperation, typename WeiElementwiseOperation, typename OutElementwiseOperation, ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
using ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::BGridDesc_K0_N_K1 = remove_cvref_t<decltype(ABCGridDescs{}[I1])>

◆ Block2CTileMap

template<index_t NDimSpatial, typename InLayout, typename WeiLayout, typename OutLayout, typename InDataType, typename WeiDataType, typename OutDataType, typename AccDataType, typename InElementwiseOperation, typename WeiElementwiseOperation, typename OutElementwiseOperation, ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
using ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::Block2CTileMap
Initial value:
remove_cvref_t< decltype(ABCGridDescs{}[I2])> CGridDesc_M_N
Definition device_conv2d_backward_weight_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp:285
static constexpr auto I1
Definition device_grouped_conv_bwd_weight_wmma_cshuffle.hpp:94

◆ CDataType

template<index_t NDimSpatial, typename InLayout, typename WeiLayout, typename OutLayout, typename InDataType, typename WeiDataType, typename OutDataType, typename AccDataType, typename InElementwiseOperation, typename WeiElementwiseOperation, typename OutElementwiseOperation, ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
using ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::CDataType = WeiDataType

◆ CElementwiseOperation

template<index_t NDimSpatial, typename InLayout, typename WeiLayout, typename OutLayout, typename InDataType, typename WeiDataType, typename OutDataType, typename AccDataType, typename InElementwiseOperation, typename WeiElementwiseOperation, typename OutElementwiseOperation, ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
using ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::CElementwiseOperation = WeiElementwiseOperation

◆ CGridDesc_M_N

template<index_t NDimSpatial, typename InLayout, typename WeiLayout, typename OutLayout, typename InDataType, typename WeiDataType, typename OutDataType, typename AccDataType, typename InElementwiseOperation, typename WeiElementwiseOperation, typename OutElementwiseOperation, ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
using ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::CGridDesc_M_N = remove_cvref_t<decltype(ABCGridDescs{}[I2])>

◆ CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock

template<index_t NDimSpatial, typename InLayout, typename WeiLayout, typename OutLayout, typename InDataType, typename WeiDataType, typename OutDataType, typename AccDataType, typename InElementwiseOperation, typename WeiElementwiseOperation, typename OutElementwiseOperation, ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
using ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock

◆ CShuffleDataType

template<index_t NDimSpatial, typename InLayout, typename WeiLayout, typename OutLayout, typename InDataType, typename WeiDataType, typename OutDataType, typename AccDataType, typename InElementwiseOperation, typename WeiElementwiseOperation, typename OutElementwiseOperation, ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
using ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::CShuffleDataType = AccDataType

◆ DeviceOp

template<index_t NDimSpatial, typename InLayout, typename WeiLayout, typename OutLayout, typename InDataType, typename WeiDataType, typename OutDataType, typename AccDataType, typename InElementwiseOperation, typename WeiElementwiseOperation, typename OutElementwiseOperation, ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
using ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::DeviceOp = DeviceGroupedConvBwdWeight_Wmma_CShuffle

◆ DsGridDesc_MBlock_MPerBlock_NBlock_NPerBlock

template<index_t NDimSpatial, typename InLayout, typename WeiLayout, typename OutLayout, typename InDataType, typename WeiDataType, typename OutDataType, typename AccDataType, typename InElementwiseOperation, typename WeiElementwiseOperation, typename OutElementwiseOperation, ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
using ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::DsGridDesc_MBlock_MPerBlock_NBlock_NPerBlock

◆ GridwiseGemm

template<index_t NDimSpatial, typename InLayout, typename WeiLayout, typename OutLayout, typename InDataType, typename WeiDataType, typename OutDataType, typename AccDataType, typename InElementwiseOperation, typename WeiElementwiseOperation, typename OutElementwiseOperation, ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
using ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::GridwiseGemm

Member Function Documentation

◆ GetABCGridDesc()

template<index_t NDimSpatial, typename InLayout, typename WeiLayout, typename OutLayout, typename InDataType, typename WeiDataType, typename OutDataType, typename AccDataType, typename InElementwiseOperation, typename WeiElementwiseOperation, typename OutElementwiseOperation, ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
template<index_t NDim, typename ck::enable_if< NDim==3, bool >::type = false>
auto ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::GetABCGridDesc ( )
inline static

◆ GetTypeString()

template<index_t NDimSpatial, typename InLayout, typename WeiLayout, typename OutLayout, typename InDataType, typename WeiDataType, typename OutDataType, typename AccDataType, typename InElementwiseOperation, typename WeiElementwiseOperation, typename OutElementwiseOperation, ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
std::string ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::GetTypeString ( ) const
inline override virtual

◆ IsSupportedArgument() [1/2]

template<index_t NDimSpatial, typename InLayout, typename WeiLayout, typename OutLayout, typename InDataType, typename WeiDataType, typename OutDataType, typename AccDataType, typename InElementwiseOperation, typename WeiElementwiseOperation, typename OutElementwiseOperation, ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
bool ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::IsSupportedArgument ( const Argument & arg)
inline static

◆ IsSupportedArgument() [2/2]

template<index_t NDimSpatial, typename InLayout, typename WeiLayout, typename OutLayout, typename InDataType, typename WeiDataType, typename OutDataType, typename AccDataType, typename InElementwiseOperation, typename WeiElementwiseOperation, typename OutElementwiseOperation, ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
bool ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::IsSupportedArgument ( const BaseArgument * p_arg)
inline override virtual

◆ IsValidCompilationParameter()

template<index_t NDimSpatial, typename InLayout, typename WeiLayout, typename OutLayout, typename InDataType, typename WeiDataType, typename OutDataType, typename AccDataType, typename InElementwiseOperation, typename WeiElementwiseOperation, typename OutElementwiseOperation, ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
constexpr bool ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::IsValidCompilationParameter ( )
inline static constexpr

◆ make_in_grid_desc()

template<index_t NDimSpatial, typename InLayout, typename WeiLayout, typename OutLayout, typename InDataType, typename WeiDataType, typename OutDataType, typename AccDataType, typename InElementwiseOperation, typename WeiElementwiseOperation, typename OutElementwiseOperation, ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
template<index_t NDim, typename ck::enable_if< NDim==3, bool >::type = false>
constexpr auto ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::make_in_grid_desc ( const index_t N,
const index_t Di,
const index_t Hi,
const index_t Wi,
const index_t C,
const std::array< index_t, NDimSpatial+3 > & input_strides )
inline static constexpr

◆ make_out_grid_desc()

template<index_t NDimSpatial, typename InLayout, typename WeiLayout, typename OutLayout, typename InDataType, typename WeiDataType, typename OutDataType, typename AccDataType, typename InElementwiseOperation, typename WeiElementwiseOperation, typename OutElementwiseOperation, ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
template<index_t NDim, typename ck::enable_if< NDim==3, bool >::type = false>
constexpr auto ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::make_out_grid_desc ( const index_t N,
const index_t Do,
const index_t Ho,
const index_t Wo,
const index_t K,
const std::array< index_t, NDimSpatial+3 > & output_strides )
inline static constexpr

◆ make_wei_grid_desc()

template<index_t NDimSpatial, typename InLayout, typename WeiLayout, typename OutLayout, typename InDataType, typename WeiDataType, typename OutDataType, typename AccDataType, typename InElementwiseOperation, typename WeiElementwiseOperation, typename OutElementwiseOperation, ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
template<index_t NDim, typename ck::enable_if< NDim==3, bool >::type = false>
constexpr auto ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::make_wei_grid_desc ( const index_t K,
const index_t Z,
const index_t Y,
const index_t X,
const index_t C,
const std::array< index_t, NDimSpatial+3 > & weights_strides )
inline static constexpr

◆ MakeABCGridDescriptor_A_K0_M_K1_B_K0_N_K1_C_M_N()

template<index_t NDimSpatial, typename InLayout, typename WeiLayout, typename OutLayout, typename InDataType, typename WeiDataType, typename OutDataType, typename AccDataType, typename InElementwiseOperation, typename WeiElementwiseOperation, typename OutElementwiseOperation, ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
template<index_t NDim, typename ck::enable_if< NDim==3, bool >::type = false>
auto ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::MakeABCGridDescriptor_A_K0_M_K1_B_K0_N_K1_C_M_N ( const index_t N,
const index_t K,
const index_t C,
const std::array< index_t, NDimSpatial > & input_spatial_lengths,
const std::array< index_t, NDimSpatial > & filter_spatial_lengths,
const std::array< index_t, NDimSpatial > & output_spatial_lengths,
const std::array< index_t, NDimSpatial+3 > & input_strides,
const std::array< index_t, NDimSpatial+3 > & weights_strides,
const std::array< index_t, NDimSpatial+3 > & output_strides,
const std::array< index_t, NDimSpatial > & conv_filter_strides,
const std::array< index_t, NDimSpatial > & conv_filter_dilations,
const std::array< index_t, NDimSpatial > & input_left_pads,
const std::array< index_t, NDimSpatial > & input_right_pads )
inline static

◆ MakeArgument()

template<index_t NDimSpatial, typename InLayout, typename WeiLayout, typename OutLayout, typename InDataType, typename WeiDataType, typename OutDataType, typename AccDataType, typename InElementwiseOperation, typename WeiElementwiseOperation, typename OutElementwiseOperation, ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
auto ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::MakeArgument ( const InDataType * p_in_grid,
WeiDataType * p_wei_grid,
const OutDataType * p_out_grid,
const std::array< index_t, NDimSpatial+3 > & a_g_n_c_wis_lengths,
const std::array< index_t, NDimSpatial+3 > & a_g_n_c_wis_strides,
const std::array< index_t, NDimSpatial+3 > & b_g_k_c_xs_lengths,
const std::array< index_t, NDimSpatial+3 > & b_g_k_c_xs_strides,
const std::array< index_t, NDimSpatial+3 > & e_g_n_k_wos_lengths,
const std::array< index_t, NDimSpatial+3 > & e_g_n_k_wos_strides,
const std::array< index_t, NDimSpatial > & conv_filter_strides,
const std::array< index_t, NDimSpatial > & conv_filter_dilations,
const std::array< index_t, NDimSpatial > & input_left_pads,
const std::array< index_t, NDimSpatial > & input_right_pads,
InElementwiseOperation in_element_op,
WeiElementwiseOperation wei_element_op,
OutElementwiseOperation out_element_op,
const index_t split_k )
inline static

◆ MakeArgumentPointer()

template<index_t NDimSpatial, typename InLayout, typename WeiLayout, typename OutLayout, typename InDataType, typename WeiDataType, typename OutDataType, typename AccDataType, typename InElementwiseOperation, typename WeiElementwiseOperation, typename OutElementwiseOperation, ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
std::unique_ptr< BaseArgument > ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::MakeArgumentPointer ( const void * p_in_grid,
void * p_wei_grid,
const void * p_out_grid,
const std::array< index_t, NDimSpatial+3 > & a_g_n_c_wis_lengths,
const std::array< index_t, NDimSpatial+3 > & a_g_n_c_wis_strides,
const std::array< index_t, NDimSpatial+3 > & b_g_k_c_xs_lengths,
const std::array< index_t, NDimSpatial+3 > & b_g_k_c_xs_strides,
const std::array< index_t, NDimSpatial+3 > & e_g_n_k_wos_lengths,
const std::array< index_t, NDimSpatial+3 > & e_g_n_k_wos_strides,
const std::array< index_t, NDimSpatial > & conv_filter_strides,
const std::array< index_t, NDimSpatial > & conv_filter_dilations,
const std::array< index_t, NDimSpatial > & input_left_pads,
const std::array< index_t, NDimSpatial > & input_right_pads,
InElementwiseOperation in_element_op,
WeiElementwiseOperation wei_element_op,
OutElementwiseOperation out_element_op,
const index_t split_k )
inline override virtual

◆ MakeInvoker()

template<index_t NDimSpatial, typename InLayout, typename WeiLayout, typename OutLayout, typename InDataType, typename WeiDataType, typename OutDataType, typename AccDataType, typename InElementwiseOperation, typename WeiElementwiseOperation, typename OutElementwiseOperation, ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
auto ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::MakeInvoker ( )
inline static

◆ MakeInvokerPointer()

template<index_t NDimSpatial, typename InLayout, typename WeiLayout, typename OutLayout, typename InDataType, typename WeiDataType, typename OutDataType, typename AccDataType, typename InElementwiseOperation, typename WeiElementwiseOperation, typename OutElementwiseOperation, ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
std::unique_ptr< BaseInvoker > ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::MakeInvokerPointer ( )
inline override virtual

Member Data Documentation

◆ GemmK1Number

template<index_t NDimSpatial, typename InLayout, typename WeiLayout, typename OutLayout, typename InDataType, typename WeiDataType, typename OutDataType, typename AccDataType, typename InElementwiseOperation, typename WeiElementwiseOperation, typename OutElementwiseOperation, ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
auto ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::GemmK1Number = Number<K1>{}
static constexpr

◆ I0

template<index_t NDimSpatial, typename InLayout, typename WeiLayout, typename OutLayout, typename InDataType, typename WeiDataType, typename OutDataType, typename AccDataType, typename InElementwiseOperation, typename WeiElementwiseOperation, typename OutElementwiseOperation, ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
auto ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::I0 = Number<0>{}
static constexpr

◆ I1

template<index_t NDimSpatial, typename InLayout, typename WeiLayout, typename OutLayout, typename InDataType, typename WeiDataType, typename OutDataType, typename AccDataType, typename InElementwiseOperation, typename WeiElementwiseOperation, typename OutElementwiseOperation, ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
auto ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::I1 = Number<1>{}
static constexpr

◆ I2

template<index_t NDimSpatial, typename InLayout, typename WeiLayout, typename OutLayout, typename InDataType, typename WeiDataType, typename OutDataType, typename AccDataType, typename InElementwiseOperation, typename WeiElementwiseOperation, typename OutElementwiseOperation, ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
auto ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::I2 = Number<2>{}
static constexpr

◆ I3

template<index_t NDimSpatial, typename InLayout, typename WeiLayout, typename OutLayout, typename InDataType, typename WeiDataType, typename OutDataType, typename AccDataType, typename InElementwiseOperation, typename WeiElementwiseOperation, typename OutElementwiseOperation, ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
auto ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::I3 = Number<3>{}
static constexpr

◆ I4

template<index_t NDimSpatial, typename InLayout, typename WeiLayout, typename OutLayout, typename InDataType, typename WeiDataType, typename OutDataType, typename AccDataType, typename InElementwiseOperation, typename WeiElementwiseOperation, typename OutElementwiseOperation, ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
auto ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::I4 = Number<4>{}
static constexpr

◆ I5

template<index_t NDimSpatial, typename InLayout, typename WeiLayout, typename OutLayout, typename InDataType, typename WeiDataType, typename OutDataType, typename AccDataType, typename InElementwiseOperation, typename WeiElementwiseOperation, typename OutElementwiseOperation, ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
auto ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::I5 = Number<5>{}
static constexpr

◆ KPerBlock

template<index_t NDimSpatial, typename InLayout, typename WeiLayout, typename OutLayout, typename InDataType, typename WeiDataType, typename OutDataType, typename AccDataType, typename InElementwiseOperation, typename WeiElementwiseOperation, typename OutElementwiseOperation, ConvolutionBackwardWeightSpecialization ConvBackwardWeightSpecialization, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t K1, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1, typename ck::enable_if< NDimSpatial==3, bool >::type = false>
index_t ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Wmma_CShuffle< NDimSpatial, InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType, AccDataType, InElementwiseOperation, WeiElementwiseOperation, OutElementwiseOperation, ConvBackwardWeightSpecialization, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerWMMA, NPerWMMA, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer, type >::KPerBlock = K0PerBlock * GemmK1Number
static constexpr

The documentation for this struct was generated from the following file: