gridwise_gemm_xdlops_bwd_weight.hpp File Reference

gridwise_gemm_xdlops_bwd_weight.hpp File Reference

Composable Kernel: gridwise_gemm_xdlops_bwd_weight.hpp File Reference
gridwise_gemm_xdlops_bwd_weight.hpp File Reference

Go to the source code of this file.

Classes

struct  ck::Merge_v4_no_carry< LowLengths >
struct  ck::GridwiseGemm_bk0mk1_bk0nk1_mn_xdlops_bwd_weight< BlockSize, FloatA, FloatB, FloatAcc, FloatC, CGlobalMemoryDataOperation, AGridDesc_B_K0_M_K1, BGridDesc_B_K0_N_K1, CMNGridDesc, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, MPerBlock, NPerBlock, K0PerBlock, MPerXdl, NPerXdl, K1Value, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, ABlockLdsM1PerBlock, ABlockLdsM0PerBlock, ABlockLdsM1Padding, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BThreadTransferSrcResetCoordinateAfterRun, BBlockLdsExtraN, BBlockLdsN1PerBlock, BBlockLdsN0PerBlock, BBlockLdsN1Padding, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CBlockTransferScalarPerVector_NWaveNPerXDL, CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, ABlockLdsExtraM1Wrw, BBlockLdsExtraN1Wrw, NumGemmKPrefetchStage, PipelineVer, ComputeTypeA, ComputeTypeB >

Namespaces

namespace  ck

Functions

template<typename LowLengths>
__host__ __device__ constexpr auto ck::make_merge_transform_v4_no_carry (const LowLengths &low_lengths)
template<typename GridwiseGemm, typename FloatA, typename FloatB, typename FloatC, typename AGridDesc_B_K0_M_K1, typename BGridDesc_B_K0_N_K1, typename CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, typename CBlockClusterAdaptor, bool HasMainKBlockLoop>
__global__ void ck::kernel_gemm_xdlops_bwd_weight (const FloatA *__restrict__ p_a_grid, const FloatB *__restrict__ p_b_grid, FloatC *__restrict__ p_c_grid, const AGridDesc_B_K0_M_K1 a_b_k0_m_k1_grid_desc, const BGridDesc_B_K0_N_K1 b_b_k0_n_k1_grid_desc, const CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock c_grid_desc_mblock_mperblock_nblock_nperblock, const AElementwiseOperation a_element_op, const BElementwiseOperation b_element_op, const CElementwiseOperation c_element_op, const CBlockClusterAdaptor c_block_cluster_adaptor)