gridwise_2d_multiple_reduction_multiblock.hpp Source File
gridwise_2d_multiple_reduction_multiblock.hpp
Go to the documentation of this file.
Line 120: using Accumulation = detail::AccumulateWithNanCheck<PropagateNan, ReduceOperation, AccDataType>;
Definition ck.hpp:268
__host__ __device__ constexpr auto make_multi_index(Xs &&... xs)
Definition array_multi_index.hpp:15
__host__ __device__ constexpr auto make_cluster_descriptor(const Lengths &lengths, ArrangeOrder order=typename arithmetic_sequence_gen< 0, Lengths::Size(), 1 >::type{})
Definition tensor_description/cluster_descriptor.hpp:13
__host__ __device__ constexpr Y type_convert(X x)
Definition utility/type_convert.hpp:98
__host__ __device__ constexpr auto generate_tuple(F &&f, Number< N >)
Definition tuple_helper.hpp:21
__host__ __device__ constexpr auto make_naive_tensor_descriptor_packed(const Tuple< Lengths... > &lengths)
Definition tensor_descriptor_helper.hpp:101
__host__ __device__ constexpr auto make_tuple(Xs &&... xs)
Definition utility/tuple.hpp:211
__global__ void kernel_multiple_reduce_multiblock(const InGridDesc_M_K in_grid_desc_m_k, const OutGridDesc_M_Tuple out_grid_desc_m_tuple, const InElementwiseOperationTuple in_elementwise_op_tuple, const AccElementwiseOperationTuple acc_elementwise_op_tuple, index_t block_group_size, index_t num_k_block_tile_iteration, Array< AccDataType, NumReduction > alpha_values, const InDataType *const __restrict__ p_in_value_global, Array< AccDataType, NumReduction > beta_values, OutDataTypePointerTuple p_out_value_global_tuple)
Definition gridwise_2d_multiple_reduction_multiblock.hpp:26
__host__ __device__ constexpr auto make_dynamic_buffer(T *p, ElementSpaceSize element_space_size)
Definition dynamic_buffer.hpp:472
Definition utility/array.hpp:14
Definition gridwise_2d_multiple_reduction_multiblock.hpp:69
static constexpr index_t K_BlockTileSize
Definition gridwise_2d_multiple_reduction_multiblock.hpp:118
tensor_operation::element_wise::PassThrough PassThroughOp
Definition gridwise_2d_multiple_reduction_multiblock.hpp:112
typename conditional< reorder_thread_cluster, Sequence< 1, 0 >, Sequence< 0, 1 > >::type ThreadBufferDimAccessOrder
Definition gridwise_2d_multiple_reduction_multiblock.hpp:85
Sequence< MThreadClusterSize, KThreadClusterSize > ThreadClusterLengths_M_K
Definition gridwise_2d_multiple_reduction_multiblock.hpp:83
static __device__ void Run(const InGridDesc_M_K &in_grid_desc_m_k, const OutGridDesc_M_Tuple &out_grid_desc_m_tuple, const InElementwiseOperationTuple &in_elementwise_op_tuple, const AccElementwiseOperationTuple &acc_elementwise_op_tuple, index_t block_group_size, index_t num_k_block_tile_iteration, Array< AccDataType, NumReduction > alpha_values, const InDataType *const __restrict__ p_in_value_global, Array< AccDataType, NumReduction > beta_values, OutDataTypePointerTuple p_out_value_global_tuple)
Definition gridwise_2d_multiple_reduction_multiblock.hpp:122
decltype(make_naive_tensor_descriptor_packed(make_tuple(Number< MThreadSliceSize >{}))) ThreadReduceDstDesc_M
Definition gridwise_2d_multiple_reduction_multiblock.hpp:96
static constexpr auto I0
Definition gridwise_2d_multiple_reduction_multiblock.hpp:114
static constexpr auto I1
Definition gridwise_2d_multiple_reduction_multiblock.hpp:115
static constexpr index_t M_BlockTileSize
Definition gridwise_2d_multiple_reduction_multiblock.hpp:117
static constexpr auto thread_cluster_desc
Definition gridwise_2d_multiple_reduction_multiblock.hpp:91
decltype(make_naive_tensor_descriptor_packed( make_tuple(Number< MThreadSliceSize >{}, Number< KThreadSliceSize >{}))) ThreadReduceSrcDesc_M_K
Definition gridwise_2d_multiple_reduction_multiblock.hpp:94
static constexpr bool reorder_thread_cluster
Definition gridwise_2d_multiple_reduction_multiblock.hpp:81
detail::AccumulateWithNanCheck< PropagateNan, ReduceOperation, AccDataType > Accumulation
Definition gridwise_2d_multiple_reduction_multiblock.hpp:120
ThreadwiseReduction< AccDataType, ThreadReduceSrcDesc_M_K, ThreadReduceDstDesc_M, ReduceOperation, PropagateNan > ThreadwiseReduce
Definition gridwise_2d_multiple_reduction_multiblock.hpp:106
typename conditional< reorder_thread_cluster, Sequence< 1, 0 >, Sequence< 0, 1 > >::type ThreadClusterArrangeOrder
Definition gridwise_2d_multiple_reduction_multiblock.hpp:88
PartitionedBlockwiseReduction< AccDataType, BlockSize, ThreadClusterLengths_M_K, ThreadClusterArrangeOrder, ReduceOperation, PropagateNan > BlockwiseReduce
Definition gridwise_2d_multiple_reduction_multiblock.hpp:99
Definition reduction_functions_blockwise.hpp:28
static __device__ void Reduce(BufferType &work_buffer, AccDataType &in_out_value)
Definition reduction_functions_blockwise.hpp:44
Definition utility/sequence.hpp:43
Definition static_buffer.hpp:16
Definition reduction_functions_threadwise.hpp:23
static __device__ void Reduce(const SrcBufferType &src_buf, DstBufferType &dst_buf)
Definition reduction_functions_threadwise.hpp:36
Definition threadwise_tensor_slice_transfer.hpp:39
Helper structure that facilitates transfer of source (grid) data to destination threads.
Definition threadwise_tensor_slice_transfer.hpp:234
Definition utility/functional.hpp:100
Definition reduction_functions_accumulate.hpp:28
Definition reduction_common.hpp:20
Definition functional2.hpp:33
Definition tensor_operation/gpu/element/unary_element_wise_operation.hpp:340