include/ck_tile/host/fill.hpp#
include/ck_tile/host/fill.hpp
Functor for filling a range with randomly generated values from a uniform distribution.
Functor for filling a range with randomly generated values from a uniform distribution.This struct provides functionality to fill iterators or ranges with random values generated from a uniform distribution. It supports both single-threaded and multi-threaded operation.
- Template Parameters
-
T The target type for the generated values.
- Note
- The multi-threaded implementation is not guaranteed to provide perfectly distributed values across threads.
// Direct usage without creating a separate variable:
ck_tile::FillUniformDistribution<ADataType>{-1.f, 1.f}(a_host_tensor);
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <algorithm>
#include <cmath>
#include <iterator>
#include <optional>
#include <random>
#include <stdexcept>
#include <type_traits>
#include <utility>
#include <unordered_set>
#include "ck_tile/core.hpp"
#include "ck_tile/host/joinable_thread.hpp"
namespace ck_tile {
template <typename T>
struct FillUniformDistribution
{
std::optional<uint32_t> seed_{11939};
// ATTENTION: Whether to use multi-threading (note: not guaranteed to be perfectly distributed
// across threads).
template <typename ForwardIter>
{
if(threaded)
{
uint32_t num_thread = std::thread::hardware_concurrency();
auto total = static_cast<std::size_t>(std::distance(first, last));
auto work_per_thread = static_cast<std::size_t>((total + num_thread - 1) / num_thread);
std::vector<joinable_thread> threads(num_thread);
for(std::size_t it = 0; it < num_thread; ++it)
{
std::size_t iw_begin = it * work_per_thread;
std::size_t iw_end = std::min((it + 1) * work_per_thread, total);
auto thread_f = [this, total, iw_begin, iw_end, &first] {
if(iw_begin > total || iw_end > total)
return;
// need to make each thread unique, add an offset to current seed
: std::random_device{}());
std::generate(first + iw_begin, first + iw_end, [&dis, &gen]() {
else
return ck_tile::type_convert<T>(dis(gen));
});
};
threads[it] = joinable_thread(thread_f);
}
}
else
{
std::generate(first, last, [&dis, &gen]() {
else
return ck_tile::type_convert<T>(dis(gen));
});
}
}
template <typename ForwardRange>
-> std::void_t<decltype(std::declval<const FillUniformDistribution&>()(
std::begin(std::forward<ForwardRange>(range)),
std::end(std::forward<ForwardRange>(range))))>
{
(*this)(std::begin(std::forward<ForwardRange>(range)),
std::end(std::forward<ForwardRange>(range)));
}
};
template <>
struct FillUniformDistribution<ck_tile::pk_int4_t>
{
// `FillUniformDistribution<Type>{-5.0f, 5.0f}` works for all types
float b_{7.f};
std::optional<uint32_t> seed_{11939};
template <typename ForwardIter>
{
if(a_ < -8.0f || b_ > 7.0f)
{
throw std::runtime_error(
"a_ or b_ of FillUniformDistribution<ck_tile::pk_int4_t> is out of range.");
}
constexpr auto int4_array = std::array<uint8_t, 16>{0x88,
0x99,
0xaa,
0xbb,
0xcc,
0xdd,
0xee,
0xff,
0x00,
0x11,
0x22,
0x33,
0x44,
0x55,
0x66,
0x77};
std::uniform_int_distribution<std::int32_t> dis(0, max_value - min_value + 1);
while(first != last)
{
int randomInt = dis(gen);
*first = int4_array[randomInt + (min_value + 8)];
++first;
}
}
template <typename ForwardRange>
-> std::void_t<decltype(std::declval<const FillUniformDistribution&>()(
std::begin(std::forward<ForwardRange>(range)),
std::end(std::forward<ForwardRange>(range))))>
{
(*this)(std::begin(std::forward<ForwardRange>(range)),
std::end(std::forward<ForwardRange>(range)));
}
};
namespace impl {
// clang-format off
template<index_t bytes> struct RawIntegerType_ {};
// clang-format on
template <typename T>
} // namespace impl
// Note: this struct will have no const-ness will generate random
template <typename T>
struct FillUniformDistribution_Unique
{
std::optional<uint32_t> seed_{11939};
std::mt19937 gen_{};
std::unordered_set<impl::RawIntegerType<T>> set_{};
float b = 5.f,
std::optional<uint32_t> seed = {11939})
b_(b),
seed_(seed),
set_{}
{
}
template <typename ForwardIter>
{
std::mt19937& gen = gen_;
std::generate(first, last, [&dis, &gen, &set]() {
T v = static_cast<T>(0);
do
{
v = ck_tile::type_convert<T>(dis(gen));
return v;
});
}
template <typename ForwardRange>
auto operator()(ForwardRange&& range)
-> std::void_t<decltype(std::declval<FillUniformDistribution_Unique&>()(
std::begin(std::forward<ForwardRange>(range)),
std::end(std::forward<ForwardRange>(range))))>
{
(*this)(std::begin(std::forward<ForwardRange>(range)),
std::end(std::forward<ForwardRange>(range)));
}
};
template <typename T>
struct FillNormalDistribution
{
std::optional<uint32_t> seed_{11939};
// ATTENTION: threaded does not guarantee the distribution between thread
template <typename ForwardIter>
{
if(threaded)
{
uint32_t num_thread = std::thread::hardware_concurrency();
auto total = static_cast<std::size_t>(std::distance(first, last));
auto work_per_thread = static_cast<std::size_t>((total + num_thread - 1) / num_thread);
std::vector<joinable_thread> threads(num_thread);
for(std::size_t it = 0; it < num_thread; ++it)
{
std::size_t iw_begin = it * work_per_thread;
std::size_t iw_end = std::min((it + 1) * work_per_thread, total);
auto thread_f = [this, total, iw_begin, iw_end, &first] {
if(iw_begin > total || iw_end > total)
return;
// need to make each thread unique, add an offset to current seed
: std::random_device{}());
std::generate(first + iw_begin, first + iw_end, [&dis, &gen]() {
return ck_tile::type_convert<T>(dis(gen));
});
};
threads[it] = joinable_thread(thread_f);
}
}
else
{
std::generate(
first, last, [&dis, &gen]() { return ck_tile::type_convert<T>(dis(gen)); });
}
}
template <typename ForwardRange>
-> std::void_t<decltype(std::declval<const FillNormalDistribution&>()(
std::begin(std::forward<ForwardRange>(range)),
std::end(std::forward<ForwardRange>(range))))>
{
(*this)(std::begin(std::forward<ForwardRange>(range)),
std::end(std::forward<ForwardRange>(range)));
}
};
// Normally FillUniformDistributionIntegerValue should use std::uniform_int_distribution as below.
// However this produces segfaults in std::mt19937 which look like inifite loop.
// template <typename T>
// struct FillUniformDistributionIntegerValue
// {
// int a_{-5};
// int b_{5};
//
// template <typename ForwardIter>
// void operator()(ForwardIter first, ForwardIter last) const
// {
// std::mt19937 gen(11939);
// std::uniform_int_distribution<int> dis(a_, b_);
// std::generate(
// first, last, [&dis, &gen]() { return ck_tile::type_convert<T>(dis(gen)); });
// }
// };
// Workaround for uniform_int_distribution not working as expected. See note above.<
template <typename T>
struct FillUniformDistributionIntegerValue
{
std::optional<uint32_t> seed_{11939};
template <typename ForwardIter>
{
std::generate(
first, last, [&dis, &gen]() { return ck_tile::type_convert<T>(std::round(dis(gen))); });
}
template <typename ForwardRange>
-> std::void_t<decltype(std::declval<const FillUniformDistributionIntegerValue&>()(
std::begin(std::forward<ForwardRange>(range)),
std::end(std::forward<ForwardRange>(range))))>
{
(*this)(std::begin(std::forward<ForwardRange>(range)),
std::end(std::forward<ForwardRange>(range)));
}
};
template <typename T>
struct FillNormalDistributionIntegerValue
{
std::optional<uint32_t> seed_{11939};
template <typename ForwardIter>
{
std::generate(
first, last, [&dis, &gen]() { return ck_tile::type_convert<T>(std::round(dis(gen))); });
}
template <typename ForwardRange>
-> std::void_t<decltype(std::declval<const FillNormalDistributionIntegerValue&>()(
std::begin(std::forward<ForwardRange>(range)),
std::end(std::forward<ForwardRange>(range))))>
{
(*this)(std::begin(std::forward<ForwardRange>(range)),
std::end(std::forward<ForwardRange>(range)));
}
};
template <typename T>
struct FillMonotonicSeq
{
T init_value_{0};
T step_{1};
template <typename ForwardIter>
{
auto tmp = n;
if constexpr(std::is_same_v<decltype(tmp), pk_int4_t>)
{
n.data += step_.data;
}
else
{
n += step_;
}
return tmp;
});
}
template <typename ForwardRange>
-> std::void_t<decltype(std::declval<const FillMonotonicSeq&>()(
std::begin(std::forward<ForwardRange>(range)),
std::end(std::forward<ForwardRange>(range))))>
{
(*this)(std::begin(std::forward<ForwardRange>(range)),
std::end(std::forward<ForwardRange>(range)));
}
};
template <typename T, bool IsAscending = true>
struct FillStepRange
{
template <typename ForwardIter>
{
auto tmp = n;
n += step_;
if constexpr(IsAscending)
{
if(n > end_value_)
n = start_value_;
}
else
{
if(n < end_value_)
n = start_value_;
}
return type_convert<T>(tmp);
});
}
template <typename ForwardRange>
-> std::void_t<decltype(std::declval<const FillStepRange&>()(
std::begin(std::forward<ForwardRange>(range)),
std::end(std::forward<ForwardRange>(range))))>
{
(*this)(std::begin(std::forward<ForwardRange>(range)),
std::end(std::forward<ForwardRange>(range)));
}
};
template <typename T>
struct FillConstant
{
T value_{0};
template <typename ForwardIter>
{
std::fill(first, last, value_);
}
template <typename ForwardRange>
-> std::void_t<decltype(std::declval<const FillConstant&>()(
std::begin(std::forward<ForwardRange>(range)),
std::end(std::forward<ForwardRange>(range))))>
{
(*this)(std::begin(std::forward<ForwardRange>(range)),
std::end(std::forward<ForwardRange>(range)));
}
};
//----------------------------------------------------------------------------------------------
template <typename T>
struct AdjustToStructuredSparsity
{
// masks represent all valid 2:4 structured sparsity permutations
// clang-format off
0, 1, 0, 1,
0, 1, 1, 0,
1, 0, 0, 1,
1, 0, 1, 0,
1, 1, 0, 0,
0, 0, 0, 1,
0, 0, 1, 0,
0, 1, 0, 0,
1, 0, 0, 0};
// clang-format on
template <typename ForwardIter>
{
index += 1;
return type_convert<T>(tmp);
});
}
template <typename ForwardRange>
-> std::void_t<decltype(std::declval<const AdjustToStructuredSparsity&>()(
std::begin(std::forward<ForwardRange>(range)),
std::end(std::forward<ForwardRange>(range))))>
{
(*this)(std::begin(std::forward<ForwardRange>(range)),
std::end(std::forward<ForwardRange>(range)));
}
};
template <typename T, bool UseCos = true, bool UseAbs = false>
struct FillTrigValue
{
template <typename T_, bool UseCos_ = true, bool UseAbs_ = false>
struct LinearTrigGen
{
{
float v = 0;
if constexpr(UseCos_)
{
}
else
{
}
if constexpr(UseAbs_)
v = abs(v);
i++;
return ck_tile::type_convert<T_>(v);
}
};
template <typename ForwardIter>
{
LinearTrigGen<T, UseCos, UseAbs> gen;
std::generate(first, last, gen);
}
template <typename ForwardRange>
-> std::void_t<decltype(std::declval<const FillTrigValue&>()(
std::begin(std::forward<ForwardRange>(range)),
std::end(std::forward<ForwardRange>(range))))>
{
(*this)(std::begin(std::forward<ForwardRange>(range)),
std::end(std::forward<ForwardRange>(range)));
}
};
} // namespace ck_tile
typename RawIntegerType_< sizeof(T)>::type RawIntegerType
Definition tile/host/fill.hpp:168
Definition tile/core/algorithm/cluster_descriptor.hpp:13
CK_TILE_HOST_DEVICE constexpr Y type_convert(X x)
Definition tile/core/numeric/type_convert.hpp:29
static constexpr int32_t masks[]
Definition tile/host/fill.hpp:458
void operator()(ForwardIter first, ForwardIter last) const
Definition tile/host/fill.hpp:471
void operator()(ForwardIter first, ForwardIter last) const
Definition tile/host/fill.hpp:433
void operator()(ForwardIter first, ForwardIter last) const
Definition tile/host/fill.hpp:361
std::optional< uint32_t > seed_
Definition tile/host/fill.hpp:229
void operator()(ForwardIter first, ForwardIter last) const
Definition tile/host/fill.hpp:234
void operator()(ForwardIter first, ForwardIter last) const
Definition tile/host/fill.hpp:335
float variance_
Definition tile/host/fill.hpp:331
std::optional< uint32_t > seed_
Definition tile/host/fill.hpp:332
void operator()(ForwardIter first, ForwardIter last) const
Definition tile/host/fill.hpp:396
void operator()(ForwardIter first, ForwardIter last) const
Definition tile/host/fill.hpp:517
void operator()(ForwardIter first, ForwardIter last)
Definition tile/host/fill.hpp:194
std::optional< uint32_t > seed_
Definition tile/host/fill.hpp:177
FillUniformDistribution_Unique(float a=-5.f, float b=5.f, std::optional< uint32_t > seed={11939})
Definition tile/host/fill.hpp:182
std::unordered_set< impl::RawIntegerType< T > > set_
Definition tile/host/fill.hpp:180
void operator()(ForwardIter first, ForwardIter last) const
Definition tile/host/fill.hpp:49
std::optional< uint32_t > seed_
Definition tile/host/fill.hpp:43
std::optional< uint32_t > seed_
Definition tile/host/fill.hpp:305
void operator()(ForwardIter first, ForwardIter last) const
Definition tile/host/fill.hpp:308
static constexpr int PackedSize
Definition tile/core/numeric/numeric.hpp:82
Definition library/utility/host_tensor.hpp:616