#874057 (C++17) No.2326 Factorial to the Power of Factorial to the...

提出ソース
結果

問題	No.2326 Factorial to the Power of Factorial to the...
コンテスト
ユーザー	suisen
提出日時	2023-05-28 14:03:46
言語	C++17 (gcc 15.2.0 + boost 1.89.0) コンパイル: `g++-15 -O2 -lm -std=c++17 -Wuninitialized -DONLINE_JUDGE -o a.out _filename_` 実行: `./a.out`
結果	AC
実行時間	14 ms / 2,000 ms
コード長	38,107 bytes
記録記録タグの例: 初AC ショートコード純ショートコード純主流ショートコード最速実行時間
コンパイル時間	3,528 ms
コンパイル使用メモリ	323,964 KB
最終ジャッジ日時	2025-02-13 10:44:01
ジャッジサーバーID （参考情報）	judge2 / judge1
このコードへのチャレンジ
（要ログイン）
ファイルパターン	結果
sample	AC * 2
other	AC * 20
権限があれば一括ダウンロードができます
ソースコード

raw source code
#include <bits/stdc++.h>

#ifdef _MSC_VER
#  include <intrin.h>
#else
#  include <x86intrin.h>
#endif

#include <limits>
#include <type_traits>

namespace suisen {
// ! utility
template <typename ...Types>
using constraints_t = std::enable_if_t<std::conjunction_v<Types...>, std::nullptr_t>;
template <bool cond_v, typename Then, typename OrElse>
constexpr decltype(auto) constexpr_if(Then&& then, OrElse&& or_else) {
    if constexpr (cond_v) {
        return std::forward<Then>(then);
    } else {
        return std::forward<OrElse>(or_else);
    }
}

// ! function
template <typename ReturnType, typename Callable, typename ...Args>
using is_same_as_invoke_result = std::is_same<std::invoke_result_t<Callable, Args...>, ReturnType>;
template <typename F, typename T>
using is_uni_op = is_same_as_invoke_result<T, F, T>;
template <typename F, typename T>
using is_bin_op = is_same_as_invoke_result<T, F, T, T>;

template <typename Comparator, typename T>
using is_comparator = std::is_same<std::invoke_result_t<Comparator, T, T>, bool>;

// ! integral
template <typename T, typename = constraints_t<std::is_integral<T>>>
constexpr int bit_num = std::numeric_limits<std::make_unsigned_t<T>>::digits;
template <typename T, unsigned int n>
struct is_nbit { static constexpr bool value = bit_num<T> == n; };
template <typename T, unsigned int n>
static constexpr bool is_nbit_v = is_nbit<T, n>::value;

// ?
template <typename T>
struct safely_multipliable {};
template <>
struct safely_multipliable<int> { using type = long long; };
template <>
struct safely_multipliable<long long> { using type = __int128_t; };
template <>
struct safely_multipliable<unsigned int> { using type = unsigned long long; };
template <>
struct safely_multipliable<unsigned long int> { using type = __uint128_t; };
template <>
struct safely_multipliable<unsigned long long> { using type = __uint128_t; };
template <>
struct safely_multipliable<float> { using type = float; };
template <>
struct safely_multipliable<double> { using type = double; };
template <>
struct safely_multipliable<long double> { using type = long double; };
template <typename T>
using safely_multipliable_t = typename safely_multipliable<T>::type;

template <typename T, typename = void>
struct rec_value_type {
    using type = T;
};
template <typename T>
struct rec_value_type<T, std::void_t<typename T::value_type>> {
    using type = typename rec_value_type<typename T::value_type>::type;
};
template <typename T>
using rec_value_type_t = typename rec_value_type<T>::type;

} // namespace suisen

// ! type aliases
using i128 = __int128_t;
using u128 = __uint128_t;

template <typename T>
using pq_greater = std::priority_queue<T, std::vector<T>, std::greater<T>>;

// ! macros (internal)
#define DETAIL_OVERLOAD2(_1,_2,name,...) name
#define DETAIL_OVERLOAD3(_1,_2,_3,name,...) name
#define DETAIL_OVERLOAD4(_1,_2,_3,_4,name,...) name

#define DETAIL_REP4(i,l,r,s)  for(std::remove_reference_t<std::remove_const_t<decltype(r)>>i=(l);i<(r);i+=(s))
#define DETAIL_REP3(i,l,r)    DETAIL_REP4(i,l,r,1)
#define DETAIL_REP2(i,n)      DETAIL_REP3(i,0,n)
#define DETAIL_REPINF3(i,l,s) for(std::remove_reference_t<std::remove_const_t<decltype(l)>>i=(l);;i+=(s))
#define DETAIL_REPINF2(i,l)   DETAIL_REPINF3(i,l,1)
#define DETAIL_REPINF1(i)     DETAIL_REPINF2(i,0)
#define DETAIL_RREP4(i,l,r,s) for(std::remove_reference_t<std::remove_const_t<decltype(r)>>i=(l)+fld((r)-(l)-1,s)*(s);i>=(l);i-=(s))
#define DETAIL_RREP3(i,l,r)   DETAIL_RREP4(i,l,r,1)
#define DETAIL_RREP2(i,n)     DETAIL_RREP3(i,0,n)

#define DETAIL_CAT_I(a, b) a##b
#define DETAIL_CAT(a, b) DETAIL_CAT_I(a, b)
#define DETAIL_UNIQVAR(tag) DETAIL_CAT(tag, __LINE__)

// ! macros
#define REP(...)    DETAIL_OVERLOAD4(__VA_ARGS__, DETAIL_REP4   , DETAIL_REP3   , DETAIL_REP2   )(__VA_ARGS__)
#define RREP(...)   DETAIL_OVERLOAD4(__VA_ARGS__, DETAIL_RREP4  , DETAIL_RREP3  , DETAIL_RREP2  )(__VA_ARGS__)
#define REPINF(...) DETAIL_OVERLOAD3(__VA_ARGS__, DETAIL_REPINF3, DETAIL_REPINF2, DETAIL_REPINF1)(__VA_ARGS__)

#define LOOP(n) for (std::remove_reference_t<std::remove_const_t<decltype(n)>> DETAIL_UNIQVAR(loop_variable) = n; DETAIL_UNIQVAR(loop_variable) --> 0;)

#define ALL(iterable) std::begin(iterable), std::end(iterable)
#define INPUT(type, ...) type __VA_ARGS__; read(__VA_ARGS__)

// ! debug

#ifdef LOCAL
#  define debug(...) debug_internal(#__VA_ARGS__, __VA_ARGS__)

template <class T, class... Args>
void debug_internal(const char* s, T&& first, Args&&... args) {
    constexpr const char* prefix = "[\033[32mDEBUG\033[m] ";
    constexpr const char* open_brakets = sizeof...(args) == 0 ? "" : "(";
    constexpr const char* close_brakets = sizeof...(args) == 0 ? "" : ")";
    std::cerr << prefix << open_brakets << s << close_brakets << ": " << open_brakets << std::forward<T>(first);
    ((std::cerr << ", " << std::forward<Args>(args)), ...);
    std::cerr << close_brakets << "\n";
}

#else
#  define debug(...) void(0)
#endif

// ! I/O utilities

// __int128_t
std::ostream& operator<<(std::ostream& dest, __int128_t value) {
    std::ostream::sentry s(dest);
    if (s) {
        __uint128_t tmp = value < 0 ? -value : value;
        char buffer[128];
        char* d = std::end(buffer);
        do {
            --d;
            *d = "0123456789"[tmp % 10];
            tmp /= 10;
        } while (tmp != 0);
        if (value < 0) {
            --d;
            *d = '-';
        }
        int len = std::end(buffer) - d;
        if (dest.rdbuf()->sputn(d, len) != len) {
            dest.setstate(std::ios_base::badbit);
        }
    }
    return dest;
}
// __uint128_t
std::ostream& operator<<(std::ostream& dest, __uint128_t value) {
    std::ostream::sentry s(dest);
    if (s) {
        char buffer[128];
        char* d = std::end(buffer);
        do {
            --d;
            *d = "0123456789"[value % 10];
            value /= 10;
        } while (value != 0);
        int len = std::end(buffer) - d;
        if (dest.rdbuf()->sputn(d, len) != len) {
            dest.setstate(std::ios_base::badbit);
        }
    }
    return dest;
}

// pair
template <typename T, typename U>
std::ostream& operator<<(std::ostream& out, const std::pair<T, U>& a) {
    return out << a.first << ' ' << a.second;
}
// tuple
template <unsigned int N = 0, typename ...Args>
std::ostream& operator<<(std::ostream& out, const std::tuple<Args...>& a) {
    if constexpr (N >= std::tuple_size_v<std::tuple<Args...>>) return out;
    else {
        out << std::get<N>(a);
        if constexpr (N + 1 < std::tuple_size_v<std::tuple<Args...>>) out << ' ';
        return operator<<<N + 1>(out, a);
    }
}
// vector
template <typename T>
std::ostream& operator<<(std::ostream& out, const std::vector<T>& a) {
    for (auto it = a.begin(); it != a.end();) {
        out << *it;
        if (++it != a.end()) out << ' ';
    }
    return out;
}
// array
template <typename T, size_t N>
std::ostream& operator<<(std::ostream& out, const std::array<T, N>& a) {
    for (auto it = a.begin(); it != a.end();) {
        out << *it;
        if (++it != a.end()) out << ' ';
    }
    return out;
}
inline void print() { std::cout << '\n'; }
template <typename Head, typename... Tail>
inline void print(const Head& head, const Tail &...tails) {
    std::cout << head;
    if (sizeof...(tails)) std::cout << ' ';
    print(tails...);
}
template <typename Iterable>
auto print_all(const Iterable& v, std::string sep = " ", std::string end = "\n") -> decltype(std::cout << *v.begin(), void()) {
    for (auto it = v.begin(); it != v.end();) {
        std::cout << *it;
        if (++it != v.end()) std::cout << sep;
    }
    std::cout << end;
}

__int128_t stoi128(const std::string& s) {
    __int128_t ret = 0;
    for (int i = 0; i < int(s.size()); i++) if ('0' <= s[i] and s[i] <= '9') ret = 10 * ret + s[i] - '0';
    if (s[0] == '-') ret = -ret;
    return ret;
}
__uint128_t stou128(const std::string& s) {
    __uint128_t ret = 0;
    for (int i = 0; i < int(s.size()); i++) if ('0' <= s[i] and s[i] <= '9') ret = 10 * ret + s[i] - '0';
    return ret;
}
// __int128_t
std::istream& operator>>(std::istream& in, __int128_t& v) {
    std::string s;
    in >> s;
    v = stoi128(s);
    return in;
}
// __uint128_t
std::istream& operator>>(std::istream& in, __uint128_t& v) {
    std::string s;
    in >> s;
    v = stou128(s);
    return in;
}
// pair
template <typename T, typename U>
std::istream& operator>>(std::istream& in, std::pair<T, U>& a) {
    return in >> a.first >> a.second;
}
// tuple
template <unsigned int N = 0, typename ...Args>
std::istream& operator>>(std::istream& in, std::tuple<Args...>& a) {
    if constexpr (N >= std::tuple_size_v<std::tuple<Args...>>) return in;
    else return operator>><N + 1>(in >> std::get<N>(a), a);
}
// vector
template <typename T>
std::istream& operator>>(std::istream& in, std::vector<T>& a) {
    for (auto it = a.begin(); it != a.end(); ++it) in >> *it;
    return in;
}
// array
template <typename T, size_t N>
std::istream& operator>>(std::istream& in, std::array<T, N>& a) {
    for (auto it = a.begin(); it != a.end(); ++it) in >> *it;
    return in;
}
template <typename ...Args>
void read(Args &...args) {
    (std::cin >> ... >> args);
}

// ! integral utilities

// Returns pow(-1, n)
template <typename T> constexpr inline int pow_m1(T n) {
    return -(n & 1) | 1;
}
// Returns pow(-1, n)
template <> constexpr inline int pow_m1<bool>(bool n) {
    return -int(n) | 1;
}

// Returns floor(x / y)
template <typename T> constexpr inline T fld(const T x, const T y) {
    return (x ^ y) >= 0 ? x / y : (x - (y + pow_m1(y >= 0))) / y;
}
template <typename T> constexpr inline T cld(const T x, const T y) {
    return (x ^ y) <= 0 ? x / y : (x + (y + pow_m1(y >= 0))) / y;
}

template <typename T, std::enable_if_t<std::negation_v<suisen::is_nbit<T, 64>>, std::nullptr_t> = nullptr>
__attribute__((target("popcnt"))) constexpr inline int popcount(const T x) { return _mm_popcnt_u32(x); }
template <typename T, std::enable_if_t<suisen::is_nbit_v<T, 64>, std::nullptr_t> = nullptr>
__attribute__((target("popcnt"))) constexpr inline int popcount(const T x) { return _mm_popcnt_u64(x); }
template <typename T, std::enable_if_t<std::negation_v<suisen::is_nbit<T, 64>>, std::nullptr_t> = nullptr>
constexpr inline int count_lz(const T x) { return x ? __builtin_clz(x) : suisen::bit_num<T>; }
template <typename T, std::enable_if_t<suisen::is_nbit_v<T, 64>, std::nullptr_t> = nullptr>
constexpr inline int count_lz(const T x) { return x ? __builtin_clzll(x) : suisen::bit_num<T>; }
template <typename T, std::enable_if_t<std::negation_v<suisen::is_nbit<T, 64>>, std::nullptr_t> = nullptr>
constexpr inline int count_tz(const T x) { return x ? __builtin_ctz(x) : suisen::bit_num<T>; }
template <typename T, std::enable_if_t<suisen::is_nbit_v<T, 64>, std::nullptr_t> = nullptr>
constexpr inline int count_tz(const T x) { return x ? __builtin_ctzll(x) : suisen::bit_num<T>; }
template <typename T> constexpr inline int floor_log2(const T x) { return suisen::bit_num<T> - 1 - count_lz(x); }
template <typename T> constexpr inline int ceil_log2(const T x) { return floor_log2(x) + ((x & -x) != x); }
template <typename T> constexpr inline int kth_bit(const T x, const unsigned int k) { return (x >> k) & 1; }
template <typename T> constexpr inline int parity(const T x) { return popcount(x) & 1; }

// ! container

template <typename T, typename Comparator>
auto priqueue_comp(const Comparator comparator) {
    return std::priority_queue<T, std::vector<T>, Comparator>(comparator);
}

template <typename Container>
void sort_unique_erase(Container& a) {
    std::sort(a.begin(), a.end());
    a.erase(std::unique(a.begin(), a.end()), a.end());
}

template <typename InputIterator, typename BiConsumer>
auto foreach_adjacent_values(InputIterator first, InputIterator last, BiConsumer f) -> decltype(f(*first++, *last), void()) {
    if (first != last) for (auto itr = first, itl = itr++; itr != last; itl = itr++) f(*itl, *itr);
}
template <typename Container, typename BiConsumer>
auto foreach_adjacent_values(Container &&c, BiConsumer f) -> decltype(c.begin(), c.end(), void()) {
    foreach_adjacent_values(c.begin(), c.end(), f);
}

// ! other utilities

// x <- min(x, y). returns true iff `x` has chenged.
template <typename T>
inline bool chmin(T& x, const T& y) {
    return y >= x ? false : (x = y, true);
}
// x <- max(x, y). returns true iff `x` has chenged.
template <typename T>
inline bool chmax(T& x, const T& y) {
    return y <= x ? false : (x = y, true);
}

template <typename T, std::enable_if_t<std::is_integral_v<T>, std::nullptr_t> = nullptr>
std::string bin(T val, int bit_num = -1) {
    std::string res;
    if (bit_num != -1) {
        for (int bit = bit_num; bit-- > 0;) res += '0' + ((val >> bit) & 1);
    } else {
        for (; val; val >>= 1) res += '0' + (val & 1);
        std::reverse(res.begin(), res.end());
    }
    return res;
}

template <typename T, std::enable_if_t<std::is_integral_v<T>, std::nullptr_t> = nullptr>
std::vector<T> digits_low_to_high(T val, T base = 10) {
    std::vector<T> res;
    for (; val; val /= base) res.push_back(val % base);
    if (res.empty()) res.push_back(T{ 0 });
    return res;
}
template <typename T, std::enable_if_t<std::is_integral_v<T>, std::nullptr_t> = nullptr>
std::vector<T> digits_high_to_low(T val, T base = 10) {
    auto res = digits_low_to_high(val, base);
    std::reverse(res.begin(), res.end());
    return res;
}

template <typename T>
std::string join(const std::vector<T>& v, const std::string& sep, const std::string& end) {
    std::ostringstream ss;
    for (auto it = v.begin(); it != v.end();) {
        ss << *it;
        if (++it != v.end()) ss << sep;
    }
    ss << end;
    return ss.str();
}

template <typename Func, typename Seq>
auto transform_to_vector(const Func &f, const Seq &s) {
    std::vector<std::invoke_result_t<Func, typename Seq::value_type>> v;
    v.reserve(std::size(s)), std::transform(std::begin(s), std::end(s), std::back_inserter(v), f);
    return v;
}
template <typename T, typename Seq>
auto copy_to_vector(const Seq &s) {
    std::vector<T> v;
    v.reserve(std::size(s)), std::copy(std::begin(s), std::end(s), std::back_inserter(v));
    return v;
}
template <typename Seq>
Seq concat(Seq s, const Seq &t) {
    s.reserve(std::size(s) + std::size(t));
    std::copy(std::begin(t), std::end(t), std::back_inserter(s));
    return s;
}
template <typename Seq>
std::vector<Seq> split(const Seq s, typename Seq::value_type delim) {
    std::vector<Seq> res;
    for (auto itl = std::begin(s), itr = itl;; itl = ++itr) {
        while (itr != std::end(s) and *itr != delim) ++itr;
        res.emplace_back(itl, itr);
        if (itr == std::end(s)) return res;
    }
}

int digit_to_int(char c) { return c - '0'; }
int lowercase_to_int(char c) { return c - 'a'; }
int uppercase_to_int(char c) { return c - 'A'; }

std::vector<int> digit_str_to_ints(const std::string &s) {
    return transform_to_vector(digit_to_int, s);
}
std::vector<int> lowercase_str_to_ints(const std::string &s) {
    return transform_to_vector(lowercase_to_int, s);
}
std::vector<int> uppercase_str_to_ints(const std::string &s) {
    return transform_to_vector(uppercase_to_int, s);
}

const std::string Yes = "Yes", No = "No", YES = "YES", NO = "NO";

namespace suisen {}
using namespace suisen;
using namespace std;

struct io_setup {
    io_setup(int precision = 20) {
        std::ios::sync_with_stdio(false);
        std::cin.tie(nullptr);
        std::cout << std::fixed << std::setprecision(precision);
    }
} io_setup_ {};

// ! code from here

#include <atcoder/modint>

using mint = atcoder::modint1000000007;

namespace atcoder {
    std::istream& operator>>(std::istream& in, mint &a) {
        long long e; in >> e; a = e;
        return in;
    }
    
    std::ostream& operator<<(std::ostream& out, const mint &a) {
        out << a.val();
        return out;
    }
} // namespace atcoder

#include <numeric>
#include <optional>

#include <cmath>
#include <iostream>
#include <random>
#include <utility>

#include <array>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <tuple>

namespace suisen {
    namespace internal::montgomery {
        template <typename Int, typename DInt>
        struct Montgomery {
        private:
            static constexpr uint32_t bits = std::numeric_limits<Int>::digits;
            static constexpr Int mask = ~Int(0);
            // R = 2**32 or 2**64

            // 1. N is an odd number
            // 2. N < R
            // 3. gcd(N, R) = 1
            // 4. R * R2 - N * N2 = 1
            // 5. 0 < R2 < N
            // 6. 0 < N2 < R
            Int N, N2, R2;

            // RR = R * R (mod N)
            Int RR;
        public:
            constexpr Montgomery() = default;
            explicit constexpr Montgomery(Int N) : N(N), N2(calcN2(N)), R2(calcR2(N, N2)), RR(calcRR(N)) {
                assert(N & 1);
            }

            // @returns t * R (mod N)
            constexpr Int make(Int t) const {
                return reduce(static_cast<DInt>(t) * RR);
            }
            // @returns T * R^(-1) (mod N)
            constexpr Int reduce(DInt T) const {
                // 0 <= T < RN

                // Note:
                //  1. m = T * N2 (mod R)
                //  2. 0 <= m < R
                DInt m = modR(static_cast<DInt>(modR(T)) * N2);

                // Note:
                //  T + m * N = T + T * N * N2 = T + T * (R * R2 - 1) = 0 (mod R)
                //  => (T + m * N) / R is an integer.
                //  => t * R = T + m * N = T (mod N)
                //  => t = T R^(-1) (mod N)
                DInt t = divR(T + m * N);

                // Note:
                //  1. 0 <= T < RN
                //  2. 0 <= mN < RN (because 0 <= m < R)
                //  => 0 <= T + mN < 2RN
                //  => 0 <= t < 2N
                return t >= N ? t - N : t;
            }

            constexpr Int add(Int A, Int B) const {
                return (A += B) >= N ? A - N : A;
            }
            constexpr Int sub(Int A, Int B) const {
                return (A -= B) < 0 ? A + N : A;
            }
            constexpr Int mul(Int A, Int B) const {
                return reduce(static_cast<DInt>(A) * B);
            }
            constexpr Int div(Int A, Int B) const {
                return reduce(static_cast<DInt>(A) * inv(B));
            }
            constexpr Int inv(Int A) const; // TODO: Implement

            constexpr Int pow(Int A, long long b) const {
                Int P = make(1);
                for (; b; b >>= 1) {
                    if (b & 1) P = mul(P, A);
                    A = mul(A, A);
                }
                return P;
            }

        private:
            static constexpr Int divR(DInt t) { return t >> bits; }
            static constexpr Int modR(DInt t) { return t & mask; }

            static constexpr Int calcN2(Int N) {
                // - N * N2 = 1 (mod R)
                // N2 = -N^{-1} (mod R)

                // calculates N^{-1} (mod R) by Newton's method
                DInt invN = N; // = N^{-1} (mod 2^2)
                for (uint32_t cur_bits = 2; cur_bits < bits; cur_bits *= 2) {
                    // loop invariant: invN = N^{-1} (mod 2^cur_bits)

                    // x = a^{-1} mod m => x(2-ax) = a^{-1} mod m^2 because:
                    //  ax = 1 (mod m)
                    //  => (ax-1)^2 = 0 (mod m^2)
                    //  => 2ax - a^2x^2 = 1 (mod m^2)
                    //  => a(x(2-ax)) = 1 (mod m^2)
                    invN = modR(invN * modR(2 - N * invN));
                }
                assert(modR(N * invN) == 1);

                return modR(-invN);
            }
            static constexpr Int calcR2(Int N, Int N2) {
                // R * R2 - N * N2 = 1
                // => R2 = (1 + N * N2) / R
                return divR(1 + static_cast<DInt>(N) * N2);
            }
            static constexpr Int calcRR(Int N) {
                return -DInt(N) % N;
            }
        };
    } // namespace internal::montgomery
    using Montgomery32 = internal::montgomery::Montgomery<uint32_t, uint64_t>;
    using Montgomery64 = internal::montgomery::Montgomery<uint64_t, __uint128_t>;
} // namespace suisen

namespace suisen::miller_rabin {
    namespace internal {
        constexpr uint64_t THRESHOLD_1 = 341531ULL;
        constexpr uint64_t BASE_1[]{ 9345883071009581737ULL };

        constexpr uint64_t THRESHOLD_2 = 1050535501ULL;
        constexpr uint64_t BASE_2[]{ 336781006125ULL, 9639812373923155ULL };

        constexpr uint64_t THRESHOLD_3 = 350269456337ULL;
        constexpr uint64_t BASE_3[]{ 4230279247111683200ULL, 14694767155120705706ULL, 16641139526367750375ULL };

        constexpr uint64_t THRESHOLD_4 = 55245642489451ULL;
        constexpr uint64_t BASE_4[]{ 2ULL, 141889084524735ULL, 1199124725622454117ULL, 11096072698276303650ULL };

        constexpr uint64_t THRESHOLD_5 = 7999252175582851ULL;
        constexpr uint64_t BASE_5[]{ 2ULL, 4130806001517ULL, 149795463772692060ULL, 186635894390467037ULL, 3967304179347715805ULL };

        constexpr uint64_t THRESHOLD_6 = 585226005592931977ULL;
        constexpr uint64_t BASE_6[]{ 2ULL, 123635709730000ULL, 9233062284813009ULL, 43835965440333360ULL, 761179012939631437ULL, 1263739024124850375ULL };

        constexpr uint64_t BASE_7[]{ 2U, 325U, 9375U, 28178U, 450775U, 9780504U, 1795265022U };

        template <auto BASE, std::size_t SIZE>
        constexpr bool miller_rabin(uint64_t n) {
            if (n == 2 or n == 3 or n == 5 or n == 7) return true;
            if (n <= 1 or n % 2 == 0 or n % 3 == 0 or n % 5 == 0 or n % 7 == 0) return false;
            if (n < 121) return true;

            const uint32_t s = __builtin_ctzll(n - 1); // >= 1
            const uint64_t d = (n - 1) >> s;

            const Montgomery64 mg{ n };

            const uint64_t one = mg.make(1), minus_one = mg.make(n - 1);

            for (std::size_t i = 0; i < SIZE; ++i) {
                uint64_t a = BASE[i] % n;
                if (a == 0) continue;
                uint64_t Y = mg.pow(mg.make(a), d);
                if (Y == one) continue;
                for (uint32_t r = 0;; ++r, Y = mg.mul(Y, Y)) {
                    // Y = a^(d 2^r)
                    if (Y == minus_one) break;
                    if (r == s - 1) return false;
                }
            }
            return true;
        }
    }

    template <typename T, std::enable_if_t<std::is_integral_v<T>, std::nullptr_t> = nullptr>
    constexpr bool is_prime(T n) {
        if constexpr (std::is_signed_v<T>) {
            assert(n >= 0);
        }
        const std::make_unsigned_t<T> n_unsigned = n;
        assert(n_unsigned <= std::numeric_limits<uint64_t>::max()); // n < 2^64
        using namespace internal;
        if (n_unsigned < THRESHOLD_1) return miller_rabin<BASE_1, 1>(n_unsigned);
        if (n_unsigned < THRESHOLD_2) return miller_rabin<BASE_2, 2>(n_unsigned);
        if (n_unsigned < THRESHOLD_3) return miller_rabin<BASE_3, 3>(n_unsigned);
        if (n_unsigned < THRESHOLD_4) return miller_rabin<BASE_4, 4>(n_unsigned);
        if (n_unsigned < THRESHOLD_5) return miller_rabin<BASE_5, 5>(n_unsigned);
        if (n_unsigned < THRESHOLD_6) return miller_rabin<BASE_6, 6>(n_unsigned);
        return miller_rabin<BASE_7, 7>(n_unsigned);
    }
} // namespace suisen::miller_rabin

#include <vector>

namespace suisen::internal::sieve {

constexpr std::uint8_t K = 8;
constexpr std::uint8_t PROD = 2 * 3 * 5;
constexpr std::uint8_t RM[K] = { 1,  7, 11, 13, 17, 19, 23, 29 };
constexpr std::uint8_t DR[K] = { 6,  4,  2,  4,  2,  4,  6,  2 };
constexpr std::uint8_t DF[K][K] = {
    { 0, 0, 0, 0, 0, 0, 0, 1 }, { 1, 1, 1, 0, 1, 1, 1, 1 },
    { 2, 2, 0, 2, 0, 2, 2, 1 }, { 3, 1, 1, 2, 1, 1, 3, 1 },
    { 3, 3, 1, 2, 1, 3, 3, 1 }, { 4, 2, 2, 2, 2, 2, 4, 1 },
    { 5, 3, 1, 4, 1, 3, 5, 1 }, { 6, 4, 2, 4, 2, 4, 6, 1 },
};
constexpr std::uint8_t DRP[K] = { 48, 32, 16, 32, 16, 32, 48, 16 };
constexpr std::uint8_t DFP[K][K] = {
    {  0,  0,  0,  0,  0,  0,  0,  8 }, {  8,  8,  8,  0,  8,  8,  8,  8 },
    { 16, 16,  0, 16,  0, 16, 16,  8 }, { 24,  8,  8, 16,  8,  8, 24,  8 },
    { 24, 24,  8, 16,  8, 24, 24,  8 }, { 32, 16, 16, 16, 16, 16, 32,  8 },
    { 40, 24,  8, 32,  8, 24, 40,  8 }, { 48, 32, 16, 32, 16, 32, 48,  8 },
};

constexpr std::uint8_t MASK[K][K] = {
    { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80 }, { 0x02, 0x20, 0x10, 0x01, 0x80, 0x08, 0x04, 0x40 },
    { 0x04, 0x10, 0x01, 0x40, 0x02, 0x80, 0x08, 0x20 }, { 0x08, 0x01, 0x40, 0x20, 0x04, 0x02, 0x80, 0x10 },
    { 0x10, 0x80, 0x02, 0x04, 0x20, 0x40, 0x01, 0x08 }, { 0x20, 0x08, 0x80, 0x02, 0x40, 0x01, 0x10, 0x04 },
    { 0x40, 0x04, 0x08, 0x80, 0x01, 0x10, 0x20, 0x02 }, { 0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01 },
};
constexpr std::uint8_t OFFSET[K][K] = {
    { 0, 1, 2, 3, 4, 5, 6, 7, },
    { 1, 5, 4, 0, 7, 3, 2, 6, },
    { 2, 4, 0, 6, 1, 7, 3, 5, },
    { 3, 0, 6, 5, 2, 1, 7, 4, },
    { 4, 7, 1, 2, 5, 6, 0, 3, },
    { 5, 3, 7, 1, 6, 0, 4, 2, },
    { 6, 2, 3, 7, 0, 4, 5, 1, },
    { 7, 6, 5, 4, 3, 2, 1, 0, },
};

constexpr std::uint8_t mask_to_index(const std::uint8_t bits) {
    switch (bits) {
        case 1 << 0: return 0;
        case 1 << 1: return 1;
        case 1 << 2: return 2;
        case 1 << 3: return 3;
        case 1 << 4: return 4;
        case 1 << 5: return 5;
        case 1 << 6: return 6;
        case 1 << 7: return 7;
        default: assert(false);
    }
}
} // namespace suisen::internal::sieve

namespace suisen {

template <unsigned int N>
class SimpleSieve {
    private:
        static constexpr unsigned int siz = N / internal::sieve::PROD + 1;
        static std::uint8_t flag[siz];
    public:
        SimpleSieve() {
            using namespace internal::sieve;
            flag[0] |= 1;
            unsigned int k_max = (unsigned int) std::sqrt(N + 2) / PROD;
            for (unsigned int kp = 0; kp <= k_max; ++kp) {
                for (std::uint8_t bits = ~flag[kp]; bits; bits &= bits - 1) {
                    const std::uint8_t mp = mask_to_index(bits & -bits), m = RM[mp];
                    unsigned int kr = kp * (PROD * kp + 2 * m) + m * m / PROD;
                    for (std::uint8_t mq = mp; kr < siz; kr += kp * DR[mq] + DF[mp][mq], ++mq &= 7) {
                        flag[kr] |= MASK[mp][mq];
                    }
                }
            }
        }
        std::vector<int> prime_list(unsigned int max_val = N) const {
            using namespace internal::sieve;
            std::vector<int> res { 2, 3, 5 };
            res.reserve(max_val / 25);
            for (unsigned int i = 0, offset = 0; i < siz and offset < max_val; ++i, offset += PROD) {
                for (uint8_t f = ~flag[i]; f;) {
                    uint8_t g = f & -f;
                    res.push_back(offset + RM[mask_to_index(g)]);
                    f ^= g;
                }
            }
            while (res.size() and (unsigned int) res.back() > max_val) res.pop_back();
            return res;
        }
        bool is_prime(const unsigned int p) const {
            using namespace internal::sieve;
            switch (p) {
                case 2: case 3: case 5: return true;
                default:
                    switch (p % PROD) {
                        case RM[0]: return ((flag[p / PROD] >> 0) & 1) == 0;
                        case RM[1]: return ((flag[p / PROD] >> 1) & 1) == 0;
                        case RM[2]: return ((flag[p / PROD] >> 2) & 1) == 0;
                        case RM[3]: return ((flag[p / PROD] >> 3) & 1) == 0;
                        case RM[4]: return ((flag[p / PROD] >> 4) & 1) == 0;
                        case RM[5]: return ((flag[p / PROD] >> 5) & 1) == 0;
                        case RM[6]: return ((flag[p / PROD] >> 6) & 1) == 0;
                        case RM[7]: return ((flag[p / PROD] >> 7) & 1) == 0;
                        default: return false;
                    }
            }
        }
};
template <unsigned int N>
std::uint8_t SimpleSieve<N>::flag[SimpleSieve<N>::siz];

template <unsigned int N>
class Sieve {
    private:
        static constexpr unsigned int base_max = (N + 1) * internal::sieve::K / internal::sieve::PROD;
        static unsigned int pf[base_max + internal::sieve::K];

    public:
        Sieve() {
            using namespace internal::sieve;
            pf[0] = 1;
            unsigned int k_max = ((unsigned int) std::sqrt(N + 1) - 1) / PROD;
            for (unsigned int kp = 0; kp <= k_max; ++kp) {
                const int base_i = kp * K, base_act_i = kp * PROD;
                for (int mp = 0; mp < K; ++mp) {
                    const int m = RM[mp], i = base_i + mp;
                    if (pf[i] == 0) {
                        unsigned int act_i = base_act_i + m;
                        unsigned int base_k = (kp * (PROD * kp + 2 * m) + m * m / PROD) * K;
                        for (std::uint8_t mq = mp; base_k <= base_max; base_k += kp * DRP[mq] + DFP[mp][mq], ++mq &= 7) {
                            pf[base_k + OFFSET[mp][mq]] = act_i;
                        }
                    }
                }
            }
        }
        bool is_prime(const unsigned int p) const {
            using namespace internal::sieve;
            switch (p) {
                case 2: case 3: case 5: return true;
                default:
                    switch (p % PROD) {
                        case RM[0]: return pf[p / PROD * K + 0] == 0;
                        case RM[1]: return pf[p / PROD * K + 1] == 0;
                        case RM[2]: return pf[p / PROD * K + 2] == 0;
                        case RM[3]: return pf[p / PROD * K + 3] == 0;
                        case RM[4]: return pf[p / PROD * K + 4] == 0;
                        case RM[5]: return pf[p / PROD * K + 5] == 0;
                        case RM[6]: return pf[p / PROD * K + 6] == 0;
                        case RM[7]: return pf[p / PROD * K + 7] == 0;
                        default: return false;
                    }
            }
        }
        int prime_factor(const unsigned int p) const {
            using namespace internal::sieve;
            switch (p % PROD) {
                case  0: case  2: case  4: case  6: case  8:
                case 10: case 12: case 14: case 16: case 18:
                case 20: case 22: case 24: case 26: case 28: return 2;
                case  3: case  9: case 15: case 21: case 27: return 3;
                case  5: case 25: return 5;
                case RM[0]: return pf[p / PROD * K + 0] ? pf[p / PROD * K + 0] : p;
                case RM[1]: return pf[p / PROD * K + 1] ? pf[p / PROD * K + 1] : p;
                case RM[2]: return pf[p / PROD * K + 2] ? pf[p / PROD * K + 2] : p;
                case RM[3]: return pf[p / PROD * K + 3] ? pf[p / PROD * K + 3] : p;
                case RM[4]: return pf[p / PROD * K + 4] ? pf[p / PROD * K + 4] : p;
                case RM[5]: return pf[p / PROD * K + 5] ? pf[p / PROD * K + 5] : p;
                case RM[6]: return pf[p / PROD * K + 6] ? pf[p / PROD * K + 6] : p;
                case RM[7]: return pf[p / PROD * K + 7] ? pf[p / PROD * K + 7] : p;
                default: assert(false);
            }
        }
        /**
         * Returns a vector of `{ prime, index }`.
         */
        std::vector<std::pair<int, int>> factorize(unsigned int n) const {
            assert(0 < n and n <= N);
            std::vector<std::pair<int, int>> prime_powers;
            while (n > 1) {
                int p = prime_factor(n), c = 0;
                do { n /= p, ++c; } while (n % p == 0);
                prime_powers.emplace_back(p, c);
            }
            return prime_powers;
        }
        /**
         * Returns the divisors of `n`.
         * It is NOT guaranteed that the returned vector is sorted.
         */
        std::vector<int> divisors(unsigned int n) const {
            assert(0 < n and n <= N);
            std::vector<int> divs { 1 };
            for (auto [prime, index] : factorize(n)) {
                int sz = divs.size();
                for (int i = 0; i < sz; ++i) {
                    int d = divs[i];
                    for (int j = 0; j < index; ++j) {
                        divs.push_back(d *= prime);
                    }
                }
            }
            return divs;
        }
};
template <unsigned int N>
unsigned int Sieve<N>::pf[Sieve<N>::base_max + internal::sieve::K];
} // namespace suisen

namespace suisen::fast_factorize {
    namespace internal {
        template <typename T>
        constexpr int floor_log2(T n) {
            int i = 0;
            while (n) n >>= 1, ++i;
            return i - 1;
        }
        template <typename T, std::enable_if_t<std::is_integral_v<T>, std::nullptr_t> = nullptr>
        T pollard_rho(const T n) {
            using M = safely_multipliable_t<T>;
            const T m = T(1) << (floor_log2(n) / 5);

            static std::mt19937_64 rng{std::random_device{}()};
            std::uniform_int_distribution<T> dist(0, n - 1);

            // const Montgomery64 mg{n};

            while (true) {
                T c = dist(rng);
                auto f = [&](T x) -> T { return (M(x) * x + c) % n; };
                T x, y = 2, ys, q = 1, g = 1;
                for (T r = 1; g == 1; r <<= 1) {
                    x = y;
                    for (T i = 0; i < r; ++i) y = f(y);
                    for (T k = 0; k < r and g == 1; k += m) {
                        ys = y;
                        for (T i = 0; i < std::min(m, r - k); ++i) y = f(y), q = M(q) * (x > y ? x - y : y - x) % n;
                        g = std::gcd(q, n);
                    }
                }
                if (g == n) {
                    g = 1;
                    while (g == 1) ys = f(ys), g = std::gcd(x > ys ? x - ys : ys - x, n);
                }
                if (g < n) {
                    if (miller_rabin::is_prime(g)) return g;
                    if (T d = n / g; miller_rabin::is_prime(d)) return d;
                    return pollard_rho(g);
                }
            }
        }
    }

    template <typename T, std::enable_if_t<std::is_integral_v<T>, std::nullptr_t> = nullptr>
    std::vector<std::pair<T, int>> factorize(T n) {
        static constexpr int threshold = 1000000;
        static Sieve<threshold> sieve;

        std::vector<std::pair<T, int>> res;
        if (n <= threshold) {
            for (auto [p, q] : sieve.factorize(n)) res.emplace_back(p, q);
            return res;
        }

        if ((n & 1) == 0) {
            int q = 0;
            do ++q, n >>= 1; while ((n & 1) == 0);
            res.emplace_back(2, q);
        }
        for (T p = 3; p * p <= n; p += 2) {
            if (p >= 101 and n >= 1 << 20) {
                while (n > 1) {
                    if (miller_rabin::is_prime(n)) {
                        res.emplace_back(std::exchange(n, 1), 1);
                    } else {
                        p = internal::pollard_rho(n);
                        int q = 0;
                        do ++q, n /= p; while (n % p == 0);
                        res.emplace_back(p, q);
                    }
                }
                break;
            }
            if (n % p == 0) {
                int q = 0;
                do ++q, n /= p; while (n % p == 0);
                res.emplace_back(p, q);
            }
        }
        if (n > 1) res.emplace_back(n, 1);
        return res;
    }
} // namespace suisen::fast_factorize

/**
 * @brief $a \uparrow \uparrow b \pmod{m}$
*/

namespace suisen {
    namespace internal::tetration_mod {
        constexpr int max_value = std::numeric_limits<int>::max();
        int saturation_pow(int a, int b) {
            if (b >= 32) return max_value;
            long long res = 1, pow_a = a;
            for (; b; b >>= 1) {
                if (b & 1) res = std::min(res * pow_a, (long long) max_value);
                pow_a = std::min(pow_a * pow_a, (long long) max_value);
            }
            return res;
        }
        int saturation_tetration(int a, int b) {
            assert(a >= 2);
            if (b == 0) return 1;
            int exponent = 1;
            for (int i = 0; i < b and exponent != max_value; ++i) exponent = saturation_pow(a, exponent);
            return exponent;
        }
        int pow_mod(int a, int b, int m) {
            long long res = 1, pow_a = a;
            for (; b; b >>= 1) {
                if (b & 1) res = (res * pow_a) % m;
                pow_a = (pow_a * pow_a) % m;
            }
            return res;
        }
    }

    /**
     * @brief Calculates a↑↑b mod m (= a^(a^(a^...(b times)...)) mod m)
     * @param a base
     * @param b number of power operations
     * @param m mod
     * @return a↑↑b mod m
     */
    int tetration_mod(int a, int b, int m) {
        using namespace internal::tetration_mod;
        if (m == 1) return 0;
        if (a == 0) return 1 ^ (b & 1);
        if (a == 1 or b == 0) return 1;
        int i0 = 0, m0 = m;
        for (int g = std::gcd(m0, a); g != 1; g = std::gcd(m0, g)) {
            m0 /= g, ++i0;
        }
        int phi = m0;
        for (auto [p, q] : fast_factorize::factorize(m0)) {
            phi /= p, phi *= p - 1;
        }
        int exponent = saturation_tetration(a, b - 1);
        if (exponent == max_value) {
            exponent = tetration_mod(a, b - 1, phi);
            if (i0 > exponent) {
                exponent += (((i0 - exponent) + phi - 1) / phi) * phi;
            }
        } else if (i0 <= exponent) {
            exponent -= ((exponent - i0) / phi) * phi;
        }
        return pow_mod(a, exponent, m);
    }
}

int main() {
    int n, p;
    read(n, p);

    mint t = 0;
    
    for (int v = n / p; v; v /= p) {
        t += v;
    }

    debug(t);

    mint u = 1;
    REP(i, n) u *= i + 1;

    REP(i, n) {
        u = u.pow(i + 1);
    }

    print(t * u);

    return 0;
}
yukicoder

結果

ソースコード