結果

問題 No.2192 平方数の下14桁
ユーザー suisensuisen
提出日時 2023-01-14 02:14:45
言語 C++17
(gcc 12.3.0 + boost 1.83.0)
結果
AC  
実行時間 4 ms / 2,000 ms
コード長 22,251 bytes
コンパイル時間 2,682 ms
コンパイル使用メモリ 227,364 KB
実行使用メモリ 5,376 KB
最終ジャッジ日時 2024-06-07 02:01:26
合計ジャッジ時間 4,178 ms
ジャッジサーバーID
(参考情報)
judge2 / judge4
このコードへのチャレンジ
(要ログイン)

テストケース

テストケース表示
入力 結果 実行時間
実行使用メモリ
testcase_00 AC 3 ms
5,248 KB
testcase_01 AC 3 ms
5,376 KB
testcase_02 AC 3 ms
5,376 KB
testcase_03 AC 3 ms
5,376 KB
testcase_04 AC 3 ms
5,376 KB
testcase_05 AC 3 ms
5,376 KB
testcase_06 AC 3 ms
5,376 KB
testcase_07 AC 3 ms
5,376 KB
testcase_08 AC 3 ms
5,376 KB
testcase_09 AC 4 ms
5,376 KB
testcase_10 AC 3 ms
5,376 KB
testcase_11 AC 3 ms
5,376 KB
testcase_12 AC 3 ms
5,376 KB
testcase_13 AC 3 ms
5,376 KB
testcase_14 AC 3 ms
5,376 KB
testcase_15 AC 3 ms
5,376 KB
testcase_16 AC 3 ms
5,376 KB
testcase_17 AC 3 ms
5,376 KB
testcase_18 AC 3 ms
5,376 KB
testcase_19 AC 3 ms
5,376 KB
testcase_20 AC 3 ms
5,376 KB
testcase_21 AC 4 ms
5,376 KB
testcase_22 AC 4 ms
5,376 KB
testcase_23 AC 4 ms
5,376 KB
testcase_24 AC 3 ms
5,376 KB
testcase_25 AC 3 ms
5,376 KB
testcase_26 AC 3 ms
5,376 KB
testcase_27 AC 3 ms
5,376 KB
testcase_28 AC 3 ms
5,376 KB
testcase_29 AC 3 ms
5,376 KB
testcase_30 AC 3 ms
5,376 KB
testcase_31 AC 3 ms
5,376 KB
testcase_32 AC 3 ms
5,376 KB
testcase_33 AC 3 ms
5,376 KB
testcase_34 AC 3 ms
5,376 KB
testcase_35 AC 3 ms
5,376 KB
testcase_36 AC 3 ms
5,376 KB
testcase_37 AC 3 ms
5,376 KB
testcase_38 AC 3 ms
5,376 KB
testcase_39 AC 3 ms
5,376 KB
testcase_40 AC 3 ms
5,376 KB
testcase_41 AC 3 ms
5,376 KB
testcase_42 AC 4 ms
5,376 KB
testcase_43 AC 3 ms
5,376 KB
権限があれば一括ダウンロードができます

ソースコード

diff #

#include <bits/stdc++.h>
using namespace std;

#include <cmath>
#include <iostream>
#include <random>
#include <numeric>
#include <utility>

#include <cassert>
#include <cstdint>
#include <iterator>

#include <limits>
#include <type_traits>

namespace suisen {
// ! utility
template <typename ...Types>
using constraints_t = std::enable_if_t<std::conjunction_v<Types...>, std::nullptr_t>;
template <bool cond_v, typename Then, typename OrElse>
constexpr decltype(auto) constexpr_if(Then&& then, OrElse&& or_else) {
    if constexpr (cond_v) {
        return std::forward<Then>(then);
    } else {
        return std::forward<OrElse>(or_else);
    }
}

// ! function
template <typename ReturnType, typename Callable, typename ...Args>
using is_same_as_invoke_result = std::is_same<std::invoke_result_t<Callable, Args...>, ReturnType>;
template <typename F, typename T>
using is_uni_op = is_same_as_invoke_result<T, F, T>;
template <typename F, typename T>
using is_bin_op = is_same_as_invoke_result<T, F, T, T>;

template <typename Comparator, typename T>
using is_comparator = std::is_same<std::invoke_result_t<Comparator, T, T>, bool>;

// ! integral
template <typename T, typename = constraints_t<std::is_integral<T>>>
constexpr int bit_num = std::numeric_limits<std::make_unsigned_t<T>>::digits;
template <typename T, unsigned int n>
struct is_nbit { static constexpr bool value = bit_num<T> == n; };
template <typename T, unsigned int n>
static constexpr bool is_nbit_v = is_nbit<T, n>::value;

// ?
template <typename T>
struct safely_multipliable {};
template <>
struct safely_multipliable<int> { using type = long long; };
template <>
struct safely_multipliable<long long> { using type = __int128_t; };
template <>
struct safely_multipliable<unsigned int> { using type = unsigned long long; };
template <>
struct safely_multipliable<unsigned long int> { using type = __uint128_t; };
template <>
struct safely_multipliable<unsigned long long> { using type = __uint128_t; };
template <>
struct safely_multipliable<float> { using type = float; };
template <>
struct safely_multipliable<double> { using type = double; };
template <>
struct safely_multipliable<long double> { using type = long double; };
template <typename T>
using safely_multipliable_t = typename safely_multipliable<T>::type;

template <typename T, typename = void>
struct rec_value_type {
    using type = T;
};
template <typename T>
struct rec_value_type<T, std::void_t<typename T::value_type>> {
    using type = typename rec_value_type<typename T::value_type>::type;
};
template <typename T>
using rec_value_type_t = typename rec_value_type<T>::type;

} // namespace suisen

namespace suisen::miller_rabin {
    namespace internal {
        constexpr uint32_t THRESHOLD_1 = 341531U;
        constexpr uint64_t BASE_1[] { 9345883071009581737ULL };

        constexpr uint32_t THRESHOLD_2 = 1050535501U;
        constexpr uint64_t BASE_2[] { 336781006125ULL, 9639812373923155ULL };

        constexpr uint64_t THRESHOLD_3 = 350269456337ULL;
        constexpr uint64_t BASE_3[] { 4230279247111683200ULL, 14694767155120705706ULL, 16641139526367750375ULL };

        constexpr uint64_t THRESHOLD_4 = 55245642489451ULL;
        constexpr uint64_t BASE_4[] { 2ULL, 141889084524735ULL, 1199124725622454117ULL, 11096072698276303650ULL };

        constexpr uint64_t THRESHOLD_5 = 7999252175582851ULL;
        constexpr uint64_t BASE_5[] { 2ULL, 4130806001517ULL, 149795463772692060ULL, 186635894390467037ULL, 3967304179347715805ULL };

        constexpr uint64_t THRESHOLD_6 = 585226005592931977ULL;
        constexpr uint64_t BASE_6[] { 2ULL, 123635709730000ULL, 9233062284813009ULL, 43835965440333360ULL, 761179012939631437ULL, 1263739024124850375ULL };

        constexpr uint32_t BASE_7[] { 2U, 325U, 9375U, 28178U, 450775U, 9780504U, 1795265022U };

        template <auto BASE, std::size_t SIZE, typename T, std::enable_if_t<std::is_integral_v<T>, std::nullptr_t> = nullptr>
        constexpr bool miller_rabin(T _n) {
            using U = std::make_unsigned_t<T>;
            using M = safely_multipliable_t<U>;

            U n = _n, d = (n - 1) >> __builtin_ctzll(n - 1);

            if (n == 2 or n == 3 or n == 5 or n == 7) return true;
            if (n % 2 == 0 or n % 3 == 0 or n % 5 == 0 or n % 7 == 0) return false;

            for (std::size_t i = 0; i < SIZE; ++i) {
                M y = 1, p = BASE[i] % n;
                if (p == 0) continue;
                for (U d2 = d; d2; d2 >>= 1) {
                    if (d2 & 1) y = y * p % n;
                    p = p * p % n;
                }
                if (y == 1) continue;
                for (U t = d; y != n - 1; t <<= 1) {
                    y = y * y % n;
                    if (y == 1 or t == n - 1) return false;
                }
            }
            return true;
        }
    }

    template <typename T, std::enable_if_t<std::is_integral_v<T>, std::nullptr_t> = nullptr>
    constexpr bool is_prime(T n) {
        if (n <= 1) return false;
        using U = std::make_unsigned_t<T>;
        U n2 = n;
        using namespace internal;
        if (n2 < THRESHOLD_1) {
            return miller_rabin<BASE_1, 1>(n2);
        } else if (n2 < THRESHOLD_2) {
            return miller_rabin<BASE_2, 2>(n2);
        } else if (n2 < THRESHOLD_3) {
            return miller_rabin<BASE_3, 3>(n2);
        } else if (n2 < THRESHOLD_4) {
            return miller_rabin<BASE_4, 4>(n2);
        } else if (n2 < THRESHOLD_5) {
            return miller_rabin<BASE_5, 5>(n2);
        } else if (n2 < THRESHOLD_6) {
            return miller_rabin<BASE_6, 6>(n2);
        } else {
            return miller_rabin<BASE_7, 7>(n2);
        }
    }
} // namespace suisen::miller_rabin

#include <vector>

namespace suisen::internal::sieve {

constexpr std::uint8_t K = 8;
constexpr std::uint8_t PROD = 2 * 3 * 5;
constexpr std::uint8_t RM[K] = { 1,  7, 11, 13, 17, 19, 23, 29 };
constexpr std::uint8_t DR[K] = { 6,  4,  2,  4,  2,  4,  6,  2 };
constexpr std::uint8_t DF[K][K] = {
    { 0, 0, 0, 0, 0, 0, 0, 1 }, { 1, 1, 1, 0, 1, 1, 1, 1 },
    { 2, 2, 0, 2, 0, 2, 2, 1 }, { 3, 1, 1, 2, 1, 1, 3, 1 },
    { 3, 3, 1, 2, 1, 3, 3, 1 }, { 4, 2, 2, 2, 2, 2, 4, 1 },
    { 5, 3, 1, 4, 1, 3, 5, 1 }, { 6, 4, 2, 4, 2, 4, 6, 1 },
};
constexpr std::uint8_t DRP[K] = { 48, 32, 16, 32, 16, 32, 48, 16 };
constexpr std::uint8_t DFP[K][K] = {
    {  0,  0,  0,  0,  0,  0,  0,  8 }, {  8,  8,  8,  0,  8,  8,  8,  8 },
    { 16, 16,  0, 16,  0, 16, 16,  8 }, { 24,  8,  8, 16,  8,  8, 24,  8 },
    { 24, 24,  8, 16,  8, 24, 24,  8 }, { 32, 16, 16, 16, 16, 16, 32,  8 },
    { 40, 24,  8, 32,  8, 24, 40,  8 }, { 48, 32, 16, 32, 16, 32, 48,  8 },
};

constexpr std::uint8_t MASK[K][K] = {
    { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80 }, { 0x02, 0x20, 0x10, 0x01, 0x80, 0x08, 0x04, 0x40 },
    { 0x04, 0x10, 0x01, 0x40, 0x02, 0x80, 0x08, 0x20 }, { 0x08, 0x01, 0x40, 0x20, 0x04, 0x02, 0x80, 0x10 },
    { 0x10, 0x80, 0x02, 0x04, 0x20, 0x40, 0x01, 0x08 }, { 0x20, 0x08, 0x80, 0x02, 0x40, 0x01, 0x10, 0x04 },
    { 0x40, 0x04, 0x08, 0x80, 0x01, 0x10, 0x20, 0x02 }, { 0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01 },
};
constexpr std::uint8_t OFFSET[K][K] = {
    { 0, 1, 2, 3, 4, 5, 6, 7, },
    { 1, 5, 4, 0, 7, 3, 2, 6, },
    { 2, 4, 0, 6, 1, 7, 3, 5, },
    { 3, 0, 6, 5, 2, 1, 7, 4, },
    { 4, 7, 1, 2, 5, 6, 0, 3, },
    { 5, 3, 7, 1, 6, 0, 4, 2, },
    { 6, 2, 3, 7, 0, 4, 5, 1, },
    { 7, 6, 5, 4, 3, 2, 1, 0, },
};

constexpr std::uint8_t mask_to_index(const std::uint8_t bits) {
    switch (bits) {
        case 1 << 0: return 0;
        case 1 << 1: return 1;
        case 1 << 2: return 2;
        case 1 << 3: return 3;
        case 1 << 4: return 4;
        case 1 << 5: return 5;
        case 1 << 6: return 6;
        case 1 << 7: return 7;
        default: assert(false);
    }
}
} // namespace suisen::internal::sieve

namespace suisen {

template <unsigned int N>
class SimpleSieve {
    private:
        static constexpr unsigned int siz = N / internal::sieve::PROD + 1;
        static std::uint8_t flag[siz];
    public:
        SimpleSieve() {
            using namespace internal::sieve;
            flag[0] |= 1;
            unsigned int k_max = (unsigned int) std::sqrt(N + 2) / PROD;
            for (unsigned int kp = 0; kp <= k_max; ++kp) {
                for (std::uint8_t bits = ~flag[kp]; bits; bits &= bits - 1) {
                    const std::uint8_t mp = mask_to_index(bits & -bits), m = RM[mp];
                    unsigned int kr = kp * (PROD * kp + 2 * m) + m * m / PROD;
                    for (std::uint8_t mq = mp; kr < siz; kr += kp * DR[mq] + DF[mp][mq], ++mq &= 7) {
                        flag[kr] |= MASK[mp][mq];
                    }
                }
            }
        }
        std::vector<int> prime_list(unsigned int max_val = N) const {
            using namespace internal::sieve;
            std::vector<int> res { 2, 3, 5 };
            res.reserve(max_val / 25);
            for (unsigned int i = 0, offset = 0; i < siz and offset < max_val; ++i, offset += PROD) {
                for (uint8_t f = ~flag[i]; f;) {
                    uint8_t g = f & -f;
                    res.push_back(offset + RM[mask_to_index(g)]);
                    f ^= g;
                }
            }
            while (res.size() and (unsigned int) res.back() > max_val) res.pop_back();
            return res;
        }
        bool is_prime(const unsigned int p) const {
            using namespace internal::sieve;
            switch (p) {
                case 2: case 3: case 5: return true;
                default:
                    switch (p % PROD) {
                        case RM[0]: return ((flag[p / PROD] >> 0) & 1) == 0;
                        case RM[1]: return ((flag[p / PROD] >> 1) & 1) == 0;
                        case RM[2]: return ((flag[p / PROD] >> 2) & 1) == 0;
                        case RM[3]: return ((flag[p / PROD] >> 3) & 1) == 0;
                        case RM[4]: return ((flag[p / PROD] >> 4) & 1) == 0;
                        case RM[5]: return ((flag[p / PROD] >> 5) & 1) == 0;
                        case RM[6]: return ((flag[p / PROD] >> 6) & 1) == 0;
                        case RM[7]: return ((flag[p / PROD] >> 7) & 1) == 0;
                        default: return false;
                    }
            }
        }
};
template <unsigned int N>
std::uint8_t SimpleSieve<N>::flag[SimpleSieve<N>::siz];

template <unsigned int N>
class Sieve {
    private:
        static constexpr unsigned int base_max = (N + 1) * internal::sieve::K / internal::sieve::PROD;
        static unsigned int pf[base_max + internal::sieve::K];

    public:
        Sieve() {
            using namespace internal::sieve;
            pf[0] = 1;
            unsigned int k_max = ((unsigned int) std::sqrt(N + 1) - 1) / PROD;
            for (unsigned int kp = 0; kp <= k_max; ++kp) {
                const int base_i = kp * K, base_act_i = kp * PROD;
                for (int mp = 0; mp < K; ++mp) {
                    const int m = RM[mp], i = base_i + mp;
                    if (pf[i] == 0) {
                        unsigned int act_i = base_act_i + m;
                        unsigned int base_k = (kp * (PROD * kp + 2 * m) + m * m / PROD) * K;
                        for (std::uint8_t mq = mp; base_k <= base_max; base_k += kp * DRP[mq] + DFP[mp][mq], ++mq &= 7) {
                            pf[base_k + OFFSET[mp][mq]] = act_i;
                        }
                    }
                }
            }
        }
        bool is_prime(const unsigned int p) const {
            using namespace internal::sieve;
            switch (p) {
                case 2: case 3: case 5: return true;
                default:
                    switch (p % PROD) {
                        case RM[0]: return pf[p / PROD * K + 0] == 0;
                        case RM[1]: return pf[p / PROD * K + 1] == 0;
                        case RM[2]: return pf[p / PROD * K + 2] == 0;
                        case RM[3]: return pf[p / PROD * K + 3] == 0;
                        case RM[4]: return pf[p / PROD * K + 4] == 0;
                        case RM[5]: return pf[p / PROD * K + 5] == 0;
                        case RM[6]: return pf[p / PROD * K + 6] == 0;
                        case RM[7]: return pf[p / PROD * K + 7] == 0;
                        default: return false;
                    }
            }
        }
        int prime_factor(const unsigned int p) const {
            using namespace internal::sieve;
            switch (p % PROD) {
                case  0: case  2: case  4: case  6: case  8:
                case 10: case 12: case 14: case 16: case 18:
                case 20: case 22: case 24: case 26: case 28: return 2;
                case  3: case  9: case 15: case 21: case 27: return 3;
                case  5: case 25: return 5;
                case RM[0]: return pf[p / PROD * K + 0] ? pf[p / PROD * K + 0] : p;
                case RM[1]: return pf[p / PROD * K + 1] ? pf[p / PROD * K + 1] : p;
                case RM[2]: return pf[p / PROD * K + 2] ? pf[p / PROD * K + 2] : p;
                case RM[3]: return pf[p / PROD * K + 3] ? pf[p / PROD * K + 3] : p;
                case RM[4]: return pf[p / PROD * K + 4] ? pf[p / PROD * K + 4] : p;
                case RM[5]: return pf[p / PROD * K + 5] ? pf[p / PROD * K + 5] : p;
                case RM[6]: return pf[p / PROD * K + 6] ? pf[p / PROD * K + 6] : p;
                case RM[7]: return pf[p / PROD * K + 7] ? pf[p / PROD * K + 7] : p;
                default: assert(false);
            }
        }
        /**
         * Returns a vector of `{ prime, index }`.
         */
        std::vector<std::pair<int, int>> factorize(unsigned int n) const {
            assert(0 < n and n <= N);
            std::vector<std::pair<int, int>> prime_powers;
            while (n > 1) {
                int p = prime_factor(n), c = 0;
                do { n /= p, ++c; } while (n % p == 0);
                prime_powers.emplace_back(p, c);
            }
            return prime_powers;
        }
        /**
         * Returns the divisors of `n`.
         * It is NOT guaranteed that the returned vector is sorted.
         */
        std::vector<int> divisors(unsigned int n) const {
            assert(0 < n and n <= N);
            std::vector<int> divs { 1 };
            for (auto [prime, index] : factorize(n)) {
                int sz = divs.size();
                for (int i = 0; i < sz; ++i) {
                    int d = divs[i];
                    for (int j = 0; j < index; ++j) {
                        divs.push_back(d *= prime);
                    }
                }
            }
            return divs;
        }
};
template <unsigned int N>
unsigned int Sieve<N>::pf[Sieve<N>::base_max + internal::sieve::K];
} // namespace suisen

namespace suisen::fast_factorize {
    namespace internal {
        template <typename T>
        constexpr int floor_log2(T n) {
            int i = 0;
            while (n) n >>= 1, ++i;
            return i - 1;
        }
        template <typename T, std::enable_if_t<std::is_integral_v<T>, std::nullptr_t> = nullptr>
        T pollard_rho(T n) {
            using M = safely_multipliable_t<T>;
            const T m = T(1) << (floor_log2(n) / 5);

            static std::mt19937_64 rng{std::random_device{}()};
            std::uniform_int_distribution<T> dist(0, n - 1);

            while (true) {
                T c = dist(rng);
                auto f = [&](T x) -> T { return (M(x) * x + c) % n; };
                T x, y = 2, ys, q = 1, g = 1;
                for (T r = 1; g == 1; r <<= 1) {
                    x = y;
                    for (T i = 0; i < r; ++i) y = f(y);
                    for (T k = 0; k < r and g == 1; k += m) {
                        ys = y;
                        for (T i = 0; i < std::min(m, r - k); ++i) y = f(y), q = M(q) * (x > y ? x - y : y - x) % n;
                        g = std::gcd(q, n);
                    }
                }
                if (g == n) {
                    g = 1;
                    while (g == 1) ys = f(ys), g = std::gcd(x > ys ? x - ys : ys - x, n);
                }
                if (g < n) {
                    if (miller_rabin::is_prime(g)) return g;
                    if (T d = n / g; miller_rabin::is_prime(d)) return d;
                    return pollard_rho(g);
                }
            }
        }
    }

    template <typename T, std::enable_if_t<std::is_integral_v<T>, std::nullptr_t> = nullptr>
    std::vector<std::pair<T, int>> factorize(T n) {
        static constexpr int threshold = 1000000;
        static Sieve<threshold> sieve;

        std::vector<std::pair<T, int>> res;
        if (n <= threshold) {
            for (auto [p, q] : sieve.factorize(n)) res.emplace_back(p, q);
            return res;
        }

        if ((n & 1) == 0) {
            int q = 0;
            do ++q, n >>= 1; while ((n & 1) == 0);
            res.emplace_back(2, q);
        }
        for (T p = 3; p * p <= n; p += 2) {
            if (p >= 101 and n >= 1 << 20) {
                while (n > 1) {
                    if (miller_rabin::is_prime(n)) {
                        res.emplace_back(std::exchange(n, 1), 1);
                    } else {
                        p = internal::pollard_rho(n);
                        int q = 0;
                        do ++q, n /= p; while (n % p == 0);
                        res.emplace_back(p, q);
                    }
                }
                break;
            }
            if (n % p == 0) {
                int q = 0;
                do ++q, n /= p; while (n % p == 0);
                res.emplace_back(p, q);
            }
        }
        if (n > 1) res.emplace_back(n, 1);
        return res;
    }
} // namespace suisen::fast_factorize

#include <optional>
#include <atcoder/math>

namespace suisen {
    namespace internal {
        long long inv_mod64(long long a, long long m) {
            return atcoder::inv_mod(a, m);
        }
        long long pow_mod64(long long a, long long b, long long m) {
            if ((a %= m) < 0) a += m;
            long long res = 1, pow_a = a;
            for (; b; b >>= 1) {
                if (b & 1) {
                    res = __int128_t(res) * pow_a % m;
                }
                pow_a = __int128_t(pow_a) * pow_a % m;
            }
            return res;
        }
        long long mul_mod64(long long a, long long b, long long m) {
            return __int128_t(a) * b % m;
        }
    }

    std::optional<long long> prime_mod_sqrt(long long a, const long long p) {
        using namespace internal;

        if ((a %= p) < 0) a += p;

        if (a == 0) return 0;
        if (p == 2) return a;

        if (pow_mod64(a, (p - 1) / 2, p) != 1) {
            return std::nullopt;
        }

        long long b = 1;
        while (pow_mod64(b, (p - 1) / 2, p) == 1) {
            ++b;
        }

        int tlz = __builtin_ctz(p - 1);
        long long q = (p - 1) >> tlz;

        long long ia = inv_mod64(a, p);

        long long x = pow_mod64(a, (q + 1) / 2, p);
        b = pow_mod64(b, q, p);
        for (int shift = 2;; ++shift) {
            long long x2 = mul_mod64(x, x, p);
            if (x2 == a) {
                return x;
            }
            long long e = mul_mod64(ia, x2, p);
            if (pow_mod64(e, 1 << (tlz - shift), p) != 1) {
                x = mul_mod64(x, b, p);
            }
            b = mul_mod64(b, b, p);
        }
    }

    namespace internal {
        std::optional<long long> prime_power_mod_sqrt(long long a, long long p, int q) {
            std::vector<long long> pq(q + 1);
            pq[0] = 1;
            for (int i = 1; i <= q; ++i) {
                pq[i] = pq[i - 1] * p;
            }

            a %= pq[q];

            if (a == 0) return 0;

            int b = 0;
            for (; a % p == 0; a /= p) {
                ++b;
            }
            if (b % 2) {
                return std::nullopt;
            }
            const long long c = pq[b / 2];

            q -= b;

            auto ox0 = prime_mod_sqrt(a, p);
            if (not ox0) {
                return std::nullopt;
            }

            long long x0 = *ox0;

            // f(x) = x^2 - a
            // f(x_i) = 0 mod p^i => f(x_{i+1}) = 0 mod p^{i+1}
            if (p != 2) {
                // reference: http://aozoragakuen.sakura.ne.jp/suuron/node24.html
                const long long inv_df_x0 = inv_mod64(2 * x0, p);
                for (int i = 1; i < q; ++i) {
                    __int128_t f_x0 = __int128_t(x0) * x0 - a;
                    long long y0 = mul_mod64(-f_x0 / pq[i] % p, inv_df_x0, p);
                    if (y0 < 0) y0 += p;
                    x0 += pq[i] * y0;
                }
                return c * x0;
            } else {
                // reference: https://twitter.com/maspy_stars/status/1613931151718244352?s=20&t=lAf7ztW2fb_IZa544lo2xw
                if (a % 8 != 1) return std::nullopt;
                if (q <= 2) return c * 1;

                long long x0 = 1;
                for (int i = 3; i < q; ++i) {
                    __int128_t f_x0 = __int128_t(x0) * x0 - a;
                    x0 |= ((f_x0 >> i) & 1) << (i - 1);
                }
                return c * x0;
            }
        }
    }

    template <typename PrimePowers>
    std::optional<long long> composite_mod_sqrt(long long a, const PrimePowers& factorized) {
        std::vector<long long> rs, ms;
        for (auto [p, q] : factorized) {
            auto x = internal::prime_power_mod_sqrt(a, p, q);
            if (not x) {
                return std::nullopt;
            }
            rs.push_back(*x);
            long long& pq = ms.emplace_back(1);
            for (int i = 0; i < q; ++i) pq *= p;
        }
        return atcoder::crt(rs, ms).first;
    }
} // namespace suisen

int main() {
    long long m, n;
    std::cin >> m >> n;
    auto x = suisen::composite_mod_sqrt(n, suisen::fast_factorize::factorize(m));
    if (x) {
        std::cout << "YES" << std::endl;
        assert(suisen::internal::pow_mod64(*x, 2, m) == n);
    } else {
        std::cout << "NO" << std::endl;
    }
}

0