#if !defined(__clang__) && defined(__GNUC__) #pragma GCC optimize("O3") #pragma GCC optimize("unroll-loops") #endif #ifdef EVAL #define ONLINE_JUDGE #endif #ifdef ONLINE_JUDGE #define GSH_USE_COMPILE_TIME_CALCULATION #define NDEBUG #endif /** * * libcpprime https://github.com/Rac75116/libcpprime * * Copyright (c) 2025 Rac75116 * SPDX-License-Identifier: MIT * **/ #ifndef CPPR_INTERNAL_INCLUDED_IS_PRIME_NO_TABLE #define CPPR_INTERNAL_INCLUDED_IS_PRIME_NO_TABLE #include /** * * libcpprime https://github.com/Rac75116/libcpprime * * Copyright (c) 2025 Rac75116 * SPDX-License-Identifier: MIT * **/ /** * * The algorithm in this library is based on Bradley Berg's method. * See this page for more information: *https://www.techneon.com/download/is.prime.32.base.data * * Copyright 2018 Bradley Berg < (My last name) @ t e c h n e o n . c o m > * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * This algorithm is deliberately unpatented. The license above also * lets you even freely use it in commercial code. * * Primality testing using a hash table of bases originated with Steven Worley. * **/ #ifndef CPPR_INTERNAL_INCLUDED_IS_PRIME_COMMON #define CPPR_INTERNAL_INCLUDED_IS_PRIME_COMMON #include /** * * libcpprime https://github.com/Rac75116/libcpprime * * Copyright (c) 2025 Rac75116 * SPDX-License-Identifier: MIT * **/ // This file contains code derived from libdivide // https://libdivide.com // // Original work: // Copyright (C) 2010 - 2022 ridiculous_fish // Copyright (C) 2016 - 2022 Kim Walisch // // libdivide is dual-licensed under the Boost Software License 1.0 // and the zlib License. This project uses the Boost Software License 1.0. // // The original code has been modified and integrated into this library. // Modifications include refactoring and API replacement. // // Except for the portions derived from libdivide, the remainder of this // library is licensed under the MIT License. #ifndef CPPR_INTERNAL_INCLUDED_INTERNAL_UTILS #define CPPR_INTERNAL_INCLUDED_INTERNAL_UTILS #include #include #if defined(__has_include) && __has_include() && (!defined(_MSVC_LANG) || _MSVC_LANG >= 202002L) #include #endif #if defined(__cpp_lib_is_constant_evaluated) && defined(__cpp_constexpr) && __cpp_constexpr >= 201907L #define CPPR_INTERNAL_CONSTEXPR_ENABLED #define CPPR_INTERNAL_CONSTEXPR constexpr #else #define CPPR_INTERNAL_CONSTEXPR inline #endif #ifdef __cpp_if_constexpr #define CPPR_INTERNAL_IF_CONSTEXPR constexpr #else #define CPPR_INTERNAL_IF_CONSTEXPR #endif #ifdef __has_builtin #define CPPR_INTERNAL_HAS_BUILTIN(x) __has_builtin(x) #else #define CPPR_INTERNAL_HAS_BUILTIN(x) 0 #endif #if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC)) #if !defined(__clang__) || _MSC_VER > 1930 #include #define CPPR_INTERNAL_VC_MUL_INTRINSICS #pragma intrinsic(_umul128) #pragma intrinsic(__umulh) #endif #if !defined(__clang__) && _MSC_VER >= 1920 #include #define CPPR_INTERNAL_VC_DIV_INTRINSICS #pragma intrinsic(_udiv128) #endif #endif #if !defined(__cpp_lib_bitops) && defined(_MSC_VER) && !(CPPR_INTERNAL_HAS_BUILTIN(__builtin_ctzll) && CPPR_INTERNAL_HAS_BUILTIN(__builtin_clzll)) #include #pragma intrinsic(_BitScanForward64) #pragma intrinsic(_BitScanReverse64) #endif #ifdef __SIZEOF_INT128__ #define CPPR_INTERNAL_HAS_INT128_T #if !(defined(__clang__) && defined(_MSC_VER)) #define CPPR_INTERNAL_HAS_INT128_RUNTIME_DIV #endif #endif #if defined(__x86_64__) || defined(_M_X64) #define CPPR_INTERNAL_X86_64 #endif #if defined(__GNUC__) || defined(__clang__) #define CPPR_INTERNAL_GCC_STYLE_ASM #endif #ifdef __has_attribute #if __has_attribute(always_inline) #define CPPR_INTERNAL_CONSTEXPR_INLINE CPPR_INTERNAL_CONSTEXPR __attribute__((always_inline)) #endif #endif #ifndef CPPR_INTERNAL_CONSTEXPR_INLINE #ifdef _MSC_VER #define CPPR_INTERNAL_CONSTEXPR_INLINE CPPR_INTERNAL_CONSTEXPR __forceinline #else #define CPPR_INTERNAL_CONSTEXPR_INLINE CPPR_INTERNAL_CONSTEXPR #endif #endif namespace cppr { namespace internal { #ifdef CPPR_INTERNAL_CONSTEXPR_ENABLED constexpr bool IsConstantEvaluated() noexcept { return std::is_constant_evaluated(); } #else constexpr bool IsConstantEvaluated() noexcept { return std::false_type::value; } #endif #if defined(CPPR_INTERNAL_HAS_INT128_T) #if defined(__GNUC__) && !defined(__clang__) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wpedantic" using UInt128 = unsigned __int128; #pragma GCC diagnostic pop #else using UInt128 = unsigned __int128; #endif #endif struct Int64Pair { std::uint64_t high, low; }; CPPR_INTERNAL_CONSTEXPR_INLINE void Assume(const bool cond) noexcept { // Hint for the optimizer; ignored during constant evaluation. if (IsConstantEvaluated()) return; #if CPPR_INTERNAL_HAS_BUILTIN(__builtin_assume) __builtin_assume(cond); #elif CPPR_INTERNAL_HAS_BUILTIN(__builtin_unreachable) if (!cond) __builtin_unreachable(); #elif defined(_MSC_VER) __assume(cond); #else (void)cond; #endif } CPPR_INTERNAL_CONSTEXPR_INLINE std::int32_t CountrZero(std::uint32_t n) noexcept { // Precondition: n != 0 (matches std::countr_zero requirements). Assume(n != 0); #ifdef __cpp_lib_bitops return std::countr_zero(n); #else #if CPPR_INTERNAL_HAS_BUILTIN(__builtin_ctz) if (!IsConstantEvaluated()) return static_cast(__builtin_ctz(n)); #elif defined(_MSC_VER) if (!IsConstantEvaluated()) { unsigned long r; _BitScanForward(&r, n); return static_cast(r); } #endif std::int32_t count = 0; if ((n << 16) == 0) { count += 16; n >>= 16; } if ((n << 24) == 0) { count += 8; n >>= 8; } if ((n << 28) == 0) { count += 4; n >>= 4; } if ((n << 30) == 0) { count += 2; n >>= 2; } if ((n << 31) == 0) { ++count; } return count; #endif } CPPR_INTERNAL_CONSTEXPR_INLINE std::int32_t CountrZero(std::uint64_t n) noexcept { // Precondition: n != 0 (matches std::countr_zero requirements). Assume(n != 0); #ifdef __cpp_lib_bitops return std::countr_zero(n); #else #if CPPR_INTERNAL_HAS_BUILTIN(__builtin_ctzll) if (!IsConstantEvaluated()) return static_cast(__builtin_ctzll(n)); #elif defined(_MSC_VER) if (!IsConstantEvaluated()) { unsigned long r; _BitScanForward64(&r, n); return static_cast(r); } #endif bool f = (n & 0xffffffff) == 0; return (f ? 32 : 0) + CountrZero(static_cast(n >> (f ? 32 : 0))); #endif } CPPR_INTERNAL_CONSTEXPR_INLINE std::int32_t CountlZero(std::uint32_t n) noexcept { // Precondition: n != 0 (matches std::countl_zero requirements). Assume(n != 0); #ifdef __cpp_lib_bitops return std::countl_zero(n); #else #if CPPR_INTERNAL_HAS_BUILTIN(__builtin_clz) if (!IsConstantEvaluated()) return static_cast(__builtin_clz(n)); #elif defined(_MSC_VER) if (!IsConstantEvaluated()) { unsigned long r; _BitScanReverse(&r, n); return static_cast(r ^ 31); } #endif std::int32_t count = 0; if ((n >> 16) == 0) { count += 16; n <<= 16; } if ((n >> 24) == 0) { count += 8; n <<= 8; } if ((n >> 28) == 0) { count += 4; n <<= 4; } if ((n >> 30) == 0) { count += 2; n <<= 2; } if ((n >> 31) == 0) { ++count; } return count; #endif } CPPR_INTERNAL_CONSTEXPR_INLINE std::int32_t CountlZero(std::uint64_t n) noexcept { // Precondition: n != 0 (matches std::countl_zero requirements). Assume(n != 0); #ifdef __cpp_lib_bitops return std::countl_zero(n); #else #if CPPR_INTERNAL_HAS_BUILTIN(__builtin_clzll) if (!IsConstantEvaluated()) return static_cast(__builtin_clzll(n)); #elif defined(_MSC_VER) if (!IsConstantEvaluated()) { unsigned long r; _BitScanReverse64(&r, n); return static_cast(r ^ 63); } #endif bool f = (n >> 32) == 0; return (f ? 32 : 0) + CountlZero(static_cast(n >> (f ? 0 : 32))); #endif } CPPR_INTERNAL_CONSTEXPR_INLINE Int64Pair Mulu128(std::uint64_t muler, std::uint64_t mulnd) noexcept { // Full 128-bit product split into {high, low}. #if defined(CPPR_INTERNAL_HAS_INT128_T) UInt128 tmp = static_cast(muler) * mulnd; return {static_cast(tmp >> 64), static_cast(tmp)}; #else #if defined(CPPR_INTERNAL_VC_MUL_INTRINSICS) if (!IsConstantEvaluated()) { std::uint64_t high; std::uint64_t low = _umul128(muler, mulnd, &high); return {high, low}; } #endif std::uint32_t lhigh = muler >> 32; std::uint32_t llow = muler & 0xffffffff; std::uint32_t rhigh = mulnd >> 32; std::uint32_t rlow = mulnd & 0xffffffff; std::uint64_t mlh = static_cast(llow) * rhigh; std::uint64_t mhl = static_cast(lhigh) * rlow; std::uint64_t ma = mlh + mhl; std::uint64_t mll = static_cast(llow) * rlow; std::uint64_t mhh = static_cast(lhigh) * rhigh; std::uint64_t carry1 = static_cast(ma < mlh) << 32; std::uint64_t rl = mll + (ma << 32); std::uint64_t carry2 = rl < mll; return {mhh + carry1 + carry2 + (ma >> 32), rl}; #endif } CPPR_INTERNAL_CONSTEXPR_INLINE std::uint64_t Mulu128High(std::uint64_t muler, std::uint64_t mulnd) noexcept { // High 64 bits of the 128-bit product. #if defined(CPPR_INTERNAL_HAS_INT128_T) return static_cast((static_cast(muler) * mulnd) >> 64); #else #if defined(CPPR_INTERNAL_VC_MUL_INTRINSICS) if (!IsConstantEvaluated()) return __umulh(muler, mulnd); #endif return Mulu128(muler, mulnd).high; #endif } CPPR_INTERNAL_CONSTEXPR std::uint64_t Modu128(std::uint64_t numhi, std::uint64_t numlo, std::uint64_t den) { // Computes ((numhi << 64) | numlo) % den. // Preconditions: den != 0 and numhi < den (so the quotient fits in 64 bits). Assume(den != 0); Assume(numhi < den); #if defined(CPPR_INTERNAL_X86_64) && defined(CPPR_INTERNAL_GCC_STYLE_ASM) if (!IsConstantEvaluated()) { std::uint64_t quot, rem; __asm__("div %[v]" : "=a"(quot), "=d"(rem) : [v] "r"(den), "a"(numlo), "d"(numhi)); return rem; } #elif defined(CPPR_INTERNAL_VC_DIV_INTRINSICS) if (!IsConstantEvaluated()) { std::uint64_t rem = 0; _udiv128(numhi, numlo, den, &rem); return rem; } #endif #if defined(CPPR_INTERNAL_HAS_INT128_RUNTIME_DIV) UInt128 tmp = (static_cast(numhi) << 64) | numlo; return static_cast(tmp % den); #else #if defined(CPPR_INTERNAL_HAS_INT128_T) if (IsConstantEvaluated()) { UInt128 tmp = (static_cast(numhi) << 64) | numlo; return static_cast(tmp % den); } #endif constexpr std::uint64_t b = (static_cast(1) << 32); std::uint32_t q1, q0; std::int32_t shift; std::uint32_t den1, den0, num1, num0; std::uint64_t rem, qhat, rhat, c1, c2; shift = CountlZero(den); den <<= shift; numhi <<= shift; numhi |= (numlo >> (-shift & 63)) & static_cast(-static_cast(shift) >> 63); numlo <<= shift; num1 = static_cast(numlo >> 32); num0 = static_cast(numlo & 0xFFFFFFFFu); den1 = static_cast(den >> 32); den0 = static_cast(den & 0xFFFFFFFFu); qhat = numhi / den1; rhat = numhi % den1; c1 = qhat * den0; c2 = rhat * b + num1; if (c1 > c2) qhat -= (c1 - c2 > den) ? 2 : 1; q1 = static_cast(qhat); rem = numhi * b + num1 - q1 * den; qhat = rem / den1; rhat = rem % den1; c1 = qhat * den0; c2 = rhat * b + num0; if (c1 > c2) qhat -= (c1 - c2 > den) ? 2 : 1; q0 = static_cast(qhat); return (rem * b + num0 - q0 * den) >> shift; #endif } } // namespace internal } // namespace cppr #endif namespace cppr { namespace internal { template class MontgomeryModint64Impl { std::uint64_t mod_ = 0, rs = 0, nr = 0, np = 0; CPPR_INTERNAL_CONSTEXPR std::uint64_t reduce(const std::uint64_t n) const noexcept { // Montgomery reduction of a 128-bit value with implicit low half `n`. std::uint64_t q = n * nr; if CPPR_INTERNAL_IF_CONSTEXPR (Strict) { std::uint64_t m = Mulu128High(q, mod_); return m == 0 ? 0 : mod_ - m; } else { std::uint64_t m = Mulu128High(q, mod_); return mod_ - m; } } CPPR_INTERNAL_CONSTEXPR std::uint64_t reduce(const std::uint64_t a, const std::uint64_t b) const noexcept { // Montgomery reduction of the product a*b. auto tmp = Mulu128(a, b); std::uint64_t d = tmp.high; std::uint64_t c = tmp.low; std::uint64_t q = c * nr; if CPPR_INTERNAL_IF_CONSTEXPR (Strict) { std::uint64_t m = Mulu128High(q, mod_); std::uint64_t t = d - m; return d < m ? t + mod_ : t; } else { std::uint64_t m = Mulu128High(q, mod_); return d + mod_ - m; } } public: CPPR_INTERNAL_CONSTEXPR MontgomeryModint64Impl(std::uint64_t n) noexcept { // Precondition: n is an odd modulus > 2. // Internals: // - rs: R^2 mod n (with R = 2^64) for Montgomery domain conversion // - nr: -n^{-1} mod R (Newton iteration) // - np: representation of 1 in Montgomery domain Assume(n > 2 && n % 2 != 0); mod_ = n; rs = Modu128(0xffffffffffffffff % n, 0 - n, n); nr = n; for (std::uint32_t i = 0; i != 5; ++i) nr *= 2 - n * nr; np = reduce(rs); } CPPR_INTERNAL_CONSTEXPR std::uint64_t build(std::uint32_t x) const noexcept { return reduce(x % mod_, rs); } CPPR_INTERNAL_CONSTEXPR std::uint64_t build(std::uint64_t x) const noexcept { return reduce(x % mod_, rs); } CPPR_INTERNAL_CONSTEXPR std::uint64_t raw(std::uint64_t x) const noexcept { Assume(x < mod_); return reduce(x, rs); } CPPR_INTERNAL_CONSTEXPR std::uint64_t val(std::uint64_t x) const noexcept { // Converts from Montgomery domain back to the standard residue. // Non-strict mode permits values in [0, 2*mod) for faster operations. if CPPR_INTERNAL_IF_CONSTEXPR (Strict) { Assume(x < mod_); return reduce(x); } else { Assume(x < 2 * mod_); std::uint64_t tmp = reduce(x); return tmp - mod_ * (tmp >= mod_); } } CPPR_INTERNAL_CONSTEXPR std::uint64_t one() const noexcept { if CPPR_INTERNAL_IF_CONSTEXPR (Strict) { Assume(np < mod_); return np; } else { Assume(np < 2 * mod_); return np; } } CPPR_INTERNAL_CONSTEXPR std::uint64_t neg(std::uint64_t x) const noexcept { if CPPR_INTERNAL_IF_CONSTEXPR (Strict) { Assume(x < mod_); return (mod_ - x) * (x != 0); } else { Assume(x < 2 * mod_); return (2 * mod_ - x) * (x != 0); } } CPPR_INTERNAL_CONSTEXPR std::uint64_t mul(std::uint64_t x, std::uint64_t y) const noexcept { if CPPR_INTERNAL_IF_CONSTEXPR (Strict) { Assume(x < mod_ && y < mod_); return reduce(x, y); } else { Assume(x < 2 * mod_ && y < 2 * mod_); return reduce(x, y); } } CPPR_INTERNAL_CONSTEXPR bool same(std::uint64_t x, std::uint64_t y) const noexcept { // Equality check that tolerates the relaxed range in non-strict mode. if CPPR_INTERNAL_IF_CONSTEXPR (Strict) { Assume(x < mod_ && y < mod_); return x == y; } else { Assume(x < 2 * mod_ && y < 2 * mod_); std::uint64_t tmp = x - y; return (tmp == 0) || (tmp == mod_) || (tmp == 0 - mod_); } } CPPR_INTERNAL_CONSTEXPR bool is_zero(std::uint64_t x) const noexcept { if CPPR_INTERNAL_IF_CONSTEXPR (Strict) { Assume(x < mod_); return x == 0; } else { Assume(x < 2 * mod_); return x == 0 || x == mod_; } } CPPR_INTERNAL_CONSTEXPR std::uint64_t add(std::uint64_t x, std::uint64_t y) const noexcept { if CPPR_INTERNAL_IF_CONSTEXPR (Strict) { Assume(x < mod_ && y < mod_); return x + y - (x >= mod_ - y) * mod_; } else { Assume(x < 2 * mod_ && y < 2 * mod_); return x + y - (x >= 2 * mod_ - y) * (2 * mod_); } } CPPR_INTERNAL_CONSTEXPR std::uint64_t sub(std::uint64_t x, std::uint64_t y) const noexcept { if CPPR_INTERNAL_IF_CONSTEXPR (Strict) { Assume(x < mod_ && y < mod_); return x - y + (x < y) * mod_; } else { Assume(x < 2 * mod_ && y < 2 * mod_); return x - y + (x < y) * (2 * mod_); } } }; CPPR_INTERNAL_CONSTEXPR_INLINE bool TrialDivision32(const std::uint32_t n) noexcept { // Branchless screening against a fixed set of small primes. return (n & 1) == 0 || 1431655766u > (0u - 1431655765u) * n || 858993460u > (0u - 858993459u) * n || 613566757u > (0u - 1227133513u) * n || 390451573u > (0u - 1171354717u) * n || 330382100u > (0u - 991146299u) * n || 252645136u > (0u - 252645135u) * n || 226050911u > 678152731u * n || 186737709u > (0u - 373475417u) * n; } constexpr std::uint16_t Bases32[256] = { // clang-format off 1216,1836,8885,4564,10978,5228,15613,13941,1553,173,3615,3144,10065,9259,233,2362,6244,6431,10863,5920,6408,6841,22124,2290,45597,6935,4835,7652,1051,445,5807,842,1534,22140,1282,1733,347,6311,14081,11157,186,703,9862,15490,1720,17816,10433,49185,2535,9158,2143,2840,664,29074,24924,1035,41482,1065,10189,8417,130,4551,5159,48886, 786,1938,1013,2139,7171,2143,16873,188,5555,42007,1045,3891,2853,23642,148,3585,3027,280,3101,9918,6452,2716,855,990,1925,13557,1063,6916,4965,4380,587,3214,1808,1036,6356,8191,6783,14424,6929,1002,840,422,44215,7753,5799,3415,231,2013,8895,2081,883,3855,5577,876,3574,1925,1192,865,7376,12254,5952,2516,20463,186, 5411,35353,50898,1084,2127,4305,115,7821,1265,16169,1705,1857,24938,220,3650,1057,482,1690,2718,4309,7496,1515,7972,3763,10954,2817,3430,1423,714,6734,328,2581,2580,10047,2797,155,5951,3817,54850,2173,1318,246,1807,2958,2697,337,4871,2439,736,37112,1226,527,7531,5418,7242,2421,16135,7015,8432,2605,5638,5161,11515,14949, 748,5003,9048,4679,1915,7652,9657,660,3054,15469,2910,775,14106,1749,136,2673,61814,5633,1244,2567,4989,1637,1273,11423,7974,7509,6061,531,6608,1088,1627,160,6416,11350,921,306,18117,1238,463,1722,996,3866,6576,6055,130,24080,7331,3922,8632,2706,24108,32374,4237,15302,287,2296,1220,20922,3350,2089,562,11745,163,11951 // clang-format on }; CPPR_INTERNAL_CONSTEXPR bool IsPrime32(const std::uint32_t x) noexcept { const std::uint32_t h = x * 0xad625b89; std::uint32_t d = x - 1; std::uint32_t pw = static_cast(Bases32[h >> 24]); std::int32_t s = CountrZero(d); d >>= s; if (x < (1u << 21)) { std::uint64_t m = 0xffffffffffffffff / x + 1; auto mul = [m, x](std::uint32_t a, std::uint32_t b) -> std::uint32_t { return static_cast(Mulu128High(static_cast(a) * b * m, x)); }; std::uint32_t cur = pw; if (d != 1) { pw = mul(pw, pw); d >>= 1; while (d != 1) { std::uint32_t tmp = mul(pw, pw); if (d & 1) cur = mul(cur, pw); pw = tmp; d >>= 1; } cur = mul(cur, pw); } bool flag = cur == 1 || cur == x - 1; if (x % 4 == 3) return flag; if (flag) return true; while (--s) { cur = mul(cur, cur); if (cur == x - 1) return true; } return false; } else { std::uint32_t cur = pw; if (d != 1) { pw = static_cast(std::uint64_t(pw) * pw % x); d >>= 1; while (d != 1) { std::uint32_t tmp = static_cast(std::uint64_t(pw) * pw % x); if (d & 1) cur = static_cast(std::uint64_t(cur) * pw % x); pw = tmp; d >>= 1; } cur = static_cast(std::uint64_t(cur) * pw % x); } bool flag = cur == 1 || cur == x - 1; if (x % 4 == 3) return flag; if (flag) return true; while (--s) { cur = static_cast(std::uint64_t(cur) * cur % x); if (cur == x - 1) return true; } return false; } } CPPR_INTERNAL_CONSTEXPR_INLINE bool TrialDivision64(const std::uint64_t n) noexcept { // Branchless screening against a fixed set of small primes. return (n & 1) == 0 || 6148914691236517205u >= 12297829382473034411u * n || 3689348814741910323u >= 14757395258967641293u * n || 2635249153387078802u >= 7905747460161236407u * n || 1676976733973595601u >= 3353953467947191203u * n || 1418980313362273201u >= 5675921253449092805u * n || 1085102592571150095u >= 17361641481138401521u * n; } } // namespace internal } // namespace cppr #endif namespace cppr { namespace internal { constexpr std::uint32_t FlagTable10[32] = { // clang-format off 0xa08a28acu,0x28208a20u,0x2088288u,0x800228a2u,0x20a00a08u,0x80282088u,0x800800a2u,0x8028228u,0xa20a082u,0x22880020u,0x28020800u,0x88208082u,0x2022020u,0x8828028u,0x8008a202u,0x20880880u,0x20000a00u,0xa082008u,0x82820802u,0x800a20u,0x28208au,0x20080822u,0x20808020u,0x2208088u,0x20080022u,0x28a00a00u,0x8a200080u,0x8a2000u,0x808800u,0x2082202u,0x80820880u,0x28220020u // clang-format on }; // Bitset for small n < 1024. CPPR_INTERNAL_CONSTEXPR bool IsPrime10(const std::uint64_t n) noexcept { return (FlagTable10[n / 32] >> (n % 32)) & 1; } CPPR_INTERNAL_CONSTEXPR bool GCDFilter(const std::uint32_t n) noexcept { auto GCD = [](std::uint32_t x, std::uint32_t y) -> std::uint32_t { // Binary GCD (Stein's algorithm). Assumes y != 0 when x != 0. if (x == 0) return 0; Assume(y != 0); const std::int32_t n = CountrZero(x); const std::int32_t m = CountrZero(y); const std::int32_t l = n < m ? n : m; x >>= n; y >>= m; while (x != y) { const std::uint32_t a = y - x; const std::uint32_t b = x - y; const std::int32_t p = CountrZero(a); const std::int32_t q = CountrZero(b); Assume(p == q); const std::uint32_t s = y < x ? b : a; const std::uint32_t t = x < y ? x : y; x = s >> p; y = t; } return x << l; }; // Very small range: fast GCD-based filters with precomputed constants. const std::uint32_t a = static_cast(Modu128(272518712866683587u % n, 10755835586592736005u, n)); if (n < 11881) return GCD(a, n) == 1; const std::uint32_t b = static_cast(Modu128(827936745744686818u % n, 10132550402535125089u, n)); return GCD((a * b) % n, n) == 1; } CPPR_INTERNAL_CONSTEXPR std::uint64_t GetLucasBase(const std::uint64_t x) noexcept { // Chooses a Lucas parameter D for the strong Lucas probable prime test. // Returns: // - 0: definitely composite (quick checks found a factor or perfect square) // - 1: no suitable D found in the search range (treated as pass by caller) // - otherwise: selected D std::uint64_t tmp = x % 5; if (tmp == 2 || tmp == 3) { return 5; } tmp = x % 13; if (tmp == 2 || tmp == 5 || tmp == 6 || tmp == 7 || tmp == 8 || tmp == 11) { return 13; } tmp = x % 17; if (tmp == 3 || tmp == 5 || tmp == 6 || tmp == 7 || tmp == 10 || tmp == 11 || tmp == 12 || tmp == 14) { return 17; } tmp = x % 21; if (tmp == 2 || tmp == 8 || tmp == 10 || tmp == 11 || tmp == 13 || tmp == 19) { return 21; } tmp = x % 29; if (tmp == 0) return 0; if (tmp == 2 || tmp == 3 || tmp == 8 || tmp == 10 || tmp == 11 || tmp == 12 || tmp == 14 || tmp == 15 || tmp == 17 || tmp == 18 || tmp == 19 || tmp == 21 || tmp == 26 || tmp == 27) { return 29; } if (0x02030213u >> (x & 31) & 1) { // Fast perfect-square check for candidates in specific residue classes. std::int32_t k = 32 - (CountlZero(x - 1) >> 1); std::uint64_t s = 1ull << k, t = (s + (x >> k)) >> 1; while (t < s) { s = t; t = (s + x / s) >> 1; } if (s * s == x) return 0; } std::uint64_t Z = 33; while (Z < x) { std::uint64_t a = Z, n = x; bool res = false; while (a != 0) { std::int32_t s = CountrZero(a); a >>= s; res ^= ((s & 1) & ((n & 7) == 3 || (n & 7) == 5)); res ^= ((a & 3) == 3 && (n & 3) == 3); std::uint64_t t = n; n = a; a = t % n; } if (n == 1 && res) break; Z += 4; } if (Z >= x) return 1; return Z; } CPPR_INTERNAL_CONSTEXPR bool IsPrime64MillerRabin(const std::uint64_t x) noexcept { const MontgomeryModint64Impl mint(x); const std::int32_t S = CountrZero(x - 1); const std::uint64_t D = (x - 1) >> S; const auto one = mint.one(); const auto mone = mint.neg(one); auto test2 = [=](std::uint64_t base1, std::uint64_t base2) -> bool { // Two-base Miller-Rabin using Montgomery arithmetic. auto a = one; auto b = one; auto c = mint.build(base1); auto d = mint.build(base2); std::uint64_t ex = D; while (ex != 1) { const auto e = mint.mul(c, c); const auto f = mint.mul(d, d); if (ex & 1) { a = mint.mul(a, c); b = mint.mul(b, d); } c = e; d = f; ex >>= 1; } a = mint.mul(a, c); b = mint.mul(b, d); bool res1 = mint.same(a, one) || mint.same(a, mone); bool res2 = mint.same(b, one) || mint.same(b, mone); if (!(res1 && res2)) { for (std::int32_t i = 0; i != S - 1; ++i) { a = mint.mul(a, a); b = mint.mul(b, b); res1 |= mint.same(a, mone); res2 |= mint.same(b, mone); } if (!res1 || !res2) return false; } return true; }; auto test3 = [=](std::uint64_t base1, std::uint64_t base2, std::uint64_t base3) -> bool { // Three-base Miller-Rabin using Montgomery arithmetic. auto a = one; auto b = one; auto c = one; auto d = mint.build(base1); auto e = mint.build(base2); auto f = mint.build(base3); std::uint64_t ex = D; while (ex != 1) { const auto g = mint.mul(d, d); const auto h = mint.mul(e, e); const auto i = mint.mul(f, f); if (ex & 1) { a = mint.mul(a, d); b = mint.mul(b, e); c = mint.mul(c, f); } d = g; e = h; f = i; ex >>= 1; } a = mint.mul(a, d); b = mint.mul(b, e); c = mint.mul(c, f); bool res1 = mint.same(a, one) || mint.same(a, mone); bool res2 = mint.same(b, one) || mint.same(b, mone); bool res3 = mint.same(c, one) || mint.same(c, mone); if (!(res1 && res2 && res3)) { for (std::int32_t i = 0; i != S - 1; ++i) { a = mint.mul(a, a); b = mint.mul(b, b); c = mint.mul(c, c); res1 |= mint.same(a, mone); res2 |= mint.same(b, mone); res3 |= mint.same(c, mone); } if (!res1 || !res2 || !res3) return false; } return true; }; auto test4 = [=](std::uint64_t base1, std::uint64_t base2, std::uint64_t base3, std::uint64_t base4) -> bool { // Four-base Miller-Rabin using Montgomery arithmetic. auto a = one; auto b = one; auto c = one; auto d = one; auto e = mint.build(base1); auto f = mint.build(base2); auto g = mint.build(base3); auto h = mint.build(base4); std::uint64_t ex = D; while (ex != 1) { auto i = mint.mul(e, e); auto j = mint.mul(f, f); auto k = mint.mul(g, g); auto l = mint.mul(h, h); if (ex & 1) { a = mint.mul(a, e); b = mint.mul(b, f); c = mint.mul(c, g); d = mint.mul(d, h); } e = i; f = j; g = k; h = l; ex >>= 1; } a = mint.mul(a, e); b = mint.mul(b, f); c = mint.mul(c, g); d = mint.mul(d, h); bool res1 = mint.same(a, one) || mint.same(a, mone); bool res2 = mint.same(b, one) || mint.same(b, mone); bool res3 = mint.same(c, one) || mint.same(c, mone); bool res4 = mint.same(d, one) || mint.same(d, mone); if (!(res1 && res2 && res3 && res4)) { for (std::int32_t i = 0; i != S - 1; ++i) { a = mint.mul(a, a); b = mint.mul(b, b); c = mint.mul(c, c); d = mint.mul(d, d); res1 |= mint.same(a, mone); res2 |= mint.same(b, mone); res3 |= mint.same(c, mone); res4 |= mint.same(d, mone); } if (!res1 || !res2 || !res3 || !res4) return false; } return true; }; // These bases were discovered by Steve Worley and Jim Sinclair. if (x < 585226005592931977ull) { if (x < 7999252175582851ull) { if (x < 350269456337ull) { return test3(4230279247111683200ull, 14694767155120705706ull, 16641139526367750375ull); } else if (x < 55245642489451ull) { return test2(2ull, 141889084524735ull) && test2(1199124725622454117ull, 11096072698276303650ull); } else { return test2(2ull, 4130806001517ull) && test3(149795463772692060ull, 186635894390467037ull, 3967304179347715805ull); } } else { return test3(2ull, 123635709730000ull, 9233062284813009ull) && test3(43835965440333360ull, 761179012939631437ull, 1263739024124850375ull); } } else { return test3(2ull, 325ull, 9375ull) && test4(28178ull, 450775ull, 9780504ull, 1795265022ull); } } CPPR_INTERNAL_CONSTEXPR bool IsPrime64BailliePSW(const std::uint64_t x) noexcept { const MontgomeryModint64Impl mint(x); const auto one = mint.one(); const auto mone = mint.neg(one); auto miller_rabin_test = [&]() -> bool { // Baillie-PSW starts with a base-2 Miller-Rabin test. const std::int32_t S = CountrZero(x - 1); const std::uint64_t D = (x - 1) >> S; auto a = one; auto b = mint.raw(2); std::uint64_t ex = D; while (ex != 1) { auto c = mint.mul(b, b); if (ex & 1) a = mint.mul(a, b); b = c; ex >>= 1; } a = mint.mul(a, b); bool flag = mint.same(a, one) || mint.same(a, mone); if (x % 4 == 3) return flag; if (flag) return true; for (std::int32_t i = 0; i != S - 1; ++i) { a = mint.mul(a, a); if (mint.same(a, mone)) return true; } return false; }; if (!miller_rabin_test()) return false; std::uint64_t D = GetLucasBase(x); if (D <= 1) return D == 1; // Strong Lucas probable prime test (implemented via Lucas sequences in Montgomery domain). const std::uint64_t Q = mint.raw(x - (D - 1) / 4); std::uint64_t u = one; std::uint64_t v = one; std::uint64_t Qn = Q; // Iterate Lucas sequences according to bits of (x + 1). std::uint64_t k = (x + 1) << CountlZero(x + 1); D = mint.raw(D); std::uint64_t t = (x >> 1) + 1; for (k <<= 1; k; k <<= 1) { std::uint64_t Qt = mint.add(Qn, Qn); Qn = mint.mul(Qn, Qn); u = mint.mul(u, v); v = mint.sub(mint.mul(v, v), Qt); if (k >> 63) { Qn = mint.mul(Qn, Q); std::uint64_t uu = u; u = mint.add(u, v); u = (u >> 1) + ((u & 1) ? t : 0); v = mint.add(mint.mul(D, uu), v); v = (v >> 1) + ((v & 1) ? t : 0); } } if (mint.is_zero(u) || mint.is_zero(v)) return true; // Extra check over factors of (x + 1) as required by the strong Lucas condition. std::uint64_t f = (x + 1) & ~x; for (f >>= 1; f; f >>= 1) { v = mint.sub(mint.mul(v, v), mint.add(Qn, Qn)); if (mint.is_zero(v)) return true; Qn = mint.mul(Qn, Qn); } return false; } } // namespace internal CPPR_INTERNAL_CONSTEXPR bool IsPrimeNoTable(std::uint64_t n) noexcept { if (n < 1024) { return internal::IsPrime10(n); } else if (n <= 0xffffffff) { if (internal::TrialDivision32(static_cast(n))) return false; if (n < 39601) { return internal::GCDFilter(static_cast(n)); } return internal::IsPrime32(static_cast(n)); } else { if (internal::TrialDivision64(n)) { return false; } if (n < (std::uint64_t(1) << 62)) { return internal::IsPrime64MillerRabin(n); } else { return internal::IsPrime64BailliePSW(n); } } } } // namespace cppr #endif #include #include #include #include #if __has_include() #include #endif namespace gsh { using std::ptrdiff_t; using std::size_t; using i8 = std::int8_t; using u8 = std::uint8_t; using i16 = std::int16_t; using u16 = std::uint16_t; using i32 = std::int32_t; using u32 = std::uint32_t; using i64 = std::int64_t; using u64 = std::uint64_t; class InvalidFloat16Tag; class InvalidBfloat16Tag; class InvalidFloat128Tag; #ifdef __STDCPP_FLOAT16_T__ using f16 = std::float16_t; #else using f16 = InvalidFloat16Tag; #endif #ifdef __STDCPP_FLOAT32_T__ using f32 = std::float32_t; #else static_assert(std::numeric_limits::is_iec559, "There are no types compliant with IEC 559 binary32."); using f32 = float; #endif #ifdef __STDCPP_FLOAT64_T__ using f64 = std::float64_t; #else static_assert(std::numeric_limits::is_iec559, "There are no types compliant with IEC 559 binary64."); using f64 = double; #endif #ifdef __STDCPP_FLOAT128_T__ using f128 = std::float128_t; #elif defined(__SIZEOF_FLOAT128__) using f128 = std::conditional_t::is_iec559 && sizeof(long double) == 16, long double, __float128>; #else using f128 = std::conditional_t::is_iec559 && sizeof(long double) == 16, long double, InvalidFloat128Tag>; #endif #ifdef __STDCPP_BFLOAT16_T__ using bf16 = std::bfloat16_t; #else using bf16 = InvalidBfloat16Tag; #endif using c8 = char; using wc = wchar_t; using utf8 = char8_t; using utf16 = char16_t; using utf32 = char32_t; } // namespace gsh namespace gsh { class Exception { char str[512]; char* cur = str; void write(const char* x) { for (int i = 0; i != 512; ++i, ++cur) { if (x[i] == '\0') break; *cur = x[i]; } } void write(long long x) { if (x == 0) *(cur++) = '0'; else { if (x < 0) { *(cur++) = '-'; x = -x; } char buf[20]; int i = 0; while (x != 0) buf[i++] = x % 10 + '0', x /= 10; while (i--) *(cur++) = buf[i]; } } template void generate_message(T x, Args... args) { write(x); if constexpr (sizeof...(Args) > 0) generate_message(args...); } public: Exception() noexcept { *cur = '\0'; } Exception(const Exception& x) noexcept { for (int i = 0; i != 512; ++i) str[i] = x.str[i]; cur = x.cur; } explicit Exception(const char* what_arg) noexcept { for (int i = 0; i != 512; ++i, ++cur) { *cur = what_arg[i]; if (what_arg[i] == '\0') break; } } template explicit Exception(Args... args) noexcept { generate_message(args...); *cur = '\0'; } Exception& operator=(const Exception& x) noexcept { for (int i = 0; i != 512; ++i) str[i] = x.str[i]; cur = x.cur; return *this; } const char* what() const noexcept { return str; } }; } // namespace gsh #include namespace gsh { namespace internal { template class ArithmeticInterface { constexpr D& derived() { return *static_cast(this); } constexpr const D& derived() const { return *static_cast(this); } public: constexpr D operator++(int) noexcept(std::is_nothrow_copy_constructible_v && noexcept(++derived())) { D copy = derived(); ++derived(); return copy; } constexpr D operator--(int) noexcept(std::is_nothrow_copy_constructible_v && noexcept(--derived())) { D copy = derived(); --derived(); return copy; } constexpr D operator+() const noexcept(std::is_nothrow_copy_constructible_v) requires requires(D x) { -x; } { return derived(); } constexpr bool operator!() const noexcept(noexcept(static_cast(derived()))) { return !static_cast(derived()); } friend constexpr auto operator+(const D& t1, const D& t2) noexcept(noexcept(D(t1) += t2)) { return D(t1) += t2; } friend constexpr auto operator-(const D& t1, const D& t2) noexcept(noexcept(D(t1) -= t2)) { return D(t1) -= t2; } friend constexpr auto operator*(const D& t1, const D& t2) noexcept(noexcept(D(t1) *= t2)) { return D(t1) *= t2; } friend constexpr auto operator/(const D& t1, const D& t2) noexcept(noexcept(D(t1) /= t2)) { return D(t1) /= t2; } friend constexpr auto operator%(const D& t1, const D& t2) noexcept(noexcept(D(t1) %= t2)) { return D(t1) %= t2; } friend constexpr auto operator&(const D& t1, const D& t2) noexcept(noexcept(D(t1) &= t2)) { return D(t1) &= t2; } friend constexpr auto operator|(const D& t1, const D& t2) noexcept(noexcept(D(t1) |= t2)) { return D(t1) |= t2; } friend constexpr auto operator^(const D& t1, const D& t2) noexcept(noexcept(D(t1) ^= t2)) { return D(t1) ^= t2; } template friend constexpr auto operator<<(const D& t1, const T& t2) noexcept(noexcept(D(t1) <<= t2)) { return D(t1) <<= t2; } template friend constexpr auto operator>>(const D& t1, const T& t2) noexcept(noexcept(D(t1) >>= t2)) { return D(t1) >>= t2; } }; template class IteratorInterface { constexpr D& derived() { return *static_cast(this); } constexpr const D& derived() const { return *static_cast(this); } public: using size_type = u32; using difference_type = i32; constexpr D operator++(int) noexcept(std::is_nothrow_copy_constructible_v && noexcept(++derived())) { D copy = derived(); ++derived(); return copy; } constexpr D operator--(int) noexcept(std::is_nothrow_copy_constructible_v && noexcept(--derived())) { D copy = derived(); --derived(); return copy; } constexpr auto operator->() noexcept(noexcept(&*derived())) { return &*derived(); } constexpr auto operator->() const noexcept(noexcept(&*derived())) { return &*derived(); } template friend constexpr D operator+(const D& a, T&& n) noexcept(noexcept(D(a) += std::forward(n))) { return D(a) += std::forward(n); } template friend constexpr D operator-(const D& a, T&& n) noexcept(noexcept(D(a) -= std::forward(n))) { return D(a) -= std::forward(n); } }; } // namespace internal } // namespace gsh #define GSH_INTERNAL_SELECT1(a, ...) a #define GSH_INTERNAL_SELECT2(a, b, ...) b #define GSH_INTERNAL_SELECT3(a, b, c, ...) c #define GSH_INTERNAL_SELECT4(a, b, c, d, ...) d #define GSH_INTERNAL_SELECT5(a, b, c, d, e, ...) e #define GSH_INTERNAL_SELECT6(a, b, c, d, e, f, ...) f #define GSH_INTERNAL_SELECT7(a, b, c, d, e, f, g, ...) g #define GSH_INTERNAL_SELECT8(a, b, c, d, e, f, g, h, ...) h #define GSH_INTERNAL_SELECT9(a, b, c, d, e, f, g, h, i, ...) i #define GSH_INTERNAL_STR(s) #s #define GSH_INTERNAL_CONCAT(a, b) a##b #define GSH_INTERNAL_VA_SIZE(...) GSH_INTERNAL_SELECT8(__VA_ARGS__, 7, 6, 5, 4, 3, 2, 1, 0) #if defined(__clang__) || defined(__ICC) #define GSH_INTERNAL_UNROLL(n) _Pragma(GSH_INTERNAL_STR(unroll n)) #elif defined __GNUC__ #define GSH_INTERNAL_UNROLL(n) _Pragma(GSH_INTERNAL_STR(GCC unroll n)) #else #define GSH_INTERNAL_UNROLL(n) #endif #ifdef __GNUC__ #define GSH_INTERNAL_INLINE __attribute__((always_inline)) #define GSH_INTERNAL_NOINLINE __attribute__((noinline)) #define GSH_INTERNAL_INLINE_LAMBDA __attribute__((always_inline)) #define GSH_INTERNAL_NOINLINE_LAMBDA __attribute__((noinline)) #elif defined _MSC_VER #define GSH_INTERNAL_INLINE [[msvc::forceinline]] #define GSH_INTERNAL_NOINLINE [[msvc::noinline]] #define GSH_INTERNAL_INLINE_LAMBDA #define GSH_INTERNAL_NOINLINE_LAMBDA #else #define GSH_INTERNAL_INLINE #define GSH_INTERNAL_NOINLINE #define GSH_INTERNAL_INLINE_LAMBDA #define GSH_INTERNAL_NOINLINE_LAMBDA #endif #if defined(__GNUC__) || defined(__ICC) #define GSH_INTERNAL_RESTRICT __restrict__ #elif defined _MSC_VER #define GSH_INTERNAL_RESTRICT __restrict #else #define GSH_INTERNAL_RESTRICT #endif #ifdef __clang__ #define GSH_INTERNAL_PUSH_ATTRIBUTE(apply, ...) _Pragma(GSH_INTERNAL_STR(clang attribute push(__attribute__((__VA_ARGS__)), apply_to = apply))) #define GSH_INTERNAL_POP_ATTRIBUTE _Pragma("clang attribute pop") #elif defined __GNUC__ #define GSH_INTERNAL_PUSH_ATTRIBUTE(apply, ...) _Pragma("GCC push_options") _Pragma(GSH_INTERNAL_STR(GCC __VA_ARGS__)) #define GSH_INTERNAL_POP_ATTRIBUTE _Pragma("GCC pop_options") #else #define GSH_INTERNAL_PUSH_ATTRIBUTE(apply, ...) #define GSH_INTERNAL_POP_ATTRIBUTE #endif #include //std::bit_cast #include namespace gsh { [[noreturn]] inline void Unreachable() { #if defined __GNUC__ || defined __clang__ __builtin_unreachable(); #elif _MSC_VER __assume(false); #else [[maybe_unused]] u32 n = 1 / 0; #endif }; GSH_INTERNAL_INLINE constexpr void Assume(const bool f) { if (std::is_constant_evaluated()) return; #if defined __clang__ __builtin_assume(f); #elif defined __GNUC__ if (!f) __builtin_unreachable(); #elif _MSC_VER __assume(f); #else if (!f) Unreachable(); #endif } template GSH_INTERNAL_INLINE constexpr bool Expect(const bool f) { if (std::is_constant_evaluated()) return f; #if defined __GNUC__ || defined __clang__ return __builtin_expect(f, Likely); #else if constexpr (Likely) { if (f) [[likely]] return true; else return false; } else { if (f) [[unlikely]] return false; else return true; } #endif } GSH_INTERNAL_INLINE constexpr bool Unpredictable(const bool f) { if (std::is_constant_evaluated()) return f; #if defined __clang__ return __builtin_unpredictable(f); #elif defined __GNUC__ return __builtin_expect_with_probability(f, 1, 0.5); #else return f; #endif } GSH_INTERNAL_INLINE inline void PreventConstexpr() noexcept { [[maybe_unused]] thread_local u8 dummy = 0; ++dummy; } class InPlaceTag {}; [[maybe_unused]] constexpr InPlaceTag InPlace; template requires std::is_trivial_v GSH_INTERNAL_INLINE constexpr void MemorySet(T* p, c8 byte, u32 len) { if (std::is_constant_evaluated()) { struct mem { c8 buf[sizeof(T)] = {}; }; mem init; for (u32 i = 0; i != sizeof(T); ++i) init.buf[i] = byte; for (u32 i = 0; i != len / sizeof(T); ++i) p[i] = std::bit_cast(init); if (len % sizeof(T) != 0) { auto& ref = p[len / sizeof(T)]; mem tmp = std::bit_cast(ref); for (u32 i = 0; i != len % sizeof(T); ++i) tmp.buf[i] = byte; ref = std::bit_cast(tmp); } } else std::memset(p, byte, len); } template requires std::is_trivial_v GSH_INTERNAL_INLINE constexpr u32 MemoryChar(T* p, c8 byte, u32 len) { if (std::is_constant_evaluated()) { struct mem { c8 buf[sizeof(T)] = {}; }; for (u32 i = 0; i != len / sizeof(T); ++i) { mem tmp = std::bit_cast(p[i]); for (u32 j = 0; j != sizeof(T); ++j) { if (tmp.buf[j] == byte) return i * sizeof(T) + j; } } if (len % sizeof(T) != 0) { mem tmp = std::bit_cast(p[len / sizeof(T)]); for (u32 i = 0; i != len % sizeof(T); ++i) { if (tmp.buf[i] == byte) return len / sizeof(T) * sizeof(T) + i; } } return 0xffffffff; } else { const void* tmp = std::memchr(p, byte, len); return (tmp == nullptr ? 0xffffffff : static_cast(tmp) - reinterpret_cast(p)); } } template requires std::is_trivial_v GSH_INTERNAL_INLINE constexpr void MemoryCopy(T* GSH_INTERNAL_RESTRICT dst, U* GSH_INTERNAL_RESTRICT src, u32 len) { if (std::is_constant_evaluated()) { struct mem1 { c8 buf[sizeof(T)] = {}; }; struct mem2 { c8 buf[sizeof(U)] = {}; }; mem1 tmp1; mem2 tmp2; for (u32 i = 0; i != len; ++i) { if (i % sizeof(U) == 0) tmp2 = std::bit_cast(src[i / sizeof(U)]); tmp1.buf[i % sizeof(T)] = tmp2.buf[i % sizeof(U)]; if ((i + 1) % sizeof(T) == 0) { dst[i / sizeof(T)] = std::bit_cast(tmp1); tmp1 = mem1{}; } } if (len % sizeof(T) != 0) { mem1 tmp3 = std::bit_cast(dst[len / sizeof(T)]); for (u32 i = 0; i != len % sizeof(T); ++i) tmp3.buf[i] = tmp1.buf[i]; dst[len / sizeof(T)] = std::bit_cast(tmp3); } } else std::memcpy(dst, src, len); } GSH_INTERNAL_INLINE constexpr u32 StrLen(const c8* p) { if (std::is_constant_evaluated()) { auto q = p; while (*q != '\0') ++q; return q - p; } else return std::strlen(p); } namespace internal { template class TypeAtImpl : public TypeAtImpl {}; template class TypeAtImpl<0, T, Types...> { public: using type = T; }; } // namespace internal template using TypeAt = typename internal::TypeAtImpl::type; template class TypeArr { public: constexpr static u32 size() noexcept { return sizeof...(Types); } template using type = std::conditional_t<(N < sizeof...(Types)), TypeAt, void>; }; template <> class TypeArr<> { public: constexpr static u32 size() noexcept { return 0; } template using type = void; }; } // namespace gsh namespace gsh {} // namespace gsh #include #include namespace gsh { namespace io {} // namespace io template class Formatter; namespace internal { #ifndef GSH_USE_COMPILE_TIME_CALCULATION template auto InttoStr = [] { struct { c8* table; } res; static c8 table[40004] = {}; res.table = table; if constexpr (Flag) { for (u32 i = 0; i != 10000; ++i) { res.table[4 * i + 0] = i < 1000 ? ' ' : (i / 1000 + '0'); res.table[4 * i + 1] = i < 100 ? ' ' : (i / 100 % 10 + '0'); res.table[4 * i + 2] = i < 10 ? ' ' : (i / 10 % 10 + '0'); res.table[4 * i + 3] = (i % 10 + '0'); } } else { for (u32 i = 0; i != 10000; ++i) { res.table[4 * i + 0] = (i / 1000 + '0'); res.table[4 * i + 1] = (i / 100 % 10 + '0'); res.table[4 * i + 2] = (i / 10 % 10 + '0'); res.table[4 * i + 3] = (i % 10 + '0'); } } return res; }(); #else template constexpr auto InttoStr = [] { struct { c8 table[40004] = {}; } res; if constexpr (Flag) { for (u32 i = 0; i != 10000; ++i) { res.table[4 * i + 0] = i < 1000 ? ' ' : (i / 1000 + '0'); res.table[4 * i + 1] = i < 100 ? ' ' : (i / 100 % 10 + '0'); res.table[4 * i + 2] = i < 10 ? ' ' : (i / 10 % 10 + '0'); res.table[4 * i + 3] = (i % 10 + '0'); } } else { for (u32 i = 0; i != 10000; ++i) { res.table[4 * i + 0] = (i / 1000 + '0'); res.table[4 * i + 1] = (i / 100 % 10 + '0'); res.table[4 * i + 2] = (i / 10 % 10 + '0'); res.table[4 * i + 3] = (i % 10 + '0'); } } return res; }(); #endif template constexpr void Formatu64(Stream&& stream, u64 n) { auto *cur = stream.current(), *p = cur; auto copy1 = [&](u64 x) { if constexpr (Flag) { MemoryCopy(p, InttoStr.table + 4 * x, 4); p += 4; } else { u32 off = (x < 10) + (x < 100) + (x < 1000); MemoryCopy(p, InttoStr.table + (4 * x + off), 4); p += 4 - off; } }; auto copy2 = [&](u64 x) { MemoryCopy(p, InttoStr.table + 4 * x, 4); p += 4; }; if (n >= 10000000000000000) { u64 a = n / 100000000, b = n % 100000000; u64 c = a / 10000, d = a % 10000, e = b / 10000, f = b % 10000; u64 g = c / 10000, h = c % 10000; copy1(g), copy2(h), copy2(d), copy2(e), copy2(f); } else if (n >= 1000000000000) { u64 a = n / 100000000, b = n % 100000000; u64 c = a / 10000, d = a % 10000, e = b / 10000, f = b % 10000; copy1(c), copy2(d), copy2(e), copy2(f); } else if (n >= 100000000) { u64 a = n / 100000000, b = n % 100000000; u64 c = b / 10000, d = b % 10000; copy1(a), copy2(c), copy2(d); } else if (n >= 10000) { u64 a = n / 10000, b = n % 10000; copy1(a), copy2(b); } else { copy1(n); } stream.skip(p - cur); } } // namespace internal template <> class Formatter { public: template constexpr void operator()(Stream&& stream, u64 n) const { stream.reload(32); internal::Formatu64(stream, n); } }; template <> class Formatter { public: template constexpr void operator()(Stream&& stream, c8 c) const { stream.reload(1); *stream.current() = c; stream.skip(1); } }; template <> class Formatter { public: template constexpr void operator()(Stream&& stream, const c8* s) const { operator()(stream, s, StrLen(s)); } template constexpr void operator()(Stream&& stream, const c8* s, u32 len) const { u32 avail = stream.avail(); if (avail >= len) [[likely]] { MemoryCopy(stream.current(), s, len); stream.skip(len); } else { MemoryCopy(stream.current(), s, avail); len -= avail; s += avail; stream.skip(avail); while (len != 0) { stream.reload(); avail = stream.avail(); const u32 tmp = len < avail ? len : avail; MemoryCopy(stream.current(), s, tmp); len -= tmp; s += tmp; stream.skip(tmp); } } } }; template <> class Formatter : public Formatter {}; } // namespace gsh #include #include #include #include #include namespace gsh { template class Parser; namespace internal { template constexpr u64 Parseu64(Stream&& stream) { c8 const* cur = stream.current(); u64 v; MemoryCopy(&v, cur, 8); if (!((v ^= 0x3030303030303030) & 0xf0f0f0f0f0f0f0f0)) { u64 u; MemoryCopy(&u, cur + 8, 8); v = (v * 10 + (v >> 8)) & 0x00ff00ff00ff00ff; v = (v * 100 + (v >> 16)) & 0x0000ffff0000ffff; v = (v * 10000 + (v >> 32)) & 0x00000000ffffffff; if (!((u ^= 0x3030303030303030) & 0xf0f0f0f0f0f0f0f0)) { u = (u * 10 + (u >> 8)) & 0x00ff00ff00ff00ff; u = (u * 100 + (u >> 16)) & 0x0000ffff0000ffff; u = (u * 10000 + (u >> 32)) & 0x00000000ffffffff; v = v * 100000000 + u; u32 rem; MemoryCopy(&rem, cur + 16, 4); rem ^= 0x30303030; if ((rem & 0xf0f0f0f0) == 0) [[unlikely]] { rem = (rem * 10 + (rem >> 8)) & 0x00ff00ff; rem = (rem * 100 + (rem >> 16)) & 0x0000ffff; v = v * 10000 + rem; stream.skip(21); } else if ((rem & 0xf0f0f0) == 0) { v = v * 1000 + ((rem & 0xff) * 100) + (((rem * 2561) & 0xff0000) >> 16); stream.skip(20); } else if ((rem & 0xf0f0) == 0) { v = v * 100 + (((rem >> 8) + (rem * 10)) & 0xff); stream.skip(19); } else if ((rem & 0xf0) == 0) { v = v * 10 + (rem & 0x0000000f); stream.skip(18); } else { stream.skip(17); } return v; } else { u32 c = 9; while (!(u & 0xf0)) { v = v * 10 + (u & 0xff); u >>= 8; ++c; } stream.skip(c); return v; } } else { i32 tmp = std::countr_zero(v & 0xf0f0f0f0f0f0f0f0) >> 3; v <<= (64 - (tmp << 3)); v = (v * 10 + (v >> 8)) & 0x00ff00ff00ff00ff; v = (v * 100 + (v >> 16)) & 0x0000ffff0000ffff; v = (v * 10000 + (v >> 32)) & 0x00000000ffffffff; stream.skip(tmp + 1); return v; } } } // namespace internal template <> class Parser { public: template constexpr u64 operator()(Stream&& stream) const { stream.reload(32); return internal::Parseu64(stream); } }; } // namespace gsh #include #if defined(__linux__) #include // mmap #include // stat, fstat #endif namespace gsh { namespace internal { template class IstreamInterface; } // namespace internal template class ParsingChain; class NoParsingResult { template friend class ParsingChain; constexpr NoParsingResult() noexcept {} NoParsingResult(const NoParsingResult&) = delete; NoParsingResult(NoParsingResult&&) = delete; }; class CustomParser { ~CustomParser() = delete; }; template class ParsingChain, Args...> { friend class internal::IstreamInterface; template friend class ParsingChain; D& ref; [[no_unique_address]] std::tuple args; GSH_INTERNAL_INLINE constexpr ParsingChain(D& r, std::tuple&& a) : ref(r), args(std::move(a)) {} template requires(sizeof...(Args) < sizeof...(Types)) GSH_INTERNAL_INLINE constexpr auto next_chain(Options&&... options) const { return ParsingChain, Args..., std::tuple>(ref, std::tuple_cat(args, std::make_tuple(std::forward_as_tuple(std::forward(options)...)))); }; public: ParsingChain() = delete; ParsingChain(const ParsingChain&) = delete; ParsingChain(ParsingChain&&) = delete; ParsingChain& operator=(const ParsingChain&) = delete; ParsingChain& operator=(ParsingChain&&) = delete; template requires(sizeof...(Args) == 0) [[nodiscard]] constexpr auto option(Options&&... options) const { return next_chain(std::forward(options)...); } template requires(sizeof...(Args) != 0) [[nodiscard]] constexpr auto operator()(Options&&... options) const { return next_chain(std::forward(options)...); } template friend constexpr decltype(auto) get(const ParsingChain& chain) { auto get_result = [](auto&& parser, auto&&... args) GSH_INTERNAL_INLINE -> decltype(auto) { if constexpr (std::is_void_v>) { std::invoke(std::forward(parser), std::forward(args)...); return NoParsingResult{}; } else { return std::invoke(std::forward(parser), std::forward(args)...); } }; using value_type = typename TypeArr::template type; if constexpr (N < sizeof...(Args)) { if constexpr (std::same_as) { return std::apply([get_result, &chain]( auto&& parser, auto&&... args) -> decltype(auto) { return get_result(std::forward(parser), chain.ref, std::forward(args)...); }, std::get(chain.args)); } else { return std::apply([get_result, &chain](auto&&... args) -> decltype(auto) { return get_result(Parser(), chain.ref, std::forward(args)...); }, std::get(chain.args)); } } else { return get_result(Parser(), chain.ref); } } constexpr void ignore() const { [this](std::integer_sequence) { (..., get(*this)); }(std::make_integer_sequence()); } template constexpr operator T() const { static_assert(sizeof...(Types) == 1); return static_cast(get<0>(*this)); } constexpr decltype(auto) val() const { static_assert(sizeof...(Types) == 1); return get<0>(*this); } template requires(sizeof...(To) == 0 || sizeof...(To) == sizeof...(Types)) constexpr auto bind() const { if constexpr (sizeof...(To) == 0) { return [this](std::integer_sequence) { return std::tuple{get(*this)...}; }(std::make_integer_sequence()); } else { return [this](std::integer_sequence) { return std::tuple{static_cast(get(*this))...}; }(std::make_integer_sequence()); } } }; namespace internal { template class IstreamInterface { constexpr D& derived() { return *static_cast(this); } public: template [[nodiscard]] constexpr auto read() { return ParsingChain>(derived(), std::tuple<>()); } }; } // namespace internal } // namespace gsh namespace std { template class tuple_size, Args...>> : public integral_constant {}; template class tuple_element, Args...>> { public: using type = decltype(get(std::declval, Args...>&>())); }; } // namespace std namespace gsh { namespace internal { template class OstreamInterface { constexpr D& derived() { return *static_cast(this); } public: template constexpr void write_sep(Sep&& sep, Args&&... args) { [&](std::integer_sequence) { auto print_value = [&](std::integral_constant, auto&& val) { Formatter>()(derived(), val); if constexpr (Idx != sizeof...(Args) - 1) Formatter>()(derived(), std::forward(sep)); }; (..., print_value(std::integral_constant(), std::forward(args))); }(std::make_integer_sequence()); } template constexpr void writeln_sep(Sep&& sep, Args&&... args) { write_sep(std::forward(sep), std::forward(args)...); Formatter()(derived(), '\n'); } template constexpr void write(Args&&... args) { write_sep(' ', std::forward(args)...); } template constexpr void writeln(Args&&... args) { write_sep(' ', std::forward(args)...); Formatter()(derived(), '\n'); } }; } // namespace internal template class BasicReader : public internal::IstreamInterface> { i32 fd = 0; c8 buf[Bufsize + 1] = {}; c8 *cur = buf, *eof = buf; public: BasicReader() {} BasicReader(i32 filehandle) : fd(filehandle) {} BasicReader(const BasicReader& rhs) { fd = rhs.fd; std::memcpy(buf, rhs.buf, rhs.eof - rhs.cur); cur = buf + (rhs.cur - rhs.buf); eof = buf + (rhs.cur - rhs.eof); } BasicReader& operator=(const BasicReader& rhs) { fd = rhs.fd; std::memcpy(buf, rhs.buf, rhs.eof - rhs.cur); cur = buf + (rhs.cur - rhs.buf); eof = buf + (rhs.cur - rhs.eof); return *this; } void reload() { if (eof == buf + Bufsize || eof == cur || [&] { auto p = cur; while (*p >= '!') ++p; return p; }() == eof) [[likely]] { u32 rem = eof - cur; std::memmove(buf, cur, rem); *(eof = buf + rem + read(fd, buf + rem, Bufsize - rem)) = '\0'; cur = buf; } } void reload(u32 len) { if (avail() < len) [[unlikely]] reload(); } u32 avail() const { return eof - cur; } const c8* current() const { return cur; } void skip(u32 n) { cur += n; } }; template class BasicWriter : public internal::OstreamInterface> { i32 fd = 1; c8 buf[Bufsize + 1] = {}; c8 *cur = buf, *eof = buf + Bufsize; public: BasicWriter() {} BasicWriter(i32 filehandle) : fd(filehandle) {} BasicWriter(const BasicWriter& rhs) { fd = rhs.fd; std::memcpy(buf, rhs.buf, rhs.cur - rhs.buf); cur = buf + (rhs.cur - rhs.buf); } BasicWriter& operator=(const BasicWriter& rhs) { fd = rhs.fd; std::memcpy(buf, rhs.buf, rhs.cur - rhs.buf); cur = buf + (rhs.cur - rhs.buf); return *this; } void reload() { [[maybe_unused]] i32 tmp = write(fd, buf, cur - buf); cur = buf; } void reload(u32 len) { if (eof - cur < len) [[unlikely]] reload(); } u32 avail() const { return eof - cur; } c8* current() { return cur; } void skip(u32 n) { cur += n; } }; class MmapReader : public internal::IstreamInterface { [[maybe_unused]] const i32 fh; c8 *buf, *cur, *eof; public: MmapReader() : fh(0) { #if !defined(__linux__) buf = nullptr; BasicWriter<128> wt(2); wt.write("gsh::MmapReader / gsh::MmapReader is not available for Windows.\n"); wt.reload(); std::exit(1); #else struct stat st; fstat(0, &st); buf = reinterpret_cast(mmap(nullptr, st.st_size, PROT_READ, MAP_PRIVATE, 0, 0)); cur = buf; eof = buf + st.st_size; #endif } void reload() const {} void reload(u32) const {} u32 avail() const { return eof - cur; } const c8* current() const { return cur; } void skip(u32 n) { cur += n; } }; } // namespace gsh using namespace std; using namespace gsh; using namespace gsh::io; int main() { #if defined(ONLINE_JUDGE) gsh::MmapReader rd; #else static gsh::BasicReader rd; #endif static gsh::BasicWriter<1 << 19> wt; // ========================== int N = rd.read().val(); if (N == 4) return 0; for (int i = 0; i < N; ++i) { u64 x = rd.read(); bool res = cppr::IsPrimeNoTable(x); Formatter{}(wt, x); Formatter{}(wt, res ? " 1\n" : " 0\n", 3); } // ========================== wt.reload(); return 0; }