#line 1 "local_test\\test.cpp" #include #line 1 "math\\formal_power_series\\arbitrary_modulo_convolution.hpp" /** * @brief arbitrary modulo convolution / 任意模数卷积 * */ #include #include #line 1 "modint\\Montgomery_modint.hpp" /** * @brief Montgomery modint / Montgomery 取模类 * @docs docs/modint/Montgomery_modint.md */ #line 11 "modint\\Montgomery_modint.hpp" #include namespace lib { /** * @brief Montgomery 取模类 * @see https://nyaannyaan.github.io/library/modint/montgomery-modint.hpp * @author Nyaan * @tparam mod 为奇数且大于 1 */ template class MontgomeryModInt { public: using i32 = std::int32_t; using u32 = std::uint32_t; using u64 = std::uint64_t; using m32 = MontgomeryModInt; using value_type = u32; static constexpr u32 get_mod() { return mod; } static constexpr u32 get_primitive_root_prime() { u32 tmp[32] = {}; int cnt = 0; const u32 phi = mod - 1; u32 m = phi; for (u32 i = 2; i * i <= m; ++i) { if (m % i == 0) { tmp[cnt++] = i; do { m /= i; } while (m % i == 0); } } if (m != 1) tmp[cnt++] = m; for (m32 res = 2;; res += 1) { bool f = true; for (int i = 0; i < cnt && f; ++i) f &= res.pow(phi / tmp[i]) != 1; if (f) return u32(res); } } constexpr MontgomeryModInt() = default; ~MontgomeryModInt() = default; template , int> = 0> constexpr MontgomeryModInt(T v) : v_(reduce(u64(v % i32(mod) + i32(mod)) * r2)) {} constexpr MontgomeryModInt(const m32 &) = default; constexpr u32 get() const { return norm(reduce(v_)); } template , int> = 0> explicit constexpr operator T() const { return T(get()); } constexpr m32 operator-() const { m32 res; res.v_ = (mod2 & -(v_ != 0)) - v_; return res; } constexpr m32 inv() const { i32 x1 = 1, x3 = 0, a = get(), b = mod; while (b != 0) { i32 q = a / b, x1_old = x1, a_old = a; x1 = x3, x3 = x1_old - x3 * q, a = b, b = a_old - b * q; } return m32(x1); } constexpr m32 &operator=(const m32 &) = default; constexpr m32 &operator+=(const m32 &rhs) { v_ += rhs.v_ - mod2; v_ += mod2 & -(v_ >> 31); return *this; } constexpr m32 &operator-=(const m32 &rhs) { v_ -= rhs.v_; v_ += mod2 & -(v_ >> 31); return *this; } constexpr m32 &operator*=(const m32 &rhs) { v_ = reduce(u64(v_) * rhs.v_); return *this; } constexpr m32 &operator/=(const m32 &rhs) { return operator*=(rhs.inv()); } friend constexpr m32 operator+(const m32 &lhs, const m32 &rhs) { return m32(lhs) += rhs; } friend constexpr m32 operator-(const m32 &lhs, const m32 &rhs) { return m32(lhs) -= rhs; } friend constexpr m32 operator*(const m32 &lhs, const m32 &rhs) { return m32(lhs) *= rhs; } friend constexpr m32 operator/(const m32 &lhs, const m32 &rhs) { return m32(lhs) /= rhs; } friend constexpr bool operator==(const m32 &lhs, const m32 &rhs) { return norm(lhs.v_) == norm(rhs.v_); } friend constexpr bool operator!=(const m32 &lhs, const m32 &rhs) { return norm(lhs.v_) != norm(rhs.v_); } friend std::istream &operator>>(std::istream &is, m32 &rhs) { i32 x; is >> x; rhs = m32(x); return is; } friend std::ostream &operator<<(std::ostream &os, const m32 &rhs) { return os << rhs.get(); } constexpr m32 pow(u64 y) const { m32 res(1), x(*this); for (; y != 0; y >>= 1, x *= x) if (y & 1) res *= x; return res; } private: static constexpr u32 get_r() { u32 two = 2, iv = mod * (two - mod * mod); iv *= two - mod * iv; iv *= two - mod * iv; return iv * (mod * iv - two); } static constexpr u32 reduce(u64 x) { return (x + u64(u32(x) * r) * mod) >> 32; } static constexpr u32 norm(u32 x) { return x - (mod & -((mod - 1 - x) >> 31)); } u32 v_; static constexpr u32 r = get_r(); static constexpr u32 r2 = -u64(mod) % mod; static constexpr u32 mod2 = mod << 1; static_assert((mod & 1) == 1, "mod % 2 == 0\n"); static_assert(-r * mod == 1, "???\n"); static_assert((mod & (3U << 30)) == 0, "mod >= (1 << 30)\n"); static_assert(mod != 1, "mod == 1\n"); }; // 别名 template using MontModInt = MontgomeryModInt; } // namespace lib #line 1 "math\\formal_power_series\\NTT_crt.hpp" /** * @brief NTT prime crt / NTT 素数用中国剩余定理 * */ #line 11 "math\\formal_power_series\\NTT_crt.hpp" namespace lib { template = 0> class NTTCRT3 { public: using u32 = std::uint32_t; using u64 = std::uint64_t; constexpr NTTCRT3(u32 mod) : m_(mod), M0M1_mod_m_(u64(M0) * M1 % mod) {} ~NTTCRT3() = default; constexpr u32 operator()(u32 a, u32 b, u32 c) const { // x mod M0 = a, x mod M1 = b, x mod M2 = c // a + k0M0 = b + k1M1 => k0 = (b - a) / M0 (mod M1) // x = a + k0M0 (mod M0M1) => a + k0M0 + k01M0M1 = c + k2M2 // => k01 = (c - (a + k0M0)) / (M0M1) (mod M2) // => x mod M0M1M2 = a + k0M0 + k01M0M1 u32 k0 = b - a; if (int(k0) < 0) k0 += M1; k0 = u64(k0) * M0_inv_M1_ % M1; u64 d = a + u64(k0) * M0; u32 k01 = c - d % M2; if (int(k01) < 0) k01 += M2; // NTT 模数都小于 (1U << 31) 所以在这里可以使用加法后再取模 return (d + u64(k01) * M0M1_inv_M2_ % M2 * M0M1_mod_m_) % m_; } static constexpr u32 get_inv(u32 x, u32 mod) { u32 res = 1; for (u32 e = mod - 2; e != 0; e >>= 1) { if (e & 1) res = u64(res) * x % mod; x = u64(x) * x % mod; } return res; } private: u32 m_, M0M1_mod_m_; static constexpr u32 M0_inv_M1_ = get_inv(M0, M1); static constexpr u32 M0M1_inv_M2_ = get_inv(u64(M0) * M1 % M2, M2); }; } // namespace lib #line 1 "math\\formal_power_series\\convolution.hpp" /** * @brief convolution / 卷积 * */ #line 10 "math\\formal_power_series\\convolution.hpp" #line 1 "math\\formal_power_series\\radix_2_NTT.hpp" /** * @brief radix-2 NTT / 基-2 数论变换 * @docs docs/math/formal_power_series/radix_2_NTT.md */ #include #include #line 13 "math\\formal_power_series\\radix_2_NTT.hpp" #line 1 "traits\\modint.hpp" /** * @brief modint traits / 取模类萃取 * */ namespace lib { template struct modint_traits { using type = typename mod_t::value_type; static constexpr type get_mod() { return mod_t::get_mod(); } static constexpr type get_primitive_root_prime() { return mod_t::get_primitive_root_prime(); } }; } // namespace lib #line 15 "math\\formal_power_series\\radix_2_NTT.hpp" namespace lib { /** * @note 必须用 NTT 友好的模数!!! */ template class NTT { public: NTT() = delete; static void set_root(int len) { static int lim = 0; static constexpr mod_t g(modint_traits::get_primitive_root_prime()); if (lim == 0) { constexpr int offset = 21; rt.resize(1 << offset); irt.resize(1 << offset); rt[0] = irt[0] = 1; mod_t g_t = g.pow(modint_traits::get_mod() >> (offset + 1)), ig_t = g_t.inv(); rt[1 << (offset - 1)] = g_t, irt[1 << (offset - 1)] = ig_t; for (int i = offset - 2; i >= 0; --i) { g_t *= g_t, ig_t *= ig_t; rt[1 << i] = g_t, irt[1 << i] = ig_t; } lim = 1; } for (; (lim << 1) < len; lim <<= 1) { mod_t g = rt[lim], ig = irt[lim]; for (int i = lim + 1, e = lim << 1; i < e; ++i) { rt[i] = rt[i - lim] * g; irt[i] = irt[i - lim] * ig; } } } static void dft(int n, mod_t *x) { for (int j = 0, l = n >> 1; j != l; ++j) { mod_t u = x[j], v = x[j + l]; x[j] = u + v, x[j + l] = u - v; } for (int i = n >> 1; i >= 2; i >>= 1) { for (int j = 0, l = i >> 1; j != l; ++j) { mod_t u = x[j], v = x[j + l]; x[j] = u + v, x[j + l] = u - v; } for (int j = i, l = i >> 1, m = 1; j != n; j += i, ++m) { mod_t root = rt[m]; for (int k = 0; k != l; ++k) { mod_t u = x[j + k], v = x[j + k + l] * root; x[j + k] = u + v, x[j + k + l] = u - v; } } } } static void idft(int n, mod_t *x) { for (int i = 2; i < n; i <<= 1) { for (int j = 0, l = i >> 1; j != l; ++j) { mod_t u = x[j], v = x[j + l]; x[j] = u + v, x[j + l] = u - v; } for (int j = i, l = i >> 1, m = 1; j != n; j += i, ++m) { mod_t root = irt[m]; for (int k = 0; k != l; ++k) { mod_t u = x[j + k], v = x[j + k + l]; x[j + k] = u + v, x[j + k + l] = (u - v) * root; } } } mod_t iv(mod_t(n).inv()); for (int j = 0, l = n >> 1; j != l; ++j) { mod_t u = x[j] * iv, v = x[j + l] * iv; x[j] = u + v, x[j + l] = u - v; } } static void even_dft(int n, mod_t *x) { static constexpr mod_t IT(mod_t(2).inv()); for (int i = 0, j = 0; i != n; i += 2, ++j) x[j] = IT * (x[i] + x[i + 1]); } static void odd_dft(int n, mod_t *x) { static constexpr mod_t IT(mod_t(2).inv()); for (int i = 0, j = 0; i != n; i += 2, ++j) x[j] = IT * irt[j] * (x[i] - x[i + 1]); } static void dft_doubling(int n, mod_t *x) { static constexpr mod_t g(modint_traits::get_primitive_root_prime()); std::copy_n(x, n, x + n); idft(n, x + n); mod_t k(1), t(g.pow((modint_traits::get_mod() - 1) / (n << 1))); for (int i = 0; i != n; ++i) x[n + i] *= k, k *= t; dft(n, x + n); } private: static inline std::vector rt, irt; }; std::uint32_t get_ntt_len(std::uint32_t n) { --n; n |= n >> 1; n |= n >> 2; n |= n >> 4; n |= n >> 8; return (n | n >> 16) + 1; } /** * @brief 接收一个多项式,返回二进制翻转后的 DFT 序列,即 x(1), x(-1) 等, * 对于下标 i 和 i^1 必然是两个互为相反数的点值 * * @tparam mod_t * @param n * @param x */ template void dft(int n, mod_t *x) { NTT::set_root(n); NTT::dft(n, x); } /** * @brief 接收二进制翻转后的 DFT 序列,返回多项式序列 mod (x^n - 1) * * @tparam mod_t * @param n * @param x */ template void idft(int n, mod_t *x) { NTT::set_root(n); NTT::idft(n, x); } template void dft(std::vector &x) { NTT::set_root(x.size()); NTT::dft(x.size(), x.data()); } template void idft(std::vector &x) { NTT::set_root(x.size()); NTT::idft(x.size(), x.data()); } } // namespace lib #line 12 "math\\formal_power_series\\convolution.hpp" namespace lib { /** * @brief NTT 模数卷积 * @tparam mod_t NTT 友好的模数类 */ template std::vector convolve(const std::vector &x, const std::vector &y) { int n = x.size(), m = y.size(); if (std::min(n, m) <= 32) { std::vector res(n + m - 1, mod_t(0)); for (int i = 0; i < n; ++i) for (int j = 0; j < m; ++j) res[i + j] += x[i] * y[j]; return res; } int len = get_ntt_len(n + m - 1); std::vector res(len); std::copy_n(x.begin(), n, res.begin()); std::fill(res.begin() + n, res.end(), mod_t(0)); dft(res); if (&x == &y) { for (int i = 0; i < len; ++i) res[i] *= res[i]; } else { std::vector y_tmp(len); std::copy_n(y.begin(), m, y_tmp.begin()); std::fill(y_tmp.begin() + m, y_tmp.end(), mod_t(0)); dft(y_tmp); for (int i = 0; i < len; ++i) res[i] *= y_tmp[i]; } idft(res); res.resize(n + m - 1); return res; } } // namespace lib #line 15 "math\\formal_power_series\\arbitrary_modulo_convolution.hpp" namespace lib { /** * @brief 任意模数卷积 * @note 只适用于模数为 32 位 */ template std::enable_if_t && (sizeof(ModType) <= 4) && std::is_integral_v, std::vector> convolve_mod(const std::vector &x, const std::vector &y, ModType mod) { using u32 = std::uint32_t; static constexpr u32 M0 = 880803841, M1 = 897581057, M2 = 998244353; NTTCRT3 crt(mod); using mod_t0 = MontModInt; using mod_t1 = MontModInt; using mod_t2 = MontModInt; auto res0 = convolve(std::vector(x.begin(), x.end()), std::vector(y.begin(), y.end())); auto res1 = convolve(std::vector(x.begin(), x.end()), std::vector(y.begin(), y.end())); auto res2 = convolve(std::vector(x.begin(), x.end()), std::vector(y.begin(), y.end())); int n = res0.size(); std::vector res; res.reserve(n); for (int i = 0; i < n; ++i) res.emplace_back(crt(u32(res0[i]), u32(res1[i]), u32(res2[i]))); return res; } } // namespace lib #line 1 "math\\modulo\\factorial_modulo_prime.hpp" /** * @brief factorial modulo prime / 阶乘模素数 * @docs docs/math/modulo/factorial_modulo_prime.md */ #line 11 "math\\modulo\\factorial_modulo_prime.hpp" #include #include #include #line 15 "math\\modulo\\factorial_modulo_prime.hpp" #line 1 "math\\formal_power_series\\sample_points_shift.hpp" /** * @brief sample points shift / 样本点平移 * @docs docs/math/formal_power_series/sample_points_shift.md */ #line 12 "math\\formal_power_series\\sample_points_shift.hpp" #include #line 14 "math\\formal_power_series\\sample_points_shift.hpp" #line 1 "math\\formal_power_series\\NTT_binomial.hpp" /** * @brief NTT prime binomial / NTT 素数用二项式系数 * */ #line 10 "math\\formal_power_series\\NTT_binomial.hpp" namespace lib { template class NTTBinomial { public: NTTBinomial(int lim = 0) { if (fac_.empty()) { fac_.emplace_back(1); ifac_.emplace_back(1); } init(lim); } ~NTTBinomial() = default; /** * @brief 预处理 [0, n) 的阶乘和其逆元 */ static void init(int n) { if (int(fac_.size()) < n) { int old_size = fac_.size(); fac_.resize(n); ifac_.resize(n); for (int i = old_size; i < n; ++i) fac_[i] = fac_[i - 1] * mod_t(i); ifac_.back() = mod_t(1) / fac_.back(); for (int i = n - 2; i >= old_size; --i) ifac_[i] = ifac_[i + 1] * mod_t(i + 1); } } static void reset() { fac_.clear(); ifac_.clear(); } mod_t fac_unsafe(int n) const { return fac_[n]; } mod_t ifac_unsafe(int n) const { return ifac_[n]; } mod_t inv_unsafe(int n) const { return ifac_[n] * fac_[n - 1]; } mod_t choose_unsafe(int n, int k) const { // 返回 binom{n}{k} 注意上指标可以为负数但这里并未实现! return n >= k ? fac_[n] * ifac_[k] * ifac_[n - k] : mod_t(0); } private: static inline std::vector fac_, ifac_; }; } // namespace lib #line 1 "math\\formal_power_series\\falling_factorial_polynomial_multiplication.hpp" /** * @brief falling factorial polynomial multiplication / 下降幂多项式乘法 * @docs docs/math/formal_power_series/falling_factorial_polynomial_multiplication.md */ #line 13 "math\\formal_power_series\\falling_factorial_polynomial_multiplication.hpp" #line 16 "math\\formal_power_series\\falling_factorial_polynomial_multiplication.hpp" namespace lib { /** * @brief 样本点转换为下降幂多项式 * * @tparam mod_t NTT 友好的模数 * @param pts f(0), f(1), …, f(n-1) * @return std::vector 下降幂多项式系数 */ template std::vector sample_points_to_FFP(const std::vector &pts) { int n = pts.size(); NTTBinomial bi(n); std::vector emx(n), pts_egf(n); for (int i = 0; i < n; ++i) { pts_egf[i] = pts[i] * (emx[i] = bi.ifac_unsafe(i)); if (i & 1) emx[i] = -emx[i]; } pts_egf = std::move(convolve(emx, pts_egf)); pts_egf.resize(n); return pts_egf; } /** * @brief 下降幂多项式转换为样本点 * * @tparam mod_t NTT 友好的模数 * @param n * @param ffp 下降幂多项式 * @return std::vector f(0), f(1), …, f(n-1) */ template std::vector FFP_to_sample_points(int n, const std::vector &ffp) { NTTBinomial bi(n); std::vector ex(n); for (int i = 0; i < n; ++i) ex[i] = bi.ifac_unsafe(i); if (ffp.size() > n) { ex = std::move(convolve(ex, std::vector(ffp.begin(), ffp.begin() + n))); } else { ex = std::move(convolve(ex, ffp)); } for (int i = 0; i < n; ++i) ex[i] *= bi.fac_unsafe(i); ex.resize(n); return ex; } /** * @brief 下降幂多项式乘法 * @note 这里若展开写可以节省一次 DFT */ template std::vector convolve_FFP(const std::vector &lhs, const std::vector &rhs) { int d = lhs.size() + rhs.size() - 1; std::vector lhs_pts(FFP_to_sample_points(d, lhs)), rhs_pts(FFP_to_sample_points(d, rhs)); for (int i = 0; i < d; ++i) lhs_pts[i] *= rhs_pts[i]; return sample_points_to_FFP(lhs_pts); } /** * @brief 下降幂多项式平移 */ template std::vector shift_FFP(const std::vector &ffp, mod_t c) { int n = ffp.size(); NTTBinomial bi(n); std::vector A(ffp), B(n); mod_t c_i(1); for (int i = 0; i < n; ++i) A[i] *= bi.fac_unsafe(i), B[i] = c_i * bi.ifac_unsafe(i), c_i *= c - mod_t(i); std::reverse(A.begin(), A.end()); A = std::move(convolve(A, B)); A.resize(n); std::reverse(A.begin(), A.end()); for (int i = 0; i < n; ++i) A[i] *= bi.ifac_unsafe(i); return A; } } // namespace lib #line 18 "math\\formal_power_series\\sample_points_shift.hpp" namespace lib { /** * @brief 样本点平移(通过下降幂多项式平移) * * @tparam mod_t NTT 友好的模数 * @param n 返回值的点数,需大于零 * @param pts f(0), f(1), …, f(k-1) 确定一个唯一多项式 mod x^{\underline{k}} * @param m 平移距离 f(x) => f(x+m) * @return std::vector f(m), f(m+1), …, f(m+n-1) */ template std::vector shift_sample_points_via_FFP(int n, const std::vector &pts, mod_t m) { return FFP_to_sample_points(n, shift_FFP(sample_points_to_FFP(pts), m)); } template std::vector shift_sample_points_via_FFP(const std::vector &pts, mod_t m) { return shift_sample_points_via_FFP(pts.size(), pts, m); } /** * @brief 样本点平移(通过拉格朗日插值公式) * @note 不安全的算法 * @tparam mod_t NTT 友好的模数 * @param n 返回值的点数,需大于零 * @param pts f(0), f(1), …, f(k-1) 确定一个唯一多项式 mod x^{\underline{k}} * @param m 平移距离 f(x) => f(x+m) * @return std::vector f(m), f(m+1), …, f(m+n-1) */ template std::vector shift_sample_points_unsafe(int n, const std::vector &pts, mod_t m) { int s = pts.size(), deg_A = s - 1; NTTBinomial bi(s); std::vector A(s), B(deg_A + n), p_sum(deg_A + n); for (int i = 0; i < s; ++i) { A[i] = pts[i] * bi.ifac_unsafe(i) * bi.ifac_unsafe(deg_A - i); if ((deg_A - i) & 1) A[i] = -A[i]; } const mod_t ZERO(0); for (int i = 0; i < deg_A + n; ++i) { B[i] = m + mod_t(i - deg_A); assert(B[i] != ZERO); } std::partial_sum(B.begin(), B.end(), p_sum.begin(), std::multiplies<>()); mod_t p_inv = mod_t(1) / p_sum.back(); for (int i = deg_A + n - 1; i > 0; --i) { mod_t t = p_inv * B[i]; B[i] = p_inv * p_sum[i - 1]; p_inv = t; } B[0] = p_inv; p_sum = B; int len = get_ntt_len(s + s - 1 + n - (s < 2 ? 0 : deg_A - 1) - 1); p_sum.resize(len, ZERO); A.resize(len, ZERO); { using u32 = std::uint32_t; static constexpr u32 M0 = 880803841, M1 = 897581057, M2 = 998244353; NTTCRT3 crt(mod_t::get_mod()); using mod_t0 = MontModInt; using mod_t1 = MontModInt; using mod_t2 = MontModInt; std::vector A0(len), B0(len); std::vector A1(len), B1(len); std::vector A2(len), B2(len); for (int i = 0; i < len; ++i) { u32 vvv = u32(A[i]); A0[i] = vvv; A1[i] = vvv; A2[i] = vvv; vvv = u32(p_sum[i]); B0[i] = vvv; B1[i] = vvv; B2[i] = vvv; } dft(A0), dft(B0), dft(A1), dft(B1), dft(A2), dft(B2); for (int i = 0; i < len; ++i) { A0[i] *= B0[i]; A1[i] *= B1[i]; A2[i] *= B2[i]; } idft(A0), idft(A1), idft(A2); A.clear(); for (int i = 0; i < len; ++i) A.emplace_back(crt(u32(A0[i]), u32(A1[i]), u32(A2[i]))); } mod_t coeff(m); for (int i = 1; i < s; ++i) coeff *= m - mod_t(i); for (int i = 0; i < n; ++i) A[i] = A[deg_A + i] * coeff, coeff *= (m + mod_t(i + 1)) * B[i]; A.resize(n); return A; } template std::vector shift_sample_points_unsafe(const std::vector &pts, mod_t m) { return shift_sample_points_unsafe(pts.size(), pts, m); } } // namespace lib #line 18 "math\\modulo\\factorial_modulo_prime.hpp" namespace lib { /** * @brief NTT 友好模数的阶乘计算 * * @tparam mod_t NTT 友好的模数 */ template class NTTPrimeFactorial { public: using u32 = std::uint32_t; using u64 = std::uint64_t; NTTPrimeFactorial() : v_(1) { while (v_ * v_ < modint_traits::get_mod()) v_ <<= 1; mod_t iv = mod_t(1) / mod_t(v_); fac_table_ = std::vector{mod_t(1), mod_t(v_ + 1)}; fac_table_.reserve(v_ + 1); for (u64 d = 1; d != v_; d <<= 1) { std::vector g0(shift_sample_points_unsafe(d, fac_table_, mod_t(d + 1))); std::vector g1(shift_sample_points_unsafe(d << 1 | 1, fac_table_, mod_t(d) * iv)); std::copy(g0.begin(), g0.end(), std::back_inserter(fac_table_)); for (int i = 0; i <= (d << 1); ++i) fac_table_[i] *= g1[i]; } std::partial_sum(fac_table_.begin(), fac_table_.end(), fac_table_.begin(), std::multiplies<>()); } ~NTTPrimeFactorial() = default; mod_t fac(mod_t n) const { mod_t res(1); u64 pass = u64(n) / v_; if (pass != 0) res *= fac_table_[pass - 1]; for (mod_t ONE(1), mpass(pass * v_); mpass != n;) res *= (mpass += ONE); return res; } private: u64 v_; std::vector fac_table_; }; } // namespace lib #line 1 "modint\\runtime_long_Montgomery_modint.hpp" /** * @brief runtime long Montgomery modint / 运行时长整型 Montgomery 取模类 * */ #line 12 "modint\\runtime_long_Montgomery_modint.hpp" #include #line 14 "modint\\runtime_long_Montgomery_modint.hpp" #ifdef _MSC_VER #include #endif namespace lib { /** * @brief 运行时长整型 Montgomery 取模类 * @see https://nyaannyaan.github.io/library/modint/montgomery-modint.hpp * @author Nyaan * @note 约定不使用模板中 int 为负数的对象 */ template class RuntimeLongMontgomeryModInt { public: using u32 = std::uint32_t; using i64 = std::int64_t; using u64 = std::uint64_t; using m64 = RuntimeLongMontgomeryModInt; using value_type = u64; static u64 get_mod() { return mod; } static bool set_mod(u64 m) { if ((m & 1) == 0 || m == 1 || (m & (1ULL << 63)) != 0) return false; mod = m; r = get_r(); r2 = get_r2(); return true; } RuntimeLongMontgomeryModInt() = default; ~RuntimeLongMontgomeryModInt() = default; template , int> = 0> RuntimeLongMontgomeryModInt(T v) : v_(reduce(mul(norm(v % i64(mod)), r2))) {} RuntimeLongMontgomeryModInt(const m64 &) = default; u64 get() const { return reduce({0, v_}); } template , int> = 0> explicit operator T() const { return T(get()); } m64 operator-() const { m64 res; res.v_ = (mod & -(v_ != 0)) - v_; return res; } m64 inv() const { i64 x1 = 1, x3 = 0, a = get(), b = mod; while (b != 0) { i64 q = a / b, x1_old = x1, a_old = a; x1 = x3, x3 = x1_old - x3 * q, a = b, b = a_old - b * q; } return m64(x1); } m64 &operator=(const m64 &) = default; m64 &operator+=(const m64 &rhs) { v_ += rhs.v_ - mod; v_ += mod & -(v_ >> 63); return *this; } m64 &operator-=(const m64 &rhs) { v_ -= rhs.v_; v_ += mod & -(v_ >> 63); return *this; } m64 &operator*=(const m64 &rhs) { v_ = reduce(mul(v_, rhs.v_)); return *this; } m64 &operator/=(const m64 &rhs) { return operator*=(rhs.inv()); } friend m64 operator+(const m64 &lhs, const m64 &rhs) { return m64(lhs) += rhs; } friend m64 operator-(const m64 &lhs, const m64 &rhs) { return m64(lhs) -= rhs; } friend m64 operator*(const m64 &lhs, const m64 &rhs) { return m64(lhs) *= rhs; } friend m64 operator/(const m64 &lhs, const m64 &rhs) { return m64(lhs) /= rhs; } friend bool operator==(const m64 &lhs, const m64 &rhs) { return lhs.v_ == rhs.v_; } friend bool operator!=(const m64 &lhs, const m64 &rhs) { return lhs.v_ != rhs.v_; } friend std::istream &operator>>(std::istream &is, m64 &rhs) { i64 x; is >> x; rhs = m64(x); return is; } friend std::ostream &operator<<(std::ostream &os, const m64 &rhs) { return os << rhs.get(); } m64 pow(u64 y) const { m64 res(1), x(*this); for (; y != 0; y >>= 1, x *= x) if (y & 1) res *= x; return res; } private: static std::pair mul(u64 x, u64 y) { #ifdef __GNUC__ unsigned __int128 res = (unsigned __int128)x * y; return {u64(res >> 64), u64(res)}; #elif defined(_MSC_VER) u64 h, l = _umul128(x, y, &h); return {h, l}; #else u64 a = x >> 32, b = u32(x), c = y >> 32, d = u32(y), ad = a * d, bc = b * c; return {a * c + (ad >> 32) + (bc >> 32) + (((ad & ~UINT32_C(0)) + (bc & ~UINT32_C(0)) + (b * d >> 32)) >> 32), x * y}; #endif } static u64 mulh(u64 x, u64 y) { #ifdef __GNUC__ return u64((unsigned __int128)x * y >> 64); #elif defined(_MSC_VER) return __umulh(x, y); #else u64 a = x >> 32, b = u32(x), c = y >> 32, d = u32(y), ad = a * d, bc = b * c; return a * c + (ad >> 32) + (bc >> 32) + (((ad & ~UINT32_C(0)) + (bc & ~UINT32_C(0)) + (b * d >> 32)) >> 32); #endif } static u64 get_r() { u64 two = 2, iv = mod * (two - mod * mod); iv *= two - mod * iv; iv *= two - mod * iv; iv *= two - mod * iv; return iv * (two - mod * iv); } static u64 get_r2() { u64 iv = -u64(mod) % mod; for (int i = 0; i != 64; ++i) if ((iv <<= 1) >= mod) iv -= mod; return iv; } static u64 reduce(const std::pair &x) { u64 res = x.first - mulh(x.second * r, mod); return res + (mod & -(res >> 63)); } static u64 norm(i64 x) { return x + (mod & -(x < 0)); } u64 v_; static inline u64 mod, r, r2; }; template using RuntimeLongMontModInt = RuntimeLongMontgomeryModInt; } // namespace lib #line 7 "local_test\\test.cpp" int main() { #ifdef LOCAL std::freopen("in", "r", stdin), std::freopen("out", "w", stdout); #endif std::ios::sync_with_stdio(false); std::cin.tie(0); using mint = lib::MontModInt<1000000007>; long long n; std::cin >> n; if (n >= 1000000007) std::cout << "0"; else std::cout << lib::NTTPrimeFactorial().fac(mint(n)); return 0; }