結果

問題 No.502 階乗を計算するだけ
ユーザー Min_25
提出日時 2017-04-08 11:52:25
言語 C++14
(gcc 13.3.0 + boost 1.87.0)
結果
CE  
(最新)
AC  
(最初)
実行時間 -
コード長 21,706 bytes
コンパイル時間 1,295 ms
コンパイル使用メモリ 92,512 KB
最終ジャッジ日時 2025-03-26 22:54:45
合計ジャッジ時間 2,247 ms
ジャッジサーバーID
(参考情報)
judge4 / judge3
このコードへのチャレンジ
(要ログイン)
コンパイルエラー時のメッセージ・ソースコードは、提出者また管理者しか表示できないようにしております。(リジャッジ後のコンパイルエラーは公開されます)
ただし、clay言語の場合は開発者のデバッグのため、公開されます。

コンパイルメッセージ
In file included from /usr/include/c++/13/string:43,
                 from /usr/include/c++/13/bits/locale_classes.h:40,
                 from /usr/include/c++/13/bits/ios_base.h:41,
                 from /usr/include/c++/13/ios:44,
                 from /usr/include/c++/13/ostream:40,
                 from /usr/include/c++/13/iostream:41,
                 from main.cpp:10:
/usr/include/c++/13/bits/allocator.h: In destructor ‘std::_Vector_base<int, std::allocator<int> >::_Vector_impl::~_Vector_impl()’:
/usr/include/c++/13/bits/allocator.h:184:7: error: inlining failed in call to ‘always_inline’ ‘std::allocator< <template-parameter-1-1> >::~allocator() noexcept [with _Tp = int]’: target specific option mismatch
  184 |       ~allocator() _GLIBCXX_NOTHROW { }
      |       ^
In file included from /usr/include/c++/13/vector:66,
                 from main.cpp:11:
/usr/include/c++/13/bits/stl_vector.h:133:14: note: called from here
  133 |       struct _Vector_impl
      |              ^~~~~~~~~~~~

ソースコード

diff #
プレゼンテーションモードにする

#pragma GCC optimize ("O3")
#pragma GCC target ("avx")
#include <cstdio>
#include <cassert>
#include <cmath>
#include <cstring>
#include <algorithm>
#include <iostream>
#include <vector>
#include <functional>
#ifdef __x86_64__
#define NTT64
#endif
#define _rep(_1, _2, _3, _4, name, ...) name
#define rep2(i, n) rep3(i, 0, n)
#define rep3(i, a, b) rep4(i, a, b, 1)
#define rep4(i, a, b, c) for (int i = int(a); i < int(b); i += int(c))
#define rep(...) _rep(__VA_ARGS__, rep4, rep3, rep2, _)(__VA_ARGS__)
using namespace std;
using i64 = long long;
using u32 = unsigned;
using u64 = unsigned long long;
using f80 = long double;
namespace ntt {
#ifdef NTT64
using word_t = u64;
using dword_t = __uint128_t;
#else
using word_t = u32;
using dword_t = u64;
#endif
static const int word_bits = 8 * sizeof(word_t);
template <word_t mod, word_t prim_root>
class Mod {
private:
static constexpr word_t mul_inv(word_t n, int e=6, word_t x=1) {
return e == 0 ? x : mul_inv(n, e-1, x*(2-x*n));
}
public:
static constexpr word_t inv = mul_inv(mod);
static constexpr word_t r2 = -dword_t(mod) % mod;
static constexpr int level = __builtin_ctzll(mod - 1);
static_assert(inv * mod == 1, "invalid 1/M modulo 2^@.");
Mod() {}
Mod(word_t n) : x(init(n)) {};
static word_t modulus() { return mod; }
static word_t init(word_t w) { return reduce(dword_t(w) * r2); }
static word_t reduce(const dword_t w) { return word_t(w >> word_bits) + mod - word_t((dword_t(word_t(w) * inv) * mod) >> word_bits); }
static Mod omega() { return Mod(prim_root).pow((mod - 1) >> level); }
Mod& operator += (Mod rhs) { this->x += rhs.x; return *this; }
Mod& operator -= (Mod rhs) { this->x += 3 * mod - rhs.x; return *this; }
Mod& operator *= (Mod rhs) { this->x = reduce(dword_t(this->x) * rhs.x); return *this; }
Mod operator + (Mod rhs) const { return Mod(*this) += rhs; }
Mod operator - (Mod rhs) const { return Mod(*this) -= rhs; }
Mod operator * (Mod rhs) const { return Mod(*this) *= rhs; }
word_t get() const { return reduce(this->x) % mod; }
void set(word_t n) const { this->x = n; }
Mod pow(word_t exp) const {
Mod ret = Mod(1);
for (Mod base = *this; exp; exp >>= 1, base *= base) if (exp & 1) ret *= base;
return ret;
}
Mod inverse() const { return pow(mod - 2); }
friend ostream& operator << (ostream& os, const Mod& m) { return os << m.get(); }
static void debug() {
printf("%llu %llu %llu %llu\n", mod, inv, r2, omega().get());
}
word_t x;
};
const int size = 1 << 24;
#ifdef NTT64
using m64_1 = ntt::Mod<709143768229478401, 31>;
using m64_2 = ntt::Mod<711416664922521601, 19>; // <= 712e15 (sub.D = 3)
m64_1 f1[size], g1[size];
m64_2 f2[size], g2[size];
#else
using m32_1 = ntt::Mod<138412033, 5>;
using m32_2 = ntt::Mod<155189249, 6>;
using m32_3 = ntt::Mod<163577857, 23>; // <= 16579e4 (sub.D = 3)
m32_1 f1[size], g1[size];
m32_2 f2[size], g2[size];
m32_3 f3[size], g3[size];
#endif
template <typename mod_t>
void convolve(mod_t* A, int s1, mod_t* B, int s2, bool cyclic=false) {
int s = (cyclic ? max(s1, s2) : s1 + s2 - 1);
int size = 1;
while (size < s) size <<= 1;
mod_t roots[mod_t::level] = { mod_t::omega() };
rep(i, 1, mod_t::level) roots[i] = roots[i - 1] * roots[i - 1];
fill(A + s1, A + size, 0); ntt_dit4(A, size, 1, roots);
if (A == B && s1 == s2) {
rep(i, size) A[i] *= A[i];
} else {
fill(B + s2, B + size, 0); ntt_dit4(B, size, 1, roots);
rep(i, size) A[i] *= B[i];
}
ntt_dit4(A, size, -1, roots);
mod_t inv = mod_t(size).inverse();
rep(i, cyclic ? size : s) A[i] *= inv;
}
template <typename mod_t>
void rev_permute(mod_t* A, int n) {
int r = 0, nh = n >> 1;
rep(i, 1, n) {
for (int h = nh; !((r ^= h) & h); h >>= 1);
if (r > i) swap(A[i], A[r]);
}
}
template <typename mod_t>
void ntt_dit4(mod_t* A, int n, int sign, mod_t* roots) {
rev_permute(A, n);
int logn = __builtin_ctz(n);
assert(logn <= mod_t::level);
if (logn & 1) rep(i, 0, n, 2) {
mod_t a = A[i], b = A[i + 1];
A[i] = a + b; A[i + 1] = a - b;
}
mod_t imag = roots[mod_t::level - 2];
if (sign < 0) imag = imag.inverse();
mod_t one = mod_t(1);
rep(e, 2 + (logn & 1), logn + 1, 2) {
const int m = 1 << e;
const int m4 = m >> 2;
mod_t dw = roots[mod_t::level - e];
if (sign < 0) dw = dw.inverse();
const int block_size = min(n, max(m, (1 << 15) / int(sizeof(A[0]))));
rep(k, 0, n, block_size) {
mod_t w = one, w2 = one, w3 = one;
rep(j, m4) {
rep(i, k + j, k + block_size, m) {
mod_t a0 = A[i + m4 * 0] * one, a2 = A[i + m4 * 1] * w2;
mod_t a1 = A[i + m4 * 2] * w, a3 = A[i + m4 * 3] * w3;
mod_t t02 = a0 + a2, t13 = a1 + a3;
A[i + m4 * 0] = t02 + t13; A[i + m4 * 2] = t02 - t13;
t02 = a0 - a2, t13 = (a1 - a3) * imag;
A[i + m4 * 1] = t02 + t13; A[i + m4 * 3] = t02 - t13;
}
w *= dw; w2 = w * w; w3 = w2 * w;
}
}
}
}
} // namespace ntt
using R = int;
using R64 = i64;
class poly {
public:
#ifdef NTT64
static const int ntt_threshold = 900; // deg(f * g)
static const int quotient_threshold = 1800; // deg(f)
static const int divrem_threshold = 700; // deg(f)
static const int divrem_pre_threshold = 1600; // deg(f)
#else
static const int ntt_threshold = 1500; // deg(f * g)
static const int quotient_threshold = 1500; // deg(f)
static const int divrem_threshold = 800; // deg(f)
static const int divrem_pre_threshold = 1600; // deg(f)
#endif
static R add_mod(R a, R b) { return int(a += b - mod) < 0 ? a + mod : a; }
static R sub_mod(R a, R b) { return int(a -= b) < 0 ? a + mod : a; }
static R64 sub_mul_mod(R64 a, R b, R c) {
i64 t = i64(a) - i64(int(b)) * int(c);
return t < 0 ? t + lmod : t;
}
static R mul_mod(R a, R b) { return R64(a) * b % fast_mod; }
static R mod_inv(R a) {
R b = mod, s = 1, t = 0;
while (b > 0) {
swap(s -= t * (a / b), t);
swap(a %= b, b);
}
if (a > 1) { fprintf(stderr, "Error: invalid modular inverse\n"); exit(1); };
return int(s) < 0 ? s + mod : s;
}
inline static void vec_add(R64* res, int s, const R* f, R c) {
rep(i, s) res[i] = sub_mul_mod(res[i], mod - c, f[i]);
}
inline static void vec_sub(R64* res, int s, const R* f, R c) {
rep(i, s) res[i] = sub_mul_mod(res[i], c, f[i]);
}
#ifdef NTT64
struct fast_div {
using u128 = __uint128_t;
fast_div() {}
fast_div(u64 n) : m(n) {
s = (n == 1) ? 0 : 127 - __builtin_clzll(n - 1);
x = ((u128(1) << s) + n - 1) / n;
}
friend u64 operator / (u64 n, fast_div d) { return u128(n) * d.x >> d.s; }
friend u64 operator % (u64 n, fast_div d) { return n - n / d * d.m; }
u64 m, s, x;
};
#else
struct fast_div {
fast_div() {}
fast_div(u32 n) : m(n) {}
friend u32 operator % (u64 n, fast_div d) { return n % d.m; }
u32 m;
};
#endif
public:
poly() {}
poly(int n) : coefs(n) {}
poly(int n, int c) : coefs(n, c % mod) {}
poly(const R* ar, int s) : coefs(ar, ar + s) {}
poly(const vector<R>& v) : coefs(v) {}
poly(const poly& f, int beg, int end=-1) {
if (end < 0) end = beg, beg = 0;
resize(end - beg);
rep(i, beg, end) if (i < f.size()) coefs[i - beg] = f[i];
}
static int ilog2(u64 n) {
return 63 - __builtin_clzll(n);
}
int size() const { return coefs.size(); }
void resize(int s) { coefs.resize(s); }
void push_back(R c) { coefs.push_back(c); }
const R* data() const { return coefs.data(); }
R* data() { return coefs.data(); }
const R& operator [] (int i) const { return coefs[i]; }
R& operator [] (int i) { return coefs[i]; }
void reverse() { std::reverse(coefs.begin(), coefs.end()); }
poly operator - () {
poly ret = *this;
rep(i, ret.size()) ret[i] = (ret[i] == 0 ? 0 : mod - ret[i]);
return ret;
}
poly& operator += (const poly& rhs) {
if (size() < rhs.size()) resize(rhs.size());
rep(i, rhs.size()) coefs[i] = add_mod(coefs[i], rhs[i]);
return *this;
}
poly& operator -= (const poly& rhs) {
if (size() < rhs.size()) resize(rhs.size());
rep(i, rhs.size()) coefs[i] = sub_mod(coefs[i], rhs[i]);
return *this;
}
poly& operator *= (const poly& rhs) { return *this = *this * rhs; }
poly& rev_add(const poly& rhs) {
if (size() < rhs.size()) {
int s = size();
resize(rhs.size());
rep(i, s) coefs[size() - 1 - i] = coefs[s - 1 - i];
rep(i, size() - s) coefs[i] = 0;
}
rep(i, rhs.size()) coefs[size() - 1 - i] = \
add_mod(coefs[size() - 1 - i], rhs.coefs[rhs.size() - 1 - i]);
return *this;
}
poly operator + (const poly& rhs) const { return poly(*this) += rhs; }
poly operator - (const poly& rhs) const { return poly(*this) -= rhs; }
poly operator * (const poly& rhs) const { return this->mul(rhs); }
static void set_mod(R m, int N=2) {
mod = m;
lmod = R64(m) << 32;
N = max(2, N);
fast_mod = fast_div(mod);
invs.assign(N, 1);
facts.assign(N, 1);
ifacts.assign(N, 1);
invs[1] = 1;
rep(i, 2, N + 1) {
invs[i] = mul_mod(invs[mod % i], mod - mod / i);
facts[i] = mul_mod(facts[i - 1], i);
ifacts[i] = mul_mod(ifacts[i - 1], invs[i]);
}
}
private:
#ifdef NTT64
static poly mul_crt(int beg, int end) {
using namespace ntt;
auto inv = m64_2(m64_1::modulus()).inverse();
auto mod1 = m64_1::modulus() % fast_mod;
poly ret(end - beg);
rep(i, ret.size()) {
u64 r1 = f1[i + beg].get(), r2 = f2[i + beg].get();
ret[i] = (r1 + (m64_2(r2 + m64_2::modulus() - r1) * inv).get() % fast_mod * mod1) % fast_mod;
}
return ret;
}
static void mul2(const poly& f, const poly& g, bool cyclic=false) {
using namespace ntt;
if (&f == &g) {
rep(i, f.size()) f1[i] = f[i];
convolve(f1, f.size(), f1, f.size(), cyclic);
rep(i, f.size()) f2[i] = f[i];
convolve(f2, f.size(), f2, f.size(), cyclic);
} else {
rep(i, f.size()) f1[i] = f[i]; rep(i, g.size()) g1[i] = g[i];
convolve(f1, f.size(), g1, g.size(), cyclic);
rep(i, f.size()) f2[i] = f[i]; rep(i, g.size()) g2[i] = g[i];
convolve(f2, f.size(), g2, g.size(), cyclic);
}
}
#else
static poly mul_crt(int beg, int end) {
using namespace ntt;
auto m1 = m32_1::modulus();
auto m2 = m32_2::modulus();
auto m3 = m32_3::modulus();
auto m12 = u64(m1) * m2;
poly ret(end - beg);
u32 m12m = m12 % mod;
u32 inv1 = m32_2(m1).inverse().get();
u32 inv12 = m32_3(m12 % m3).inverse().get();
rep(i, ret.size()) {
u32 r1 = f1[i + beg].get(), r2 = f2[i + beg].get(), r3 = f3[i + beg].get();
u64 r = r1 + u64(r2 + m2 - r1) * inv1 % m2 * m1;
ret[i] = (r + u64(r3 + m3 - r % m3) * inv12 % m3 * m12m) % mod;
}
return ret;
}
static void mul2(const poly& f, const poly& g, bool cyclic=false) {
using namespace ntt;
if (&f == &g) {
rep(i, f.size()) f1[i] = f[i] % m32_1::modulus();
convolve(f1, f.size(), f1, f.size(), cyclic);
rep(i, f.size()) f2[i] = f[i] % m32_2::modulus();
convolve(f2, f.size(), f2, f.size(), cyclic);
rep(i, f.size()) f3[i] = f[i] % m32_3::modulus();
convolve(f3, f.size(), f3, f.size(), cyclic);
} else {
rep(i, f.size()) f1[i] = f[i] % m32_1::modulus();
rep(i, g.size()) g1[i] = g[i] % m32_1::modulus();
convolve(f1, f.size(), g1, g.size(), cyclic);
rep(i, f.size()) f2[i] = f[i] % m32_2::modulus();
rep(i, g.size()) g2[i] = g[i] % m32_2::modulus();
convolve(f2, f.size(), g2, g.size(), cyclic);
rep(i, f.size()) f3[i] = f[i] % m32_3::modulus();
rep(i, g.size()) g3[i] = g[i] % m32_3::modulus();
convolve(f3, f.size(), g3, g.size(), cyclic);
}
}
#endif
public:
static void amul(const R* f, int s1, const R* g, int s2, R* res) {
int s = s1 + s2 - 1;
tmp64.assign(s, 0);
rep(i, s2) if (g[i]) vec_add(tmp64.data() + i, s1, f, g[i]);
rep(i, s) res[i] = tmp64[i] % fast_mod;
}
static void aquotient(const R* f, int s1, const R* g, int s2, R* res) {
tmp64.resize(s1);
rep(i, s1) tmp64[i] = f[i];
rep(i, s1) {
R c = tmp64[i] % mod;
if (c) vec_sub(tmp64.data() + i + 1, min(s1 - i, s2) - 1, g + 1, c);
res[i] = c;
}
}
static void adivrem(const R* f, int s1, const R* g, int s2, R* q, R* r, bool need_r=true) {
int sq = s1 - s2 + 1;
tmp64.resize(s1);
rep(i, s1) tmp64[i] = f[i];
R inv = mod_inv(g[0]);
rep(i, sq) {
R c = tmp64[i] % mod * inv % mod;
if (c) vec_sub(tmp64.data() + i + 1, s2 - 1, g + 1, c);
q[i] = c;
}
if (need_r) rep(i, sq, s1) r[i - sq] = tmp64[i] % fast_mod;
}
poly mul_basecase(const poly& g) const {
const auto& f = *this;
int s = size() + g.size() - 1;
poly ret(s);
amul(f.data(), f.size(), g.data(), g.size(), ret.data());
return ret;
}
poly quotient_basecase(const poly& g) const {
const auto& f = *this;
int s = size();
poly q(s);
aquotient(f.data(), f.size(), g.data(), g.size(), q.data());
return q;
}
pair<poly, poly> divrem_basecase(const poly& g) const {
const auto& f = *this;
int s1 = f.size(), s2 = g.size();
int sq = s1 - s2 + 1;
auto q = poly(sq), r = poly(g.size() - 1);
adivrem(f.data(), s1, g.data(), s2, q.data(), r.data());
return make_pair(q, r);
}
// 1.0 * M(n)
poly mul(const poly& g) const {
const auto& f = *this;
if (f.size() == 0 || g.size() == 0) return poly();
if (f.size() + g.size() <= ntt_threshold) {
return f.mul_basecase(g);
} else {
mul2(f, g, false);
return mul_crt(0, f.size() + g.size() - 1);
}
}
// 0.5 * M(n)
poly mul_cyclically(const poly& g) const {
const auto& f = *this;
if (f.size() == 0 || g.size() == 0) return poly();
mul2(f, g, true);
int s = max(f.size(), g.size()), size = 1;
while (size < s) size <<= 1;
return mul_crt(0, size);
}
// 1.0 * M(n)
poly middle_product(const poly& g) const {
const poly& f = *this;
if (f.size() == 0 || g.size() == 0) return poly();
mul2(f, g, true);
return mul_crt(f.size(), g.size());
}
// 2.0 * M(n)
poly inverse(int prec=-1) const {
if (prec < 0) prec = size();
poly ret(1, 1);
for (int e = 1, ne; e < prec; e = ne) {
ne = min(2 * e, prec);
poly h = poly(ret, ne - e) * -ret.middle_product(poly(*this, ne));
rep(i, e, ne) ret.push_back(h[i - e]);
}
return ret;
}
// 2.5 * M(n)
poly quotient(const poly& b) const {
assert(size() == b.size());
if (b.size() < quotient_threshold) {
return quotient_basecase(b);
}
int s = size() / 2 + 1;
poly inv = b.inverse(s);
poly q1 = poly(poly(*this, s) * inv, s);
poly lo = q1.middle_product(b);
poly q2 = poly(inv, size() - s) * (poly(*this, s, size()) - lo);
rep(i, size() - s) q1.push_back(q2[i]);
return q1;
}
// 0.5 * M(n) : f - q * d
static poly sub_mul(const poly& f, const poly& q, const poly& d) {
int sq = q.size();
poly p = q.mul_cyclically(d);
int mask = p.size() - 1;
rep(i, sq) p[i & mask] = sub_mod(p[i & mask], f[i & mask]);
poly r = poly(f, sq, f.size());
rep(i, r.size()) r[i] = sub_mod(r[i], p[(sq + i) & mask]);
return r;
}
// 3.0 * M(n)
pair<poly, poly> divrem(const poly& b) const {
if (size() < b.size()) return make_pair(poly(), poly(*this));
if (size() < divrem_threshold) {
return divrem_basecase(b);
}
assert(size() < 2 * b.size());
int sq = size() - b.size() + 1;
poly q = poly(*this, sq).quotient(poly(b, sq));
poly r = sub_mul(*this, q, b);
return make_pair(q, r);
}
// 3.0 * M(n)
poly rem(const poly& b) const {
return divrem(b).second;
}
// 1.5 * M(n)
pair<poly, poly> divrem_pre(const poly& b, const poly& inv) const {
if (size() < b.size()) {
return make_pair(poly(), poly(*this));
}
if (size() < divrem_pre_threshold) {
return divrem_basecase(b);
}
int sq = size() - b.size() + 1;
assert(size() >= sq && inv.size() >= sq);
poly q = poly(poly(*this, sq) * poly(inv, sq), sq);
poly r = sub_mul(*this, q, b);
return make_pair(q, r);
}
// 1.5 * M(n)
poly rem_pre(const poly& f, const poly& inv) const {
return divrem_pre(f, inv).second;
}
// 2.5 * M(n) : Z/pZ
poly log() const {
assert(size() <= int(invs.size()));
assert(coefs[0] == 1);
poly ret = poly(*this);
rep(i, ret.size() - 1) ret[i] = mul_mod(ret[i + 1], i + 1);
ret = ret.quotient(*this);
for (int i = ret.size() - 1; i > 0; --i) ret[i] = mul_mod(ret[i - 1], invs[i]);
ret[0] = 0;
return ret;
}
// 4.0 * M(n) : Z/pZ
poly exp() const {
assert(size() <= int(invs.size()));
assert(coefs[0] == 0);
poly expf = poly(1, 1);
poly expfr = poly(1, 1);
poly df = poly(*this);
rep(i, df.size() - 1) df[i] = mul_mod(df[i + 1], i + 1);
for (int e = 1, pe = 1, ne; e < size(); pe = e, e = ne) {
ne = min(2 * e, size());
poly tmp = expfr * expfr.middle_product(expf);
rep(i, e - pe) expfr.push_back((mod - tmp[i]) % fast_mod);
poly q = expfr * poly(expf * poly(df, e - 1), e - 1, ne - 1);
tmp.resize(0);
rep(i, e, ne) tmp.push_back((coefs[i] + mul_mod(q[i - e], invs[i])) % fast_mod);
tmp = tmp * expf;
rep(i, ne - e) expf.push_back(tmp[i]);
}
return expf;
}
// 13/6 * M(n) * log_2(e) : x^e (mod f)
static poly x_pow_mod(u64 e, const poly& f) {
if (e == 0) return poly(1, 1);
poly ret = poly(vector<R>({1, 0}));
poly inv = f.inverse(f.size());
ret = ret.rem_pre(f, inv);
u64 mask = (u64(1) << ilog2(e)) >> 1;
while (mask) {
ret *= ret;
if (e & mask) ret.push_back(0);
ret = ret.rem_pre(f, inv);
mask >>= 1;
}
return ret;
}
// O(n^2) : Z/pZ
poly mod_inverse(const poly& g) const {
auto& f = *this;
assert(f.size() < g.size());
int s1 = g.size(), s2 = f.size();
int t1 = 0, t2 = 1;
tmp32.resize(s1 * 4);
R* b1 = tmp32.data(), *b2 = b1 + s1;
R* c1 = b2 + s1, *c2 = c1 + s1;
c1[0] = 0, c2[0] = 1;
copy(g.data(), g.data() + s1, b1);
copy(f.data(), f.data() + s2, b2);
while (1) {
while (s2 > 0 && *b2 == 0) --s2, ++b2;
if (s2 == 0) break;
int s3 = s2 - 1, sq = s1 - s2 + 1, t3 = s1 - s2 + t2;
R* b3 = b1 + sq, *c3 = c1;
adivrem(b1, s1, b2, s2, b1, b3);
tmp64.assign(t3, 0);
rep(i, t1) tmp64[t3 - 1 - i] = c1[t1 - 1 - i];
rep(i, sq) if (b1[i]) vec_sub(tmp64.data() + i, t2, c2, b1[i]);
rep(i, t3) c3[i] = tmp64[i] % fast_mod;
b1 = b2; b2 = b3; s1 = s2; s2 = s3;
c1 = c2; c2 = c3; t1 = t2; t2 = t3;
}
if (s1 > 1) {
fprintf(stderr, "Error: deg(gcd(f, g)) == %d.\n", s1 - 1);
exit(1);
}
if (b1[0] != 1) {
R inv = mod_inv(b1[0]);
rep(i, t1) c1[i] = mul_mod(c1[i], inv);
}
return poly(c1, t1);
}
R evaluate(R x) const {
R ret = 0;
rep(i, size()) ret = (u64(ret) * x + coefs[i]) % fast_mod;
return ret;
}
static poly expand(vector<R>& cs) {
function< poly(int, int) > rec = [&](int beg, int end) {
if (end - beg == 1) {
return poly(vector<R>({1, cs[beg] % mod}));
}
int mid = (beg + end) / 2;
return rec(beg, mid) * rec(mid, end);
};
return rec(0, cs.size());
}
static vector<R> multipoint_evaluation(const poly& f, vector<R>& points) {
int s = points.size();
int tree_size = 4 << ilog2(s - 1);
vector<poly> tree(tree_size);
function< void(int, int, int) > rec = [&](int beg, int end, int k) {
if (end - beg == 1) {
tree[k] = poly(vector<R>({1, (mod - points[beg] % mod) % mod}));
} else {
int mid = (beg + end) >> 1;
rec(beg, mid, 2 * k + 1);
rec(mid, end, 2 * k + 2);
tree[k] = tree[2 * k + 1] * tree[2 * k + 2];
}
};
rec(0, s, 0);
vector<R> res(s);
function< void(const poly&, int, int, int) > rec2 = [&](const poly& g, int beg, int end, int k) {
auto r = g.rem(tree[k]);
if (end - beg <= 16) {
rep(i, beg, end) res[i] = r.evaluate(points[i]);
} else {
int mid = (beg + end) >> 1;
rec2(r, beg, mid, 2 * k + 1);
rec2(r, mid, end, 2 * k + 2);
}
};
rec2(f, 0, s, 0);
return res;
}
static R fact_mod(int N) {
if (N >= mod) return 0;
if (N <= 1) return 1 % mod;
int v = sqrt(N);
vector<R> cs(v);
rep(i, v) cs[i] = (i * v + 1);
auto f = expand(cs);
rep(i, v) cs[i] = i;
auto vs = multipoint_evaluation(f, cs);
R ret = 1;
rep(i, v) ret = mul_mod(ret, vs[i]);
rep(i, v * v + 1, N + 1) ret = mul_mod(ret, i);
return ret;
}
void print() const {
printf("[");
if (size()) {
printf("%u", coefs[0]);
rep(i, 1, size()) printf(", %u", coefs[i]);
}
puts("]");
}
public:
vector<R> coefs;
static vector<R> tmp32;
static vector<R64> tmp64;
static vector<R> invs, facts, ifacts;
static R mod;
static R64 lmod;
static fast_div fast_mod;
};
R poly::mod;
R64 poly::lmod;
poly::fast_div poly::fast_mod;
vector<R> poly::tmp32;
vector<R64> poly::tmp64;
vector<R> poly::invs, poly::facts, poly::ifacts;
void solve() {
const u32 mod = 1000000007;
poly::set_mod(mod);
i64 N;
while (~scanf("%lld", &N)) {
printf("%d\n", poly::fact_mod(min(i64(mod), N)));
}
}
int main() {
clock_t beg = clock();
solve();
clock_t end = clock();
fprintf(stderr, "%.3f sec\n", double(end - beg) / CLOCKS_PER_SEC);
return 0;
}
הההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההה
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
2