結果
問題 | No.2166 Paint and Fill |
ユーザー | 👑 hos.lyric |
提出日時 | 2023-01-15 19:07:33 |
言語 | C++14 (gcc 12.3.0 + boost 1.83.0) |
結果 | AC |
実行時間 | 5,101 ms / 10,000 ms |
コード長 | 45,762 bytes |
コンパイル時間 | 5,625 ms |
コンパイル使用メモリ | 200,564 KB |
実行使用メモリ | 102,108 KB |
最終ジャッジ日時 | 2024-06-09 07:08:39 |
合計ジャッジ時間 | 69,901 ms |
ジャッジサーバーID (参考情報) | judge3 / judge2 |
(要ログイン)
テストケース
テストケース表示入力 | 結果 | 実行時間 実行使用メモリ |
---|---|---|
testcase_00 | AC | 26 ms 21,772 KB |
testcase_01 | AC | 165 ms 23,312 KB |
testcase_02 | AC | 870 ms 70,228 KB |
testcase_03 | AC | 50 ms 22,312 KB |
testcase_04 | AC | 49 ms 21,844 KB |
testcase_05 | AC | 49 ms 22,216 KB |
testcase_06 | AC | 49 ms 21,460 KB |
testcase_07 | AC | 48 ms 21,108 KB |
testcase_08 | AC | 1,298 ms 40,748 KB |
testcase_09 | AC | 1,309 ms 40,908 KB |
testcase_10 | AC | 1,293 ms 41,704 KB |
testcase_11 | AC | 1,289 ms 40,028 KB |
testcase_12 | AC | 1,281 ms 40,852 KB |
testcase_13 | AC | 3,229 ms 100,944 KB |
testcase_14 | AC | 3,257 ms 102,108 KB |
testcase_15 | AC | 3,238 ms 100,936 KB |
testcase_16 | AC | 3,258 ms 101,392 KB |
testcase_17 | AC | 3,351 ms 100,748 KB |
testcase_18 | AC | 5,101 ms 100,232 KB |
testcase_19 | AC | 5,089 ms 99,416 KB |
testcase_20 | AC | 3,090 ms 98,836 KB |
testcase_21 | AC | 3,095 ms 98,640 KB |
testcase_22 | AC | 4,344 ms 92,784 KB |
testcase_23 | AC | 3,624 ms 98,128 KB |
testcase_24 | AC | 3,566 ms 98,388 KB |
testcase_25 | AC | 25 ms 16,920 KB |
testcase_26 | AC | 25 ms 17,008 KB |
testcase_27 | AC | 521 ms 22,820 KB |
testcase_28 | AC | 699 ms 21,852 KB |
testcase_29 | AC | 621 ms 22,444 KB |
testcase_30 | AC | 697 ms 22,952 KB |
testcase_31 | AC | 705 ms 23,588 KB |
testcase_32 | AC | 709 ms 22,004 KB |
testcase_33 | AC | 702 ms 23,192 KB |
testcase_34 | AC | 695 ms 23,008 KB |
testcase_35 | AC | 699 ms 22,420 KB |
testcase_36 | AC | 704 ms 22,484 KB |
testcase_37 | AC | 704 ms 21,928 KB |
testcase_38 | AC | 696 ms 21,980 KB |
testcase_39 | AC | 702 ms 23,484 KB |
ソースコード
#pragma GCC optimize ("Ofast")
#pragma GCC optimize ("unroll-loops")
#pragma GCC target ("avx")

#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <algorithm>
#include <bitset>
#include <complex>
#include <deque>
#include <functional>
#include <iostream>
#include <map>
#include <numeric>
#include <queue>
#include <set>
#include <sstream>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

using namespace std;

using Int = long long;

// Debug printing helpers.
// pair is written as "(first, second)".
template <class T1, class T2> ostream &operator<<(ostream &os, const pair<T1, T2> &a) { return os << "(" << a.first << ", " << a.second << ")"; }
// vector is written as "[a, b, ...]"; output is truncated with ", ..." once 256 elements have been printed.
template <class T> ostream &operator<<(ostream &os, const vector<T> &as) { const int sz = as.size(); os << "["; for (int i = 0; i < sz; ++i) { if (i >= 256) { os << ", ..."; break; } if (i > 0) { os << ", "; } os << as[i]; } return os << "]"; }
// Writes every element of [a, b) to cerr, each followed by a space, then ends the line.
template <class T> void pv(T a, T b) { for (T i = a; i != b; ++i) cerr << *i << " "; cerr << endl; }
// t <- min(t, f) / t <- max(t, f); return true iff t was changed.
template <class T> bool chmin(T &t, const T &f) { if (t > f) { t = f; return true; } return false; }
template <class T> bool chmax(T &t, const T &f) { if (t < f) { t = f; return true; } return false; }

////////////////////////////////////////////////////////////////////////////////
// Integer modulo the compile-time constant M_.
// Every constructor reduces its argument into [0, M); the arithmetic operators
// keep x in [0, M) as long as both operands are already in that range.
// (fft / invFft below write to x directly and temporarily break this invariant.)
template <unsigned M_> struct ModInt {
  static constexpr unsigned M = M_;
  unsigned x;
  constexpr ModInt() : x(0U) {}
  constexpr ModInt(unsigned x_) : x(x_ % M) {}
  constexpr ModInt(unsigned long long x_) : x(x_ % M) {}
  // Signed inputs: C++ remainder may be negative, so shift it back into [0, M).
  constexpr ModInt(int x_) : x(((x_ %= static_cast<int>(M)) < 0) ? (x_ + static_cast<int>(M)) : x_) {}
  constexpr ModInt(long long x_) : x(((x_ %= static_cast<long long>(M)) < 0) ? (x_ + static_cast<long long>(M)) : x_) {}
  ModInt &operator+=(const ModInt &a) { x = ((x += a.x) >= M) ? (x - M) : x; return *this; }
  // Unsigned subtraction wraps around on underflow, which shows up as a value >= M.
  ModInt &operator-=(const ModInt &a) { x = ((x -= a.x) >= M) ? (x + M) : x; return *this; }
  ModInt &operator*=(const ModInt &a) { x = (static_cast<unsigned long long>(x) * a.x) % M; return *this; }
  ModInt &operator/=(const ModInt &a) { return (*this *= a.inv()); }
  // Binary exponentiation; a negative exponent is applied to the inverse.
  ModInt pow(long long e) const {
    if (e < 0) return inv().pow(-e);
    ModInt a = *this, b = 1U;
    for (; e; e >>= 1) {
      if (e & 1) b *= a;
      a *= a;
    }
    return b;
  }
  // Multiplicative inverse by the extended Euclidean algorithm.
  // Asserts gcd(x, M) == 1 (in particular, fails for x == 0).
  ModInt inv() const {
    unsigned a = M, b = x;
    int y = 0, z = 1;
    for (; b; ) {
      const unsigned q = a / b;
      const unsigned c = a - q * b;
      a = b;
      b = c;
      const int w = y - static_cast<int>(q) * z;
      y = z;
      z = w;
    }
    assert(a == 1U);
    return ModInt(y);
  }
  ModInt operator+() const { return *this; }
  ModInt operator-() const { ModInt a; a.x = x ? (M - x) : 0U; return a; }
  ModInt operator+(const ModInt &a) const { return (ModInt(*this) += a); }
  ModInt operator-(const ModInt &a) const { return (ModInt(*this) -= a); }
  ModInt operator*(const ModInt &a) const { return (ModInt(*this) *= a); }
  ModInt operator/(const ModInt &a) const { return (ModInt(*this) /= a); }
  // Mixed forms with a plain number on the left-hand side.
  template <class T> friend ModInt operator+(T a, const ModInt &b) { return (ModInt(a) += b); }
  template <class T> friend ModInt operator-(T a, const ModInt &b) { return (ModInt(a) -= b); }
  template <class T> friend ModInt operator*(T a, const ModInt &b) { return (ModInt(a) *= b); }
  template <class T> friend ModInt operator/(T a, const ModInt &b) { return (ModInt(a) /= b); }
  explicit operator bool() const { return x; }
  bool operator==(const ModInt &a) const { return (x == a.x); }
  bool operator!=(const ModInt &a) const { return (x != a.x); }
  friend std::ostream &operator<<(std::ostream &os, const ModInt &a) { return os << a.x; }
};
////////////////////////////////////////////////////////////////////////////////

////////////////////////////////////////////////////////////////////////////////
constexpr unsigned MO = 998244353U;
constexpr unsigned MO2 = 2U * MO;
// Transforms of length up to 1 << FFT_MAX are supported (asserted in fft / invFft).
constexpr int FFT_MAX = 23;
using Mint = ModInt<MO>;
// Precomputed constants for the transforms below.
// NOTE(review): FFT_ROOTS[h] is presumably a primitive 2^h-th root of unity mod MO
// and INV_FFT_ROOTS[h] its inverse (FFT_ROOTS[1] == MO - 1) -- confirm against the generator.
constexpr Mint FFT_ROOTS[FFT_MAX + 1] = {1U, 998244352U, 911660635U, 372528824U, 929031873U, 452798380U, 922799308U, 781712469U, 476477967U, 166035806U, 258648936U, 584193783U, 63912897U, 350007156U, 666702199U, 968855178U, 629671588U, 24514907U, 996173970U, 363395222U, 565042129U, 733596141U, 267099868U, 15311432U};
constexpr Mint INV_FFT_ROOTS[FFT_MAX + 1] = {1U, 998244352U, 86583718U, 509520358U, 337190230U, 87557064U, 609441965U, 135236158U, 304459705U, 685443576U, 381598368U, 335559352U, 129292727U, 358024708U, 814576206U, 708402881U, 283043518U, 3707709U, 121392023U, 704923114U, 950391366U, 428961804U, 382752275U, 469870224U};
// Factors by which fft / invFft advance the running twiddle `prod` from one block to
// the next, indexed by __builtin_ctz of the 1-based block counter.
// Only FFT_MAX - 1 initializers are listed, so the last slot is value-initialized to 0.
// NOTE(review): that slot appears to be read only for n == 1 << FFT_MAX, after the
// final block of a pass, where the updated `prod` is never used -- confirm before reuse.
constexpr Mint FFT_RATIOS[FFT_MAX] = {911660635U, 509520358U, 369330050U, 332049552U, 983190778U, 123842337U, 238493703U, 975955924U, 603855026U, 856644456U, 131300601U, 842657263U, 730768835U, 942482514U, 806263778U, 151565301U, 510815449U, 503497456U, 743006876U, 741047443U, 56250497U, 867605899U};
constexpr Mint INV_FFT_RATIOS[FFT_MAX] = {86583718U, 372528824U, 373294451U, 645684063U, 112220581U, 692852209U, 155456985U, 797128860U, 90816748U, 860285882U, 927414960U, 354738543U, 109331171U, 293255632U, 535113200U, 308540755U, 121186627U, 608385704U, 438932459U, 359477183U, 824071951U, 103369235U};

// as[rev(i)] <- \sum_j \zeta^(ij) as[j]
//
// In-place forward transform of as[0, n); the result is left in bit-reversed order.
// n must be a power of two with 1 <= n <= 1 << FFT_MAX (asserted).
// Values are reduced lazily: the trailing "< k MO" comments give the bound on the
// raw .x after each statement, and the final loop brings every entry back into [0, MO).
void fft(Mint *as, int n) {
  assert(!(n & (n - 1))); assert(1 <= n); assert(n <= 1 << FFT_MAX);
  int m = n;
  // First pass: the twiddle factor is 1, so no multiplication is needed.
  if (m >>= 1) {
    for (int i = 0; i < m; ++i) {
      const unsigned x = as[i + m].x;  // < MO
      as[i + m].x = as[i].x + MO - x;  // < 2 MO
      as[i].x += x;  // < 2 MO
    }
  }
  if (m >>= 1) {
    Mint prod = 1U;
    for (int h = 0, i0 = 0; i0 < n; i0 += (m << 1)) {
      for (int i = i0; i < i0 + m; ++i) {
        const unsigned x = (prod * as[i + m]).x;  // < MO
        as[i + m].x = as[i].x + MO - x;  // < 3 MO
        as[i].x += x;  // < 3 MO
      }
      prod *= FFT_RATIOS[__builtin_ctz(++h)];
    }
  }
  // Remaining passes, two per iteration: the first lets values grow to < 4 MO,
  // the second folds as[i] back below 2 MO before the butterfly.
  for (; m; ) {
    if (m >>= 1) {
      Mint prod = 1U;
      for (int h = 0, i0 = 0; i0 < n; i0 += (m << 1)) {
        for (int i = i0; i < i0 + m; ++i) {
          const unsigned x = (prod * as[i + m]).x;  // < MO
          as[i + m].x = as[i].x + MO - x;  // < 4 MO
          as[i].x += x;  // < 4 MO
        }
        prod *= FFT_RATIOS[__builtin_ctz(++h)];
      }
    }
    if (m >>= 1) {
      Mint prod = 1U;
      for (int h = 0, i0 = 0; i0 < n; i0 += (m << 1)) {
        for (int i = i0; i < i0 + m; ++i) {
          const unsigned x = (prod * as[i + m]).x;  // < MO
          as[i].x = (as[i].x >= MO2) ? (as[i].x - MO2) : as[i].x;  // < 2 MO
          as[i + m].x = as[i].x + MO - x;  // < 3 MO
          as[i].x += x;  // < 3 MO
        }
        prod *= FFT_RATIOS[__builtin_ctz(++h)];
      }
    }
  }
  // Final normalization into [0, MO).
  for (int i = 0; i < n; ++i) {
    as[i].x = (as[i].x >= MO2) ? (as[i].x - MO2) : as[i].x;  // < 2 MO
    as[i].x = (as[i].x >= MO) ? (as[i].x - MO) : as[i].x;  // < MO
  }
}

// as[i] <- (1/n) \sum_j \zeta^(-ij) as[rev(j)]
//
// In-place inverse of fft: takes bit-reversed input and includes the 1/n scaling.
// n must be a power of two with 1 <= n <= 1 << FFT_MAX (asserted).
// Intermediate values are reduced lazily (see the "< k MO" comments); the closing
// multiplication by 1/n leaves every entry in [0, MO).
void invFft(Mint *as, int n) {
  assert(!(n & (n - 1))); assert(1 <= n); assert(n <= 1 << FFT_MAX);
  int m = 1;
  if (m < n >> 1) {
    Mint prod = 1U;
    for (int h = 0, i0 = 0; i0 < n; i0 += (m << 1)) {
      for (int i = i0; i < i0 + m; ++i) {
        const unsigned long long y = as[i].x + MO - as[i + m].x;  // < 2 MO
        as[i].x += as[i + m].x;  // < 2 MO
        as[i + m].x = (prod.x * y) % MO;  // < MO
      }
      prod *= INV_FFT_RATIOS[__builtin_ctz(++h)];
    }
    m <<= 1;
  }
  for (; m < n >> 1; m <<= 1) {
    Mint prod = 1U;
    for (int h = 0, i0 = 0; i0 < n; i0 += (m << 1)) {
      // The two halves of each block carry different bounds from the previous pass,
      // hence the different offsets (MO2 vs MO) and the extra fold in the first half.
      for (int i = i0; i < i0 + (m >> 1); ++i) {
        const unsigned long long y = as[i].x + MO2 - as[i + m].x;  // < 4 MO
        as[i].x += as[i + m].x;  // < 4 MO
        as[i].x = (as[i].x >= MO2) ? (as[i].x - MO2) : as[i].x;  // < 2 MO
        as[i + m].x = (prod.x * y) % MO;  // < MO
      }
      for (int i = i0 + (m >> 1); i < i0 + m; ++i) {
        const unsigned long long y = as[i].x + MO - as[i + m].x;  // < 2 MO
        as[i].x += as[i + m].x;  // < 2 MO
        as[i + m].x = (prod.x * y) % MO;  // < MO
      }
      prod *= INV_FFT_RATIOS[__builtin_ctz(++h)];
    }
  }
  // Last pass: the twiddle factor is 1; reduction is deferred to the scaling below.
  if (m < n) {
    for (int i = 0; i < m; ++i) {
      const unsigned y = as[i].x + MO2 - as[i + m].x;  // < 4 MO
      as[i].x += as[i + m].x;  // < 4 MO
      as[i + m].x = y;  // < 4 MO
    }
  }
  const Mint invN = Mint(n).inv();
  for (int i = 0; i < n; ++i) {
    as[i] *= invN;
  }
}

// Whole-vector overloads; as.size() must satisfy the requirements on n above.
void fft(vector<Mint> &as) { fft(as.data(), as.size()); }
void invFft(vector<Mint> &as) { invFft(as.data(), as.size()); }

// Product of the polynomials as and bs (coefficient vectors, lowest degree first).
// Returns |as| + |bs| - 1 coefficients, or an empty vector if either input is empty.
vector<Mint> convolve(vector<Mint> as, vector<Mint> bs) {
  if (as.empty() || bs.empty()) return {};
  const int len = as.size() + bs.size() - 1;
  // Smallest power of two that can hold the result.
  int n = 1;
  for (; n < len; n <<= 1) {}
  as.resize(n); fft(as);
  bs.resize(n); fft(bs);
  for (int i = 0; i < n; ++i) as[i] *= bs[i];
  invFft(as);
  as.resize(len);
  return as;
}
// Same as convolve(as, as), with a single forward transform.
vector<Mint> square(vector<Mint> as) {
  if (as.empty()) return {};
  const int len = as.size() + as.size() - 1;
  int n = 1;
  for (; n < len; n <<= 1) {}
  as.resize(n); fft(as);
  for (int i = 0; i < n; ++i) as[i] *= as[i];
  invFft(as);
  as.resize(len);
  return as;
}
////////////////////////////////////////////////////////////////////////////////

////////////////////////////////////////////////////////////////////////////////
// Tables for indices in [0, LIM_INV); the lists name the operations that use each.
// inv: log, exp, pow
// fac: shift
// invFac: shift
constexpr int LIM_INV = 1 << 20;  // @
Mint inv[LIM_INV], fac[LIM_INV], invFac[LIM_INV];
// Fills inv[i] = 1/i (inv[0] stays 0), fac[i] = i!, invFac[i] = 1/i! during
// static initialization of `preparator`.
struct ModIntPreparator {
  ModIntPreparator() {
    inv[1] = 1;
    // From M = (M / i) * i + (M % i):  1/i = -(M / i) / (M % i)  (mod M).
    for (int i = 2; i < LIM_INV; ++i) inv[i] = -((Mint::M / i) * inv[Mint::M % i]);
    fac[0] = 1;
    for (int i = 1; i < LIM_INV; ++i) fac[i] = fac[i - 1] * i;
    invFac[0] = 1;
    for (int i = 1; i < LIM_INV; ++i) invFac[i] = invFac[i - 1] * inv[i];
  }
} preparator;

// polyWork0: *, inv, div, divAt, log, exp, pow, sqrt, shift
// polyWork1: inv, div, divAt, log, exp, pow, sqrt, shift
//
polyWork2: divAt, exp, pow, sqrt // polyWork3: exp, pow, sqrt static constexpr int LIM_POLY = 1 << 20; // @ static_assert(LIM_POLY <= 1 << FFT_MAX, "Poly: LIM_POLY <= 1 << FFT_MAX must hold."); static Mint polyWork0[LIM_POLY], polyWork1[LIM_POLY], polyWork2[LIM_POLY], polyWork3[LIM_POLY]; struct Poly : public vector<Mint> { Poly() {} explicit Poly(int n) : vector<Mint>(n) {} Poly(const vector<Mint> &vec) : vector<Mint>(vec) {} Poly(std::initializer_list<Mint> il) : vector<Mint>(il) {} int size() const { return vector<Mint>::size(); } Mint at(long long k) const { return (0 <= k && k < size()) ? (*this)[k] : 0U; } int ord() const { for (int i = 0; i < size(); ++i) if ((*this)[i]) return i; return -1; } int deg() const { for (int i = size(); --i >= 0; ) if ((*this)[i]) return i; return -1; } Poly mod(int n) const { return Poly(vector<Mint>(data(), data() + min(n, size()))); } friend std::ostream &operator<<(std::ostream &os, const Poly &fs) { os << "["; for (int i = 0; i < fs.size(); ++i) { if (i > 0) os << ", "; os << fs[i]; } return os << "]"; } Poly &operator+=(const Poly &fs) { if (size() < fs.size()) resize(fs.size()); for (int i = 0; i < fs.size(); ++i) (*this)[i] += fs[i]; return *this; } Poly &operator-=(const Poly &fs) { if (size() < fs.size()) resize(fs.size()); for (int i = 0; i < fs.size(); ++i) (*this)[i] -= fs[i]; return *this; } // 3 E(|t| + |f|) Poly &operator*=(const Poly &fs) { if (empty() || fs.empty()) return *this = {}; const int nt = size(), nf = fs.size(); int n = 1; for (; n < nt + nf - 1; n <<= 1) {} assert(n <= LIM_POLY); resize(n); fft(data(), n); // 1 E(n) memcpy(polyWork0, fs.data(), nf * sizeof(Mint)); memset(polyWork0 + nf, 0, (n - nf) * sizeof(Mint)); fft(polyWork0, n); // 1 E(n) for (int i = 0; i < n; ++i) (*this)[i] *= polyWork0[i]; invFft(data(), n); // 1 E(n) resize(nt + nf - 1); return *this; } // 13 E(deg(t) - deg(f) + 1) // rev(t) = rev(f) rev(q) + x^(deg(t)-deg(f)+1) rev(r) Poly &operator/=(const Poly &fs) { const int m = deg(), 
n = fs.deg(); assert(n != -1); if (m < n) return *this = {}; Poly tsRev(m - n + 1), fsRev(min(m - n, n) + 1); for (int i = 0; i <= m - n; ++i) tsRev[i] = (*this)[m - i]; for (int i = 0, i0 = min(m - n, n); i <= i0; ++i) fsRev[i] = fs[n - i]; const Poly qsRev = tsRev.div(fsRev, m - n + 1); // 13 E(m - n + 1) resize(m - n + 1); for (int i = 0; i <= m - n; ++i) (*this)[i] = qsRev[m - n - i]; return *this; } // 13 E(deg(t) - deg(f) + 1) + 3 E(|t|) Poly &operator%=(const Poly &fs) { const Poly qs = *this / fs; // 13 E(deg(t) - deg(f) + 1) *this -= fs * qs; // 3 E(|t|) resize(deg() + 1); return *this; } Poly &operator*=(const Mint &a) { for (int i = 0; i < size(); ++i) (*this)[i] *= a; return *this; } Poly &operator/=(const Mint &a) { const Mint b = a.inv(); for (int i = 0; i < size(); ++i) (*this)[i] *= b; return *this; } Poly operator+() const { return *this; } Poly operator-() const { Poly fs(size()); for (int i = 0; i < size(); ++i) fs[i] = -(*this)[i]; return fs; } Poly operator+(const Poly &fs) const { return (Poly(*this) += fs); } Poly operator-(const Poly &fs) const { return (Poly(*this) -= fs); } Poly operator*(const Poly &fs) const { return (Poly(*this) *= fs); } Poly operator/(const Poly &fs) const { return (Poly(*this) /= fs); } Poly operator%(const Poly &fs) const { return (Poly(*this) %= fs); } Poly operator*(const Mint &a) const { return (Poly(*this) *= a); } Poly operator/(const Mint &a) const { return (Poly(*this) /= a); } friend Poly operator*(const Mint &a, const Poly &fs) { return fs * a; } // 10 E(n) // f <- f - (t f - 1) f Poly inv(int n) const { assert(!empty()); assert((*this)[0]); assert(1 <= n); assert(n == 1 || 1 << (32 - __builtin_clz(n - 1)) <= LIM_POLY); Poly fs(n); fs[0] = (*this)[0].inv(); for (int m = 1; m < n; m <<= 1) { memcpy(polyWork0, data(), min(m << 1, size()) * sizeof(Mint)); memset(polyWork0 + min(m << 1, size()), 0, ((m << 1) - min(m << 1, size())) * sizeof(Mint)); fft(polyWork0, m << 1); // 2 E(n) memcpy(polyWork1, fs.data(), 
min(m << 1, n) * sizeof(Mint)); memset(polyWork1 + min(m << 1, n), 0, ((m << 1) - min(m << 1, n)) * sizeof(Mint)); fft(polyWork1, m << 1); // 2 E(n) for (int i = 0; i < m << 1; ++i) polyWork0[i] *= polyWork1[i]; invFft(polyWork0, m << 1); // 2 E(n) memset(polyWork0, 0, m * sizeof(Mint)); fft(polyWork0, m << 1); // 2 E(n) for (int i = 0; i < m << 1; ++i) polyWork0[i] *= polyWork1[i]; invFft(polyWork0, m << 1); // 2 E(n) for (int i = m, i0 = min(m << 1, n); i < i0; ++i) fs[i] = -polyWork0[i]; } return fs; } // 9 E(n) // Need (4 m)-th roots of unity to lift from (mod x^m) to (mod x^(2m)). // f <- f - (t f - 1) f // (t f^2) mod ((x^(2m) - 1) (x^m - 1^(1/4))) /* Poly inv(int n) const { assert(!empty()); assert((*this)[0]); assert(1 <= n); assert(n == 1 || 3 << (31 - __builtin_clz(n - 1)) <= LIM_POLY); assert(n <= 1 << (FFT_MAX - 1)); Poly fs(n); fs[0] = (*this)[0].inv(); for (int h = 2, m = 1; m < n; ++h, m <<= 1) { const Mint a = FFT_ROOTS[h], b = INV_FFT_ROOTS[h]; memcpy(polyWork0, data(), min(m << 1, size()) * sizeof(Mint)); memset(polyWork0 + min(m << 1, size()), 0, ((m << 1) - min(m << 1, size())) * sizeof(Mint)); { Mint aa = 1; for (int i = 0; i < m; ++i) { polyWork0[(m << 1) + i] = aa * polyWork0[i]; aa *= a; } for (int i = 0; i < m; ++i) { polyWork0[(m << 1) + i] += aa * polyWork0[m + i]; aa *= a; } } fft(polyWork0, m << 1); // 2 E(n) fft(polyWork0 + (m << 1), m); // 1 E(n) memcpy(polyWork1, fs.data(), min(m << 1, n) * sizeof(Mint)); memset(polyWork1 + min(m << 1, n), 0, ((m << 1) - min(m << 1, n)) * sizeof(Mint)); { Mint aa = 1; for (int i = 0; i < m; ++i) { polyWork1[(m << 1) + i] = aa * polyWork1[i]; aa *= a; } for (int i = 0; i < m; ++i) { polyWork1[(m << 1) + i] += aa * polyWork1[m + i]; aa *= a; } } fft(polyWork1, m << 1); // 2 E(n) fft(polyWork1 + (m << 1), m); // 1 E(n) for (int i = 0; i < (m << 1) + m; ++i) polyWork0[i] *= polyWork1[i] * polyWork1[i]; invFft(polyWork0, m << 1); // 2 E(n) invFft(polyWork0 + (m << 1), m); // 1 E(n) // 2 f0 + (-f2), (-f1) 
+ (-f3), 1^(1/4) (-f1) - (-f2) - 1^(1/4) (-f3) { Mint bb = 1; for (int i = 0, i0 = min(m, n - m); i < i0; ++i) { unsigned x = polyWork0[i].x + (bb * polyWork0[(m << 1) + i]).x + MO2 - (fs[i].x << 1); // < 4 MO fs[m + i] = Mint(static_cast<unsigned long long>(FFT_ROOTS[2].x) * x) - polyWork0[m + i]; fs[m + i].x = ((fs[m + i].x & 1) ? (fs[m + i].x + MO) : fs[m + i].x) >> 1; bb *= b; } } } return fs; } */ // 13 E(n) // g = (1 / f) mod x^m // h <- h - (f h - t) g Poly div(const Poly &fs, int n) const { assert(!fs.empty()); assert(fs[0]); assert(1 <= n); if (n == 1) return {at(0) / fs[0]}; // m < n <= 2 m const int m = 1 << (31 - __builtin_clz(n - 1)); assert(m << 1 <= LIM_POLY); Poly gs = fs.inv(m); // 5 E(n) gs.resize(m << 1); fft(gs.data(), m << 1); // 1 E(n) memcpy(polyWork0, data(), min(m, size()) * sizeof(Mint)); memset(polyWork0 + min(m, size()), 0, ((m << 1) - min(m, size())) * sizeof(Mint)); fft(polyWork0, m << 1); // 1 E(n) for (int i = 0; i < m << 1; ++i) polyWork0[i] *= gs[i]; invFft(polyWork0, m << 1); // 1 E(n) Poly hs(n); memcpy(hs.data(), polyWork0, m * sizeof(Mint)); memset(polyWork0 + m, 0, m * sizeof(Mint)); fft(polyWork0, m << 1); // 1 E(n) memcpy(polyWork1, fs.data(), min(m << 1, fs.size()) * sizeof(Mint)); memset(polyWork1 + min(m << 1, fs.size()), 0, ((m << 1) - min(m << 1, fs.size())) * sizeof(Mint)); fft(polyWork1, m << 1); // 1 E(n) for (int i = 0; i < m << 1; ++i) polyWork0[i] *= polyWork1[i]; invFft(polyWork0, m << 1); // 1 E(n) memset(polyWork0, 0, m * sizeof(Mint)); for (int i = m, i0 = min(m << 1, size()); i < i0; ++i) polyWork0[i] -= (*this)[i]; fft(polyWork0, m << 1); // 1 E(n) for (int i = 0; i < m << 1; ++i) polyWork0[i] *= gs[i]; invFft(polyWork0, m << 1); // 1 E(n) for (int i = m; i < n; ++i) hs[i] = -polyWork0[i]; return hs; } // (4 (floor(log_2 k) - ceil(log_2 |f|)) + 16) E(|f|) for |t| < |f| // [x^k] (t(x) / f(x)) = [x^k] ((t(x) f(-x)) / (f(x) f(-x)) // polyWork0: half of (2 m)-th roots of unity, inversed, bit-reversed Mint 
divAt(const Poly &fs, long long k) const { assert(k >= 0); if (size() >= fs.size()) { const Poly qs = *this / fs; // 13 E(deg(t) - deg(f) + 1) Poly rs = *this - fs * qs; // 3 E(|t|) rs.resize(rs.deg() + 1); return qs.at(k) + rs.divAt(fs, k); } int h = 0, m = 1; for (; m < fs.size(); ++h, m <<= 1) {} if (k < m) { const Poly gs = fs.inv(k + 1); // 10 E(|f|) Mint sum; for (int i = 0, i0 = min<int>(k + 1, size()); i < i0; ++i) sum += (*this)[i] * gs[k - i]; return sum; } assert(m << 1 <= LIM_POLY); polyWork0[0] = Mint(2U).inv(); for (int hh = 0; hh < h; ++hh) for (int i = 0; i < 1 << hh; ++i) polyWork0[1 << hh | i] = polyWork0[i] * INV_FFT_ROOTS[hh + 2]; const Mint a = FFT_ROOTS[h + 1]; memcpy(polyWork2, data(), size() * sizeof(Mint)); memset(polyWork2 + size(), 0, ((m << 1) - size()) * sizeof(Mint)); fft(polyWork2, m << 1); // 2 E(|f|) memcpy(polyWork1, fs.data(), fs.size() * sizeof(Mint)); memset(polyWork1 + fs.size(), 0, ((m << 1) - fs.size()) * sizeof(Mint)); fft(polyWork1, m << 1); // 2 E(|f|) for (; ; ) { if (k & 1) { for (int i = 0; i < m; ++i) polyWork2[i] = polyWork0[i] * (polyWork2[i << 1 | 0] * polyWork1[i << 1 | 1] - polyWork2[i << 1 | 1] * polyWork1[i << 1 | 0]); } else { for (int i = 0; i < m; ++i) { polyWork2[i] = polyWork2[i << 1 | 0] * polyWork1[i << 1 | 1] + polyWork2[i << 1 | 1] * polyWork1[i << 1 | 0]; polyWork2[i].x = ((polyWork2[i].x & 1) ? 
(polyWork2[i].x + MO) : polyWork2[i].x) >> 1; } } for (int i = 0; i < m; ++i) polyWork1[i] = polyWork1[i << 1 | 0] * polyWork1[i << 1 | 1]; if ((k >>= 1) < m) { invFft(polyWork2, m); // 1 E(|f|) invFft(polyWork1, m); // 1 E(|f|) // Poly::inv does not use polyWork2 const Poly gs = Poly(vector<Mint>(polyWork1, polyWork1 + k + 1)).inv(k + 1); // 10 E(|f|) Mint sum; for (int i = 0; i <= k; ++i) sum += polyWork2[i] * gs[k - i]; return sum; } memcpy(polyWork2 + m, polyWork2, m * sizeof(Mint)); invFft(polyWork2 + m, m); // (floor(log_2 k) - ceil(log_2 |f|)) E(|f|) memcpy(polyWork1 + m, polyWork1, m * sizeof(Mint)); invFft(polyWork1 + m, m); // (floor(log_2 k) - ceil(log_2 |f|)) E(|f|) Mint aa = 1; for (int i = m; i < m << 1; ++i) { polyWork2[i] *= aa; polyWork1[i] *= aa; aa *= a; } fft(polyWork2 + m, m); // (floor(log_2 k) - ceil(log_2 |f|)) E(|f|) fft(polyWork1 + m, m); // (floor(log_2 k) - ceil(log_2 |f|)) E(|f|) } } // 13 E(n) // D log(t) = (D t) / t Poly log(int n) const { assert(!empty()); assert((*this)[0].x == 1U); assert(n <= LIM_INV); Poly fs = mod(n); for (int i = 0; i < fs.size(); ++i) fs[i] *= i; fs = fs.div(*this, n); for (int i = 1; i < n; ++i) fs[i] *= ::inv[i]; return fs; } // (16 + 1/2) E(n) // f = exp(t) mod x^m ==> (D f) / f == D t (mod x^m) // g = (1 / exp(t)) mod x^m // f <- f - (log f - t) / (1 / f) // = f - (I ((D f) / f) - t) f // == f - (I ((D f) / f + (f g - 1) ((D f) / f - D (t mod x^m))) - t) f (mod x^(2m)) // = f - (I (g (D f - f D (t mod x^m)) + D (t mod x^m)) - t) f // g <- g - (f g - 1) g // polyWork1: DFT(f, 2 m), polyWork2: g, polyWork3: DFT(g, 2 m) Poly exp(int n) const { assert(!empty()); assert(!(*this)[0]); assert(1 <= n); assert(n == 1 || 1 << (32 - __builtin_clz(n - 1)) <= min(LIM_INV, LIM_POLY)); if (n == 1) return {1U}; if (n == 2) return {1U, at(1)}; Poly fs(n); fs[0].x = polyWork1[0].x = polyWork1[1].x = polyWork2[0].x = 1U; int m; for (m = 1; m << 1 < n; m <<= 1) { for (int i = 0, i0 = min(m, size()); i < i0; ++i) polyWork0[i] 
= i * (*this)[i]; memset(polyWork0 + min(m, size()), 0, (m - min(m, size())) * sizeof(Mint)); fft(polyWork0, m); // (1/2) E(n) for (int i = 0; i < m; ++i) polyWork0[i] *= polyWork1[i]; invFft(polyWork0, m); // (1/2) E(n) for (int i = 0; i < m; ++i) polyWork0[i] -= i * fs[i]; memset(polyWork0 + m, 0, m * sizeof(Mint)); fft(polyWork0, m << 1); // 1 E(n) memcpy(polyWork3, polyWork2, m * sizeof(Mint)); memset(polyWork3 + m, 0, m * sizeof(Mint)); fft(polyWork3, m << 1); // 1 E(n) for (int i = 0; i < m << 1; ++i) polyWork0[i] *= polyWork3[i]; invFft(polyWork0, m << 1); // 1 E(n) for (int i = 0; i < m; ++i) polyWork0[i] *= ::inv[m + i]; for (int i = 0, i0 = min(m, size() - m); i < i0; ++i) polyWork0[i] += (*this)[m + i]; memset(polyWork0 + m, 0, m * sizeof(Mint)); fft(polyWork0, m << 1); // 1 E(n) for (int i = 0; i < m << 1; ++i) polyWork0[i] *= polyWork1[i]; invFft(polyWork0, m << 1); // 1 E(n) memcpy(fs.data() + m, polyWork0, m * sizeof(Mint)); memcpy(polyWork1, fs.data(), (m << 1) * sizeof(Mint)); memset(polyWork1 + (m << 1), 0, (m << 1) * sizeof(Mint)); fft(polyWork1, m << 2); // 2 E(n) for (int i = 0; i < m << 1; ++i) polyWork0[i] = polyWork1[i] * polyWork3[i]; invFft(polyWork0, m << 1); // 1 E(n) memset(polyWork0, 0, m * sizeof(Mint)); fft(polyWork0, m << 1); // 1 E(n) for (int i = 0; i < m << 1; ++i) polyWork0[i] *= polyWork3[i]; invFft(polyWork0, m << 1); // 1 E(n) for (int i = m; i < m << 1; ++i) polyWork2[i] = -polyWork0[i]; } for (int i = 0, i0 = min(m, size()); i < i0; ++i) polyWork0[i] = i * (*this)[i]; memset(polyWork0 + min(m, size()), 0, (m - min(m, size())) * sizeof(Mint)); fft(polyWork0, m); // (1/2) E(n) for (int i = 0; i < m; ++i) polyWork0[i] *= polyWork1[i]; invFft(polyWork0, m); // (1/2) E(n) for (int i = 0; i < m; ++i) polyWork0[i] -= i * fs[i]; memcpy(polyWork0 + m, polyWork0 + (m >> 1), (m >> 1) * sizeof(Mint)); memset(polyWork0 + (m >> 1), 0, (m >> 1) * sizeof(Mint)); memset(polyWork0 + m + (m >> 1), 0, (m >> 1) * sizeof(Mint)); fft(polyWork0, 
m); // (1/2) E(n) fft(polyWork0 + m, m); // (1/2) E(n) memcpy(polyWork3 + m, polyWork2 + (m >> 1), (m >> 1) * sizeof(Mint)); memset(polyWork3 + m + (m >> 1), 0, (m >> 1) * sizeof(Mint)); fft(polyWork3 + m, m); // (1/2) E(n) for (int i = 0; i < m; ++i) polyWork0[m + i] = polyWork0[i] * polyWork3[m + i] + polyWork0[m + i] * polyWork3[i]; for (int i = 0; i < m; ++i) polyWork0[i] *= polyWork3[i]; invFft(polyWork0, m); // (1/2) E(n) invFft(polyWork0 + m, m); // (1/2) E(n) for (int i = 0; i < m >> 1; ++i) polyWork0[(m >> 1) + i] += polyWork0[m + i]; for (int i = 0; i < m; ++i) polyWork0[i] *= ::inv[m + i]; for (int i = 0, i0 = min(m, size() - m); i < i0; ++i) polyWork0[i] += (*this)[m + i]; memset(polyWork0 + m, 0, m * sizeof(Mint)); fft(polyWork0, m << 1); // 1 E(n) for (int i = 0; i < m << 1; ++i) polyWork0[i] *= polyWork1[i]; invFft(polyWork0, m << 1); // 1 E(n) memcpy(fs.data() + m, polyWork0, (n - m) * sizeof(Mint)); return fs; } // (29 + 1/2) E(n) // g <- g - (log g - a log t) g Poly pow(Mint a, int n) const { assert(!empty()); assert((*this)[0].x == 1U); assert(1 <= n); return (a * log(n)).exp(n); // 13 E(n) + (16 + 1/2) E(n) } // (29 + 1/2) E(n - a ord(t)) Poly pow(long long a, int n) const { assert(a >= 0); assert(1 <= n); if (a == 0) { Poly gs(n); gs[0].x = 1U; return gs; } const int o = ord(); if (o == -1 || o > (n - 1) / a) return Poly(n); const Mint b = (*this)[o].inv(), c = (*this)[o].pow(a); const int ntt = min<int>(n - a * o, size() - o); Poly tts(ntt); for (int i = 0; i < ntt; ++i) tts[i] = b * (*this)[o + i]; tts = tts.pow(Mint(a), n - a * o); // (29 + 1/2) E(n - a ord(t)) Poly gs(n); for (int i = 0; i < n - a * o; ++i) gs[a * o + i] = c * tts[i]; return gs; } // (10 + 1/2) E(n) // f = t^(1/2) mod x^m, g = 1 / t^(1/2) mod x^m // f <- f - (f^2 - h) g / 2 // g <- g - (f g - 1) g // polyWork1: DFT(f, m), polyWork2: g, polyWork3: DFT(g, 2 m) Poly sqrt(int n) const { assert(!empty()); assert((*this)[0].x == 1U); assert(1 <= n); assert(n == 1 || 1 << (32 - 
__builtin_clz(n - 1)) <= LIM_POLY); if (n == 1) return {1U}; if (n == 2) return {1U, at(1) / 2}; Poly fs(n); fs[0].x = polyWork1[0].x = polyWork2[0].x = 1U; int m; for (m = 1; m << 1 < n; m <<= 1) { for (int i = 0; i < m; ++i) polyWork1[i] *= polyWork1[i]; invFft(polyWork1, m); // (1/2) E(n) for (int i = 0, i0 = min(m, size()); i < i0; ++i) polyWork1[i] -= (*this)[i]; for (int i = 0, i0 = min(m, size() - m); i < i0; ++i) polyWork1[i] -= (*this)[m + i]; memset(polyWork1 + m, 0, m * sizeof(Mint)); fft(polyWork1, m << 1); // 1 E(n) memcpy(polyWork3, polyWork2, m * sizeof(Mint)); memset(polyWork3 + m, 0, m * sizeof(Mint)); fft(polyWork3, m << 1); // 1 E(n) for (int i = 0; i < m << 1; ++i) polyWork1[i] *= polyWork3[i]; invFft(polyWork1, m << 1); // 1 E(n) for (int i = 0; i < m; ++i) { polyWork1[i] = -polyWork1[i]; fs[m + i].x = ((polyWork1[i].x & 1) ? (polyWork1[i].x + MO) : polyWork1[i].x) >> 1; } memcpy(polyWork1, fs.data(), (m << 1) * sizeof(Mint)); fft(polyWork1, m << 1); // 1 E(n) for (int i = 0; i < m << 1; ++i) polyWork0[i] = polyWork1[i] * polyWork3[i]; invFft(polyWork0, m << 1); // 1 E(n) memset(polyWork0, 0, m * sizeof(Mint)); fft(polyWork0, m << 1); // 1 E(n) for (int i = 0; i < m << 1; ++i) polyWork0[i] *= polyWork3[i]; invFft(polyWork0, m << 1); // 1 E(n) for (int i = m; i < m << 1; ++i) polyWork2[i] = -polyWork0[i]; } for (int i = 0; i < m; ++i) polyWork1[i] *= polyWork1[i]; invFft(polyWork1, m); // (1/2) E(n) for (int i = 0, i0 = min(m, size()); i < i0; ++i) polyWork1[i] -= (*this)[i]; for (int i = 0, i0 = min(m, size() - m); i < i0; ++i) polyWork1[i] -= (*this)[m + i]; memcpy(polyWork1 + m, polyWork1 + (m >> 1), (m >> 1) * sizeof(Mint)); memset(polyWork1 + (m >> 1), 0, (m >> 1) * sizeof(Mint)); memset(polyWork1 + m + (m >> 1), 0, (m >> 1) * sizeof(Mint)); fft(polyWork1, m); // (1/2) E(n) fft(polyWork1 + m, m); // (1/2) E(n) memcpy(polyWork3 + m, polyWork2 + (m >> 1), (m >> 1) * sizeof(Mint)); memset(polyWork3 + m + (m >> 1), 0, (m >> 1) * sizeof(Mint)); 
fft(polyWork3 + m, m); // (1/2) E(n) // for (int i = 0; i < m << 1; ++i) polyWork1[i] *= polyWork3[i]; for (int i = 0; i < m; ++i) polyWork1[m + i] = polyWork1[i] * polyWork3[m + i] + polyWork1[m + i] * polyWork3[i]; for (int i = 0; i < m; ++i) polyWork1[i] *= polyWork3[i]; invFft(polyWork1, m); // (1/2) E(n) invFft(polyWork1 + m, m); // (1/2) E(n) for (int i = 0; i < m >> 1; ++i) polyWork1[(m >> 1) + i] += polyWork1[m + i]; for (int i = 0; i < n - m; ++i) { polyWork1[i] = -polyWork1[i]; fs[m + i].x = ((polyWork1[i].x & 1) ? (polyWork1[i].x + MO) : polyWork1[i].x) >> 1; } return fs; } // (10 + 1/2) E(n) // modSqrt must return a quadratic residue if exists, or anything otherwise. // Return {} if *this does not have a square root. template <class F> Poly sqrt(int n, F modSqrt) const { assert(1 <= n); const int o = ord(); if (o == -1) return Poly(n); if (o & 1) return {}; const Mint c = modSqrt((*this)[o]); if (c * c != (*this)[o]) return {}; if (o >> 1 >= n) return Poly(n); const Mint b = (*this)[o].inv(); const int ntt = min(n - (o >> 1), size() - o); Poly tts(ntt); for (int i = 0; i < ntt; ++i) tts[i] = b * (*this)[o + i]; tts = tts.sqrt(n - (o >> 1)); // (10 + 1/2) E(n) Poly gs(n); for (int i = 0; i < n - (o >> 1); ++i) gs[(o >> 1) + i] = c * tts[i]; return gs; } // 6 E(|t|) // x -> x + a Poly shift(const Mint &a) const { if (empty()) return {}; const int n = size(); int m = 1; for (; m < n; m <<= 1) {} for (int i = 0; i < n; ++i) polyWork0[i] = fac[i] * (*this)[i]; memset(polyWork0 + n, 0, ((m << 1) - n) * sizeof(Mint)); fft(polyWork0, m << 1); // 2 E(|t|) { Mint aa = 1; for (int i = 0; i < n; ++i) { polyWork1[n - 1 - i] = invFac[i] * aa; aa *= a; } } memset(polyWork1 + n, 0, ((m << 1) - n) * sizeof(Mint)); fft(polyWork1, m << 1); // 2 E(|t|) for (int i = 0; i < m << 1; ++i) polyWork0[i] *= polyWork1[i]; invFft(polyWork0, m << 1); // 2 E(|t|) Poly fs(n); for (int i = 0; i < n; ++i) fs[i] = invFac[i] * polyWork0[n - 1 + i]; return fs; } }; Mint 
linearRecurrenceAt(const vector<Mint> &as, const vector<Mint> &cs, long long k) { assert(!cs.empty()); assert(cs[0]); const int d = cs.size() - 1; assert(as.size() >= static_cast<size_t>(d)); return (Poly(vector<Mint>(as.begin(), as.begin() + d)) * cs).mod(d).divAt(cs, k); } struct SubproductTree { int logN, n, nn; vector<Mint> xs; // [DFT_4((X-xs[0])(X-xs[1])(X-xs[2])(X-xs[3]))] [(X-xs[0])(X-xs[1])(X-xs[2])(X-xs[3])mod X^4] // [ DFT_4((X-xs[0])(X-xs[1])) ] [ DFT_4((X-xs[2])(X-xs[3])) ] // [ DFT_2(X-xs[0]) ] [ DFT_2(X-xs[1]) ] [ DFT_2(X-xs[2]) ] [ DFT_2(X-xs[3]) ] vector<Mint> buf; vector<Mint *> gss; // (1 - xs[0] X) ... (1 - xs[nn-1] X) Poly all; SubproductTree() {} // (ceil(log_2 n) + O(1)) E(n) SubproductTree(const vector<Mint> &xs_) { n = xs_.size(); for (logN = 0, nn = 1; nn < n; ++logN, nn <<= 1) {} xs.assign(nn, 0U); memcpy(xs.data(), xs_.data(), n * sizeof(Mint)); buf.assign((logN + 1) * (nn << 1), 0U); gss.assign(nn << 1, nullptr); for (int h = 0; h <= logN; ++h) for (int u = 1 << h; u < 1 << (h + 1); ++u) { gss[u] = buf.data() + (h * (nn << 1) + ((u - (1 << h)) << (logN - h + 1))); } for (int i = 0; i < nn; ++i) { gss[nn + i][0] = -xs[i] + 1; gss[nn + i][1] = -xs[i] - 1; } if (nn == 1) gss[1][1] += 2; for (int h = logN; --h >= 0; ) { const int m = 1 << (logN - h); for (int u = 1 << (h + 1); --u >= 1 << h; ) { for (int i = 0; i < m; ++i) gss[u][i] = gss[u << 1][i] * gss[u << 1 | 1][i]; memcpy(gss[u] + m, gss[u], m * sizeof(Mint)); invFft(gss[u] + m, m); // ((1/2) ceil(log_2 n) + O(1)) E(n) if (h > 0) { gss[u][m] -= 2; const Mint a = FFT_ROOTS[logN - h + 1]; Mint aa = 1; for (int i = m; i < m << 1; ++i) { gss[u][i] *= aa; aa *= a; }; fft(gss[u] + m, m); // ((1/2) ceil(log_2 n) + O(1)) E(n) } } } all.resize(nn + 1); all[0] = 1; for (int i = 1; i < nn; ++i) all[i] = gss[1][nn + nn - i]; all[nn] = gss[1][nn] - 1; } // ((3/2) ceil(log_2 n) + O(1)) E(n) + 10 E(|f|) + 3 E(|f| + 2^(ceil(log_2 n))) vector<Mint> multiEval(const Poly &fs) const { vector<Mint> 
work0(nn), work1(nn), work2(nn); { const int m = max(fs.size(), 1); auto invAll = all.inv(m); // 10 E(|f|) std::reverse(invAll.begin(), invAll.end()); int mm; for (mm = 1; mm < m - 1 + nn; mm <<= 1) {} invAll.resize(mm, 0U); fft(invAll); // E(|f| + 2^(ceil(log_2 n))) vector<Mint> ffs(mm, 0U); memcpy(ffs.data(), fs.data(), fs.size() * sizeof(Mint)); fft(ffs); // E(|f| + 2^(ceil(log_2 n))) for (int i = 0; i < mm; ++i) ffs[i] *= invAll[i]; invFft(ffs); // E(|f| + 2^(ceil(log_2 n))) memcpy(((logN & 1) ? work1 : work0).data(), ffs.data() + m - 1, nn * sizeof(Mint)); } for (int h = 0; h < logN; ++h) { const int m = 1 << (logN - h); for (int u = 1 << h; u < 1 << (h + 1); ++u) { Mint *hs = (((logN - h) & 1) ? work1 : work0).data() + ((u - (1 << h)) << (logN - h)); Mint *hs0 = (((logN - h) & 1) ? work0 : work1).data() + ((u - (1 << h)) << (logN - h)); Mint *hs1 = hs0 + (m >> 1); fft(hs, m); // ((1/2) ceil(log_2 n) + O(1)) E(n) for (int i = 0; i < m; ++i) work2[i] = gss[u << 1 | 1][i] * hs[i]; invFft(work2.data(), m); // ((1/2) ceil(log_2 n) + O(1)) E(n) memcpy(hs0, work2.data() + (m >> 1), (m >> 1) * sizeof(Mint)); for (int i = 0; i < m; ++i) work2[i] = gss[u << 1][i] * hs[i]; invFft(work2.data(), m); // ((1/2) ceil(log_2 n) + O(1)) E(n) memcpy(hs1, work2.data() + (m >> 1), (m >> 1) * sizeof(Mint)); } } work0.resize(n); return work0; } // ((5/2) ceil(log_2 n) + O(1)) E(n) Poly interpolate(const vector<Mint> &ys) const { assert(static_cast<int>(ys.size()) == n); Poly gs(n); for (int i = 0; i < n; ++i) gs[i] = (i + 1) * all[n - (i + 1)]; const vector<Mint> denoms = multiEval(gs); // ((3/2) ceil(log_2 n) + O(1)) E(n) vector<Mint> work(nn << 1, 0U); for (int i = 0; i < n; ++i) { // xs[0], ..., xs[n - 1] are not distinct assert(denoms[i]); work[i << 1] = work[i << 1 | 1] = ys[i] / denoms[i]; } for (int h = logN; --h >= 0; ) { const int m = 1 << (logN - h); for (int u = 1 << (h + 1); --u >= 1 << h; ) { Mint *hs = work.data() + ((u - (1 << h)) << (logN - h + 1)); for (int i = 0; i 
< m; ++i) hs[i] = gss[u << 1 | 1][i] * hs[i] + gss[u << 1][i] * hs[m + i]; if (h > 0) { memcpy(hs + m, hs, m * sizeof(Mint)); invFft(hs + m, m); // ((1/2) ceil(log_2 n) + O(1)) E(n) const Mint a = FFT_ROOTS[logN - h + 1]; Mint aa = 1; for (int i = m; i < m << 1; ++i) { hs[i] *= aa; aa *= a; }; fft(hs + m, m); // ((1/2) ceil(log_2 n) + O(1)) E(n) } } } invFft(work.data(), nn); // E(n) return Poly(vector<Mint>(work.data() + nn - n, work.data() + nn)); } }; //////////////////////////////////////////////////////////////////////////////// // a(xL) ... a(xR-1) // a: d * d matrix of poly entry of size <= e // O(d^2 e^2 + d^2 sqrt(e (xR-xL)) log(e (xR-xL)) + d^3 sqrt(e (xR-xL))) time // Needs inv, fac, invFac for [0, e block), where // block: min power of 2 s.t. e block^2 >= xR-xL vector<vector<Mint>> polyMatrixProduct(const vector<vector<Poly>> &a, long long xL, long long xR) { const int d = a.size(); for (int i = 0; i < d; ++i) assert(static_cast<size_t>(d) == a[i].size()); assert(xL <= xR); int e = 1; for (int i = 0; i < d; ++i) for (int j = 0; j < d; ++j) { for (; e < a[i][j].deg() + 1; e <<= 1) {} } long long block = 1; for (; e * block * block < xR - xL; block <<= 1) {} assert(e * block <= LIM_INV); const Mint invBlock = Mint(block).inv(); // O(d^2 e^2) (more precisely, O(e \sum[i,j] a[i][j].size())) vector<Mint> b(d * d * e, 0); for (int i = 0; i < d; ++i) for (int j = 0; j < d; ++j) { for (int l = 0; l < e; ++l) { const Mint x = xL + block * l; Mint &y = b[(i * d + j) * e + l]; for (int m = a[i][j].size(); --m >= 0; ) (y *= x) += a[i][j][m]; } } // O(d^2 e block log(e block) + d^3 e block) for (int w = 1; w < block; w <<= 1) { // b[i][j]: product of w factors, // evaluated at (xL, xL + block, ..., xL + block (ew-1)) // for f: poly of size ew, given [f(0), ..., f(ew-1)], find: // - [f(ew), ..., f(2ew-1)] // - [f(w/block), ..., f(2ew-1+w/block)] const int ew = e * w; const int ew1 = ew << 1, ew2 = ew << 2; vector<Mint> workExp0(ew1, 0), workExp1(ew2, 0); vector<Mint> 
workInvExp0(ew1, 0); vector<Mint> workFall0(ew1, 0), workFall1(ew1, 0); for (int l = 0; l < ew; ++l) workExp0[l] = invFac[l]; for (int l = 0; l < ew1; ++l) workExp1[l] = invFac[l]; for (int l = 0; l < ew; ++l) workInvExp0[l] = (l & 1) ? -invFac[l] : invFac[l]; workFall0[ew - 1] = workFall1[ew - 1] = 1; for (int l = 1; l < ew; ++l) workFall0[ew - 1 - l] = workFall0[ew - l] * (ew - (l - 1)) * inv[l]; for (int l = 1; l < ew; ++l) workFall1[ew - 1 - l] = workFall1[ew - l] * (invBlock * w - (l - 1)) * inv[l]; fft(workExp0); fft(workExp1); fft(workInvExp0); fft(workFall0); fft(workFall1); vector<Mint> b0(d * d * ew), b1(d * d * ew1); for (int i = 0; i < d; ++i) for (int j = 0; j < d; ++j) { const Mint *bij = b.data() + (i * d + j) * ew; Mint *b0ij = b0.data() + (i * d + j) * ew; Mint *b1ij = b1.data() + (i * d + j) * ew1; vector<Mint> ys0(ew1); for (int l = 0; l < ew; ++l) ys0[l] = invFac[l] * bij[l]; fft(ys0); for (int l = 0; l < ew1; ++l) ys0[l] *= workInvExp0[l]; invFft(ys0); for (int l = 0; l < ew1; ++l) ys0[l] *= fac[l]; memset(ys0.data() + ew, 0, ew * sizeof(Mint)); fft(ys0); vector<Mint> ys1 = ys0; for (int l = 0; l < ew1; ++l) ys0[l] *= workFall0[l]; for (int l = 0; l < ew1; ++l) ys1[l] *= workFall1[l]; invFft(ys0); invFft(ys1); ys0.erase(ys0.begin(), ys0.begin() + (ew - 1)); ys1.erase(ys1.begin(), ys1.begin() + (ew - 1)); for (int l = 0; l < ew; ++l) ys0[l] *= invFac[l]; for (int l = 0; l < ew; ++l) ys1[l] *= invFac[l]; ys0.resize(ew1, 0); ys1.resize(ew2, 0); fft(ys0); fft(ys1); for (int l = 0; l < ew1; ++l) ys0[l] *= workExp0[l]; for (int l = 0; l < ew2; ++l) ys1[l] *= workExp1[l]; invFft(ys0); invFft(ys1); for (int l = 0; l < ew; ++l) b0ij[l] = fac[l] * ys0[l]; for (int l = 0; l < ew1; ++l) b1ij[l] = fac[l] * ys1[l]; } vector<Mint> bb(d * d * ew1, 0); for (int i = 0; i < d; ++i) for (int k = 0; k < d; ++k) for (int j = 0; j < d; ++j) { Mint *bbij = bb.data() + ((i * d + j) * ew1); const Mint *bik = b.data() + ((i * d + k) * ew); const Mint *b0ik = b0.data() + 
((i * d + k) * ew); const Mint *b1kj = b1.data() + ((k * d + j) * ew1); for (int l = 0; l < ew; ++l) bbij[l] += bik[l] * b1kj[l]; for (int l = 0; l < ew; ++l) bbij[ew + l] += b0ik[l] * b1kj[ew + l]; } b = bb; } vector<Mint> c(d * d, 0); for (int i = 0; i < d; ++i) c[i * d + i] = 1; long long x = xL; // O(d^3 (xR-xL)/block) <= O(d^3 e block) for (int l = 0; x + block <= xR; ++l, x += block) { vector<Mint> cc(d * d, 0); for (int i = 0; i < d; ++i) for (int k = 0; k < d; ++k) for (int j = 0; j < d; ++j) { cc[i * d + j] += c[i * d + k] * b[(k * d + j) * e * block + l]; } c = cc; } // O(d^3 block + d^2 e block) for (; x < xR; ++x) { const Mint x_ = x; vector<Mint> ax(d * d, 0), cc(d * d, 0); for (int i = 0; i < d; ++i) for (int j = 0; j < d; ++j) { Mint &y = ax[i * d + j]; for (int m = a[i][j].size(); --m >= 0; ) (y *= x_) += a[i][j][m]; } for (int i = 0; i < d; ++i) for (int k = 0; k < d; ++k) for (int j = 0; j < d; ++j) { cc[i * d + j] += c[i * d + k] * ax[k * d + j]; } c = cc; } vector<vector<Mint>> ret(d, vector<Mint>(d)); for (int i = 0; i < d; ++i) for (int j = 0; j < d; ++j) ret[i][j] = c[i * d + j]; return ret; } //////////////////////////////////////////////////////////////////////////////// /* f(n, k) := k! 
[x^k] (1 + A x + B x^2)^n f(n, k) = A (n + 1 - k) f(n, k-1) + B (2 n + 2 - k) (k - 1) f(n, k-2) (f(n, 0) = 1, f(n, -1) := 0) f(n, k+1) = A (n - k) f(n, k) + B (2 n + 1 - k) k f(n, k-1) [ f(n, k+1) f(n, k) ] = [ A (n - k) 1 ] [ B (2 n + 1 - k) k 0 ] */ const Mint A = 2; const Mint B = Mint(2).inv(); Mint brute(Int N, Int K) { Mint f = 1, g = 0; for (Int k = 0; k < K; ++k) { const Mint h = g; g = f; f = A * (N - k) * g + B * (2 * N + 1 - k) * k * h; } return f; } int T; vector<Int> N, K; vector<Mint> ans; namespace small { struct Mat { Poly x[2][2]; Mat() : x{} {} friend ostream &operator<<(ostream &os, const Mat &a) { return os << "[" << a.x[0][0] << ", " << a.x[0][1] << "; " << a.x[1][0] << ", " << a.x[1][1] << "]"; } }; Mat operator*(const Mat &a, const Mat &b) { Mat c; for (int i = 0; i < 2; ++i) for (int k = 0; k < 2; ++k) for (int j = 0; j < 2; ++j) { c.x[i][j] += a.x[i][k] * b.x[k][j]; } return c; } vector<pair<int, int>> kts; vector<Poly> fss; vector<Mat> ms; inline int id(int l, int r) { return (l + 1 == r) ? l : (T + (l + r) / 2); } Mat sub(int kL, int kR) { if (kL == kR) { Mat ret; ret.x[0][0] = ret.x[1][1] = {1}; return ret; } else if (kL + 1 == kR) { const Mint k = kL; Mat ret; ret.x[0][0] = {A * -k, A}; ret.x[0][1] = {1}; ret.x[1][0] = {B * (1 - k) * k, B * 2 * k}; return ret; } else { const int kMid = (kL + kR) / 2; return sub(kL, kMid) * sub(kMid, kR); } } void dfs(int l, int r) { if (l + 1 == r) { fss[id(l, r)] = Poly{-N[kts[l].second], 1}; ms[id(l, r)] = sub(l ? 
kts[l - 1].first : 0, kts[l].first); } else { const int mid = (l + r) / 2; dfs(l, mid); dfs(mid, r); fss[id(l, r)] = fss[id(l, mid)] * fss[id(mid, r)]; ms[id(l, r)] = ms[id(l, mid)] * ms[id(mid, r)]; } // cerr<<"dfs "<<l<<" "<<r<<" "<<fss[id(l,r)]<<" "<<ms[id(l,r)]<<endl; } void DFS(int l, int r, const Mat &above_) { Mat above; for (int i = 0; i < 2; ++i) for (int j = 0; j < 2; ++j) { above.x[i][j] = above_.x[i][j] % fss[id(l, r)]; } // cerr<<"DFS "<<l<<" "<<r<<" "<<above<<endl; if (l + 1 == r) { const int t = kts[l].second; const Mint n = N[t]; Mint as[2]; for (int j = 0; j < 2; ++j) { assert(above.x[0][j].size() <= 1); as[j] = above.x[0][j].at(0); } ans[t] = 0; for (int k = 0; k < 2; ++k) { const Poly &mk = ms[id(l, r)].x[k][0]; Mint tmp = 0; for (int h = mk.size(); --h >= 0; ) { (tmp *= n) += mk[h]; } ans[t] += as[k] * tmp; } } else { const int mid = (l + r) / 2; DFS(l, mid, above); DFS(mid, r, above * ms[id(l, mid)]); } } void run() { kts.resize(T); for (int t = 0; t < T; ++t) { kts[t] = make_pair(K[t], t); } sort(kts.begin(), kts.end()); vector<Mint> ns(T); for (int i = 0; i < T; ++i) { ns[i] = N[kts[i].second]; } // cerr<<"kts = "<<kts<<endl; // cerr<<"ns = "<<ns<<endl; fss.resize(2 * T); ms.resize(2 * T); dfs(0, T); // cerr<<"DONE dfs"<<endl; Mat ini; ini.x[0][0] = ini.x[1][1] = {1}; DFS(0, T, ini); #ifdef LOCAL vector<Mint>brt(T); for(int t=0;t<T;++t)brt[t]=brute(N[t],K[t]); cerr<<"brt = "<<brt<<endl; #endif } } // small namespace large { void run() { for (int t = 0; t < T; ++t) if (K[t] < MO) { const Mint n = N[t]; const vector<vector<Poly>> a{ {{A * n, -A}, {1}}, {{0, B * (2 * n + 1), -B}, {}}, }; const auto res = polyMatrixProduct(a, 0, K[t]); ans[t] = res[0][0]; } } } // large int main() { for (; ~scanf("%d", &T); ) { N.resize(T); K.resize(T); for (int t = 0; t < T; ++t) { scanf("%lld%lld", &N[t], &K[t]); } ans.assign(T, 0); const Int maxK = *max_element(K.begin(), K.end()); if (maxK <= 100'000) { small::run(); } else { large::run(); } for (int t = 0; t 
< T; ++t) { printf("%u\n", ans[t].x); } } return 0; }