// NOTE(review): the header names and all template parameter/argument lists in this
// section were missing from the file as received; they are reconstructed from usage.
#include <algorithm>
#include <array>
#include <bitset>
#include <cassert>
#include <chrono>
#include <cmath>
#include <complex>
#include <deque>
#include <forward_list>
#include <fstream>
#include <functional>
#include <iomanip>
#include <ios>
#include <iostream>
#include <limits>
#include <list>
#include <map>
#include <numeric>
#include <queue>
#include <random>
#include <set>
#include <sstream>
#include <stack>
#include <string>
#include <tuple>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
using namespace std;
using lint = long long;
using pint = pair<int, int>;
using plint = pair<lint, lint>;

// Global object whose constructor unties cin/cout and fixes the float output format.
struct fast_ios {
    fast_ios() { cin.tie(nullptr), ios::sync_with_stdio(false), cout << fixed << setprecision(20); };
} fast_ios_;

#define ALL(x) (x).begin(), (x).end()
// FOR: i = begin, ..., end - 1 (ascending).  IFOR: i = end - 1, ..., begin (descending).
#define FOR(i, begin, end) for (int i = (begin), i##_end_ = (end); i < i##_end_; i++)
#define IFOR(i, begin, end) for (int i = (end)-1, i##_begin_ = (begin); i >= i##_begin_; i--)
#define REP(i, n) FOR(i, 0, n)
#define IREP(i, n) IFOR(i, 0, n)

// Fill a (possibly nested) vector with `val`, one length argument per dimension.
template <typename T, typename V> void ndarray(vector<T> &vec, const V &val, int len) { vec.assign(len, val); }
template <typename T, typename V, typename... Args>
void ndarray(vector<T> &vec, const V &val, int len, Args... args) {
    vec.resize(len), for_each(begin(vec), end(vec), [&](T &v) { ndarray(v, val, args...); });
}
// chmax / chmin: update m with q if q is larger / smaller; return whether m changed.
template <typename T> bool chmax(T &m, const T q) { return m < q ? (m = q, true) : false; }
template <typename T> bool chmin(T &m, const T q) { return m > q ? (m = q, true) : false; }
const std::vector<std::pair<int, int>> grid_dxs{{1, 0}, {-1, 0}, {0, 1}, {0, -1}};
// floor(log2(x)) for x > 0, -1 otherwise.
int floor_lg(long long x) { return x <= 0 ? -1 : 63 - __builtin_clzll(x); }
// NOTE(review): the non-positive branch rounds toward -infinity only for den > 0.
template <class T1, class T2> T1 floor_div(T1 num, T2 den) {
    return (num > 0 ? num / den : -((-num + den - 1) / den));
}
template <class T1, class T2>
std::pair<T1, T2> operator+(const std::pair<T1, T2> &l, const std::pair<T1, T2> &r) {
    return std::make_pair(l.first + r.first, l.second + r.second);
}
template <class T1, class T2>
std::pair<T1, T2> operator-(const std::pair<T1, T2> &l, const std::pair<T1, T2> &r) {
    return std::make_pair(l.first - r.first, l.second - r.second);
}
template <class T> std::vector<T> sort_unique(std::vector<T> vec) {
    sort(vec.begin(), vec.end()), vec.erase(unique(vec.begin(), vec.end()), vec.end());
    return vec;
}
// Index of the first element >= x (arglb) / > x (argub) in a sorted vector.
template <class T> int arglb(const std::vector<T> &v, const T &x) {
    return std::distance(v.begin(), std::lower_bound(v.begin(), v.end(), x));
}
template <class T> int argub(const std::vector<T> &v, const T &x) {
    return std::distance(v.begin(), std::upper_bound(v.begin(), v.end(), x));
}
template <class IStream, class T> IStream &operator>>(IStream &is, std::vector<T> &vec) {
    for (auto &v : vec) is >> v;
    return is;
}

// Debug printers for standard containers (forward declarations so they can nest).
template <class OStream, class T> OStream &operator<<(OStream &os, const std::vector<T> &vec);
template <class OStream, class T, size_t sz> OStream &operator<<(OStream &os, const std::array<T, sz> &arr);
template <class OStream, class T, class TH> OStream &operator<<(OStream &os, const std::unordered_set<T, TH> &vec);
template <class OStream, class T, class U> OStream &operator<<(OStream &os, const pair<T, U> &pa);
template <class OStream, class T> OStream &operator<<(OStream &os, const std::deque<T> &vec);
template <class OStream, class T> OStream &operator<<(OStream &os, const std::set<T> &vec);
template <class OStream, class T> OStream &operator<<(OStream &os, const std::multiset<T> &vec);
template <class OStream, class T> OStream &operator<<(OStream &os, const std::unordered_multiset<T> &vec);
template <class OStream, class T, class U> OStream &operator<<(OStream &os, const std::pair<T, U> &pa);
template <class OStream, class TK, class TV> OStream &operator<<(OStream &os, const std::map<TK, TV> &mp);
template <class OStream, class TK, class TV, class TH>
OStream &operator<<(OStream &os, const std::unordered_map<TK, TV, TH> &mp);
template <class OStream, class... T> OStream &operator<<(OStream &os, const std::tuple<T...> &tpl);

template <class OStream, class T> OStream &operator<<(OStream &os, const std::vector<T> &vec) {
    os << '[';
    for (auto v : vec) os << v << ',';
    os << ']';
    return os;
}
template <class OStream, class T, size_t sz> OStream &operator<<(OStream &os, const std::array<T, sz> &arr) {
    os << '[';
    for (auto v : arr) os << v << ',';
    os << ']';
    return os;
}
template <class... T> std::istream &operator>>(std::istream &is, std::tuple<T...> &tpl) {
    std::apply([&is](auto &&... args) { ((is >> args), ...); }, tpl);
    return is;
}
template <class OStream, class... T> OStream &operator<<(OStream &os, const std::tuple<T...> &tpl) {
    os << '(';
    std::apply([&os](auto &&... args) { ((os << args << ','), ...); }, tpl);
    return os << ')';
}
template <class OStream, class T, class TH> OStream &operator<<(OStream &os, const std::unordered_set<T, TH> &vec) {
    os << '{';
    for (auto v : vec) os << v << ',';
    os << '}';
    return os;
}
template <class OStream, class T> OStream &operator<<(OStream &os, const std::deque<T> &vec) {
    os << "deq[";
    for (auto v : vec) os << v << ',';
    os << ']';
    return os;
}
template <class OStream, class T> OStream &operator<<(OStream &os, const std::set<T> &vec) {
    os << '{';
    for (auto v : vec) os << v << ',';
    os << '}';
    return os;
}
template <class OStream, class T> OStream &operator<<(OStream &os, const std::multiset<T> &vec) {
    os << '{';
    for (auto v : vec) os << v << ',';
    os << '}';
    return os;
}
template <class OStream, class T> OStream &operator<<(OStream &os, const std::unordered_multiset<T> &vec) {
    os << '{';
    for (auto v : vec) os << v << ',';
    os << '}';
    return os;
}
template <class OStream, class T, class U> OStream &operator<<(OStream &os, const std::pair<T, U> &pa) {
    return os << '(' << pa.first << ',' << pa.second << ')';
}
template <class OStream, class TK, class TV> OStream &operator<<(OStream &os, const std::map<TK, TV> &mp) {
    os << '{';
    for (auto v : mp) os << v.first << "=>" << v.second << ',';
    os << '}';
    return os;
}
template <class OStream, class TK, class TV, class TH>
OStream &operator<<(OStream &os, const std::unordered_map<TK, TV, TH> &mp) {
    os << '{';
    for (auto v : mp) os << v.first << "=>" << v.second << ',';
    os << '}';
    return os;
}
#ifdef HITONANODE_LOCAL
const string COLOR_RESET = "\033[0m", BRIGHT_GREEN = "\033[1;32m", BRIGHT_RED = "\033[1;31m", BRIGHT_CYAN = "\033[1;36m", NORMAL_CROSSED = "\033[0;9;37m", RED_BACKGROUND = "\033[1;41m", NORMAL_FAINT = "\033[0;2m";
#define dbg(x) std::cerr << BRIGHT_CYAN << #x << COLOR_RESET << " = " << (x) << NORMAL_FAINT << " (L" << __LINE__ << ") " << __FILE__ << COLOR_RESET << std::endl
#define dbgif(cond, x) ((cond) ? std::cerr << BRIGHT_CYAN << #x << COLOR_RESET << " = " << (x) << NORMAL_FAINT << " (L" << __LINE__ << ") " << __FILE__ << COLOR_RESET << std::endl : std::cerr)
#else
#define dbg(x) ((void)0)
#define dbgif(cond, x) ((void)0)
#endif

#include <iostream>
#include <set>
#include <vector>

// Integers modulo the compile-time constant `md`; the stored value is always in [0, md).
// inv() / pow(md - 2) and sqrt() assume `md` is prime.
template <int md> struct ModInt {
#if __cplusplus >= 201402L
#define MDCONST constexpr
#else
#define MDCONST
#endif
    using lint = long long;
    MDCONST static int mod() { return md; }
    // Smallest primitive root modulo md (computed once, then cached); -1 if none is found.
    static int get_primitive_root() {
        static int primitive_root = 0;
        if (!primitive_root) {
            primitive_root = [&]() {
                std::set<int> fac; // distinct prime factors of md - 1
                int v = md - 1;
                for (lint i = 2; i * i <= v; i++)
                    while (v % i == 0) fac.insert(i), v /= i;
                if (v > 1) fac.insert(v);
                for (int g = 1; g < md; g++) {
                    bool ok = true;
                    for (auto i : fac)
                        if (ModInt(g).pow((md - 1) / i) == 1) {
                            ok = false;
                            break;
                        }
                    if (ok) return g;
                }
                return -1;
            }();
        }
        return primitive_root;
    }
    int val_;
    int val() const noexcept { return val_; }
    MDCONST ModInt() : val_(0) {}
    // Requires 0 <= v < 2 * md.
    MDCONST ModInt &_setval(lint v) { return val_ = (v >= md ? v - md : v), *this; }
    MDCONST ModInt(lint v) { _setval(v % md + md); }
    MDCONST explicit operator bool() const { return val_ != 0; }
    MDCONST ModInt operator+(const ModInt &x) const { return ModInt()._setval((lint)val_ + x.val_); }
    MDCONST ModInt operator-(const ModInt &x) const { return ModInt()._setval((lint)val_ - x.val_ + md); }
    MDCONST ModInt operator*(const ModInt &x) const { return ModInt()._setval((lint)val_ * x.val_ % md); }
    MDCONST ModInt operator/(const ModInt &x) const { return ModInt()._setval((lint)val_ * x.inv().val() % md); }
    MDCONST ModInt operator-() const { return ModInt()._setval(md - val_); }
    MDCONST ModInt &operator+=(const ModInt &x) { return *this = *this + x; }
    MDCONST ModInt &operator-=(const ModInt &x) { return *this = *this - x; }
    MDCONST ModInt &operator*=(const ModInt &x) { return *this = *this * x; }
    MDCONST ModInt &operator/=(const ModInt &x) { return *this = *this / x; }
    friend MDCONST ModInt operator+(lint a, const ModInt &x) { return ModInt()._setval(a % md + x.val_); }
    friend MDCONST ModInt operator-(lint a, const ModInt &x) { return ModInt()._setval(a % md - x.val_ + md); }
    friend MDCONST ModInt operator*(lint a, const ModInt &x) { return ModInt()._setval(a % md * x.val_ % md); }
    friend MDCONST ModInt operator/(lint a, const ModInt &x) {
        return ModInt()._setval(a % md * x.inv().val() % md);
    }
    MDCONST bool operator==(const ModInt &x) const { return val_ == x.val_; }
    MDCONST bool operator!=(const ModInt &x) const { return val_ != x.val_; }
    MDCONST bool operator<(const ModInt &x) const { return val_ < x.val_; } // To use std::map
    friend std::istream &operator>>(std::istream &is, ModInt &x) {
        lint t;
        return is >> t, x = ModInt(t), is;
    }
    MDCONST friend std::ostream &operator<<(std::ostream &os, const ModInt &x) { return os << x.val_; }
    // Binary exponentiation; n is expected to be non-negative.
    MDCONST ModInt pow(lint n) const {
        ModInt ans = 1, tmp = *this;
        while (n) {
            if (n & 1) ans *= tmp;
            tmp *= tmp, n >>= 1;
        }
        return ans;
    }

    // Lazily grown tables: facs[i] = i!, facinvs[i] = 1 / i!, invs[i] = 1 / i.
    static std::vector<ModInt> facs, facinvs, invs;
    MDCONST static void _precalculation(int N) {
        int l0 = facs.size();
        if (N > md) N = md;
        if (N <= l0) return;
        facs.resize(N), facinvs.resize(N), invs.resize(N);
        for (int i = l0; i < N; i++) facs[i] = facs[i - 1] * i;
        facinvs[N - 1] = facs.back().pow(md - 2);
        for (int i = N - 2; i >= l0; i--) facinvs[i] = facinvs[i + 1] * (i + 1);
        for (int i = N - 1; i >= l0; i--) invs[i] = facinvs[i] * facs[i - 1];
    }
    // Small values are served from the table (so inv of 0 yields 0); others use pow(md - 2).
    MDCONST ModInt inv() const {
        if (this->val_ < std::min(md >> 1, 1 << 21)) {
            if (facs.empty()) facs = {1}, facinvs = {1}, invs = {0};
            while (this->val_ >= int(facs.size())) _precalculation(facs.size() * 2);
            return invs[this->val_];
        } else {
            return this->pow(md - 2);
        }
    }
    MDCONST ModInt fac() const {
        while (this->val_ >= int(facs.size())) _precalculation(facs.size() * 2);
        return facs[this->val_];
    }
    MDCONST ModInt facinv() const {
        while (this->val_ >= int(facs.size())) _precalculation(facs.size() * 2);
        return facinvs[this->val_];
    }
    MDCONST ModInt doublefac() const {
        lint k = (this->val_ + 1) / 2;
        return (this->val_ & 1) ? ModInt(k * 2).fac() / (ModInt(2).pow(k) * ModInt(k).fac())
                                : ModInt(k).fac() * ModInt(2).pow(k);
    }
    MDCONST ModInt nCr(const ModInt &r) const {
        return (this->val_ < r.val_) ? 0 : this->fac() * (*this - r).facinv() * r.facinv();
    }
    MDCONST ModInt nPr(const ModInt &r) const {
        return (this->val_ < r.val_) ? 0 : this->fac() * (*this - r).facinv();
    }

    // Modular square root (Tonelli-Shanks style); returns 0 when no root exists.
    // Of the two roots, the smaller representative is returned.
    ModInt sqrt() const {
        if (val_ == 0) return 0;
        if (md == 2) return val_;
        if (pow((md - 1) / 2) != 1) return 0;
        ModInt b = 1;
        while (b.pow((md - 1) / 2) == 1) b += 1;
        int e = 0, m = md - 1;
        while (m % 2 == 0) m >>= 1, e++;
        ModInt x = pow((m - 1) / 2), y = (*this) * x * x;
        x *= (*this);
        ModInt z = b.pow(m);
        while (y != 1) {
            int j = 0;
            ModInt t = y;
            while (t != 1) j++, t *= t;
            z = z.pow(1LL << (e - j - 1));
            x *= z, z *= z, y *= z;
            e = j;
        }
        return ModInt(std::min(x.val_, md - x.val_));
    }
};
template <int md> std::vector<ModInt<md>> ModInt<md>::facs = {1};
template <int md> std::vector<ModInt<md>> ModInt<md>::facinvs = {1};
template <int md> std::vector<ModInt<md>> ModInt<md>::invs = {0};
using mint = ModInt<998244353>;

// Integer convolution for arbitrary mod
// with NTT (and Garner's algorithm) for ModInt / ModIntRuntime class.
// We skip Garner's algorithm if `skip_garner` is true or mod is in `nttprimes`.
// input: a (size: n), b (size: m)
// return: vector (size: n + m - 1)
template <typename MODINT>
std::vector<MODINT> nttconv(std::vector<MODINT> a, std::vector<MODINT> b, bool skip_garner);

constexpr int nttprimes[3] = {998244353, 167772161, 469762049};

// Integer FFT (Fast Fourier Transform) for ModInt class
// (Also known as Number Theoretic Transform, NTT)
// is_inverse: inverse transform
// ** Input size must be 2^n **
template <typename MODINT> void ntt(std::vector<MODINT> &a, bool is_inverse = false) {
    int n = a.size();
    if (n == 1) return;
    static const int mod = MODINT::mod();
    static const MODINT root = MODINT::get_primitive_root();
    assert(__builtin_popcount(n) == 1 and (mod - 1) % n == 0);

    // Twiddle-factor tables, extended on demand and shared across calls.
    static std::vector<MODINT> w{1}, iw{1};
    for (int m = w.size(); m < n / 2; m *= 2) {
        MODINT dw = root.pow((mod - 1) / (4 * m)), dwinv = 1 / dw;
        w.resize(m * 2), iw.resize(m * 2);
        for (int i = 0; i < m; i++) w[m + i] = w[i] * dw, iw[m + i] = iw[i] * dwinv;
    }

    if (!is_inverse) {
        for (int m = n; m >>= 1;) {
            for (int s = 0, k = 0; s < n; s += 2 * m, k++) {
                for (int i = s; i < s + m; i++) {
                    MODINT x = a[i], y = a[i + m] * w[k];
                    a[i] = x + y, a[i + m] = x - y;
                }
            }
        }
    } else {
        for (int m = 1; m < n; m *= 2) {
            for (int s = 0, k = 0; s < n; s += 2 * m, k++) {
                for (int i = s; i < s + m; i++) {
                    MODINT x = a[i], y = a[i + m];
                    a[i] = x + y, a[i + m] = (x - y) * iw[k];
                }
            }
        }
        int n_inv = MODINT(n).inv().val();
        for (auto &v : a) v *= n_inv;
    }
}
// Cyclic convolution of two equal, power-of-two-length int vectors modulo MOD.
template <int MOD>
std::vector<ModInt<MOD>> nttconv_(const std::vector<int> &a, const std::vector<int> &b) {
    int sz = a.size();
    assert(a.size() == b.size() and __builtin_popcount(sz) == 1);
    std::vector<ModInt<MOD>> ap(sz), bp(sz);
    for (int i = 0; i < sz; i++) ap[i] = a[i], bp[i] = b[i];
    ntt(ap, false);
    if (a == b)
        bp = ap;
    else
        ntt(bp, false);
    for (int i = 0; i < sz; i++) ap[i] *= bp[i];
    ntt(ap, true);
    return ap;
}
// Garner's algorithm: combine residues modulo the three NTT primes into a residue modulo `mod`.
long long garner_ntt_(int r0, int r1, int r2, int mod) {
    using mint2 = ModInt<nttprimes[2]>;
    static const long long m01 = 1LL * nttprimes[0] * nttprimes[1];
    static const long long m0_inv_m1 = ModInt<nttprimes[1]>(nttprimes[0]).inv().val();
    static const long long m01_inv_m2 = mint2(m01).inv().val();

    int v1 = (m0_inv_m1 * (r1 + nttprimes[1] - r0)) % nttprimes[1];
    auto v2 = (mint2(r2) - r0 - mint2(nttprimes[0]) * v1) * m01_inv_m2;
    return (r0 + 1LL * nttprimes[0] * v1 + m01 % mod * v2.val()) % mod;
}
template <typename MODINT>
std::vector<MODINT> nttconv(std::vector<MODINT> a, std::vector<MODINT> b, bool skip_garner) {
    if (a.empty() or b.empty()) return {};
    int sz = 1, n = a.size(), m = b.size();
    while (sz < n + m) sz <<= 1;
    if (sz <= 16) {
        // Small inputs: schoolbook multiplication.
        std::vector<MODINT> ret(n + m - 1);
        for (int i = 0; i < n; i++) {
            for (int j = 0; j < m; j++) ret[i + j] += a[i] * b[j];
        }
        return ret;
    }
    int mod = MODINT::mod();
    if (skip_garner or std::find(std::begin(nttprimes), std::end(nttprimes), mod) != std::end(nttprimes)) {
        a.resize(sz), b.resize(sz);
        if (a == b) {
            ntt(a, false);
            b = a;
        } else {
            ntt(a, false), ntt(b, false);
        }
        for (int i = 0; i < sz; i++) a[i] *= b[i];
        ntt(a, true);
        a.resize(n + m - 1);
    } else {
        std::vector<int> ai(sz), bi(sz);
        for (int i = 0; i < n; i++) ai[i] = a[i].val();
        for (int i = 0; i < m; i++) bi[i] = b[i].val();
        auto ntt0 = nttconv_<nttprimes[0]>(ai, bi);
        auto ntt1 = nttconv_<nttprimes[1]>(ai, bi);
        auto ntt2 = nttconv_<nttprimes[2]>(ai, bi);
        a.resize(n + m - 1);
        for (int i = 0; i < n + m - 1; i++)
            a[i] = garner_ntt_(ntt0[i].val(), ntt1[i].val(), ntt2[i].val(), mod);
    }
    return a;
}
template <typename MODINT>
std::vector<MODINT> nttconv(const std::vector<MODINT> &a, const std::vector<MODINT> &b) {
    return nttconv<MODINT>(a, b, false);
}

// Formal Power Series based on ModInt / ModIntRuntime
// Reference: https://ei1333.github.io/luzhiled/snippets/math/formal-power-series.html
template <typename T> struct FormalPowerSeries : std::vector<T> {
    using std::vector<T>::vector;
    using P = FormalPowerSeries;

    // Drop trailing zero coefficients.
    void shrink() {
        while (this->size() and this->back() == T(0)) this->pop_back();
    }

    P operator+(const P &r) const { return P(*this) += r; }
    P operator+(const T &v) const { return P(*this) += v; }
    P operator-(const P &r) const { return P(*this) -= r; }
    P operator-(const T &v) const { return P(*this) -= v; }
    P operator*(const P &r) const { return P(*this) *= r; }
    P operator*(const T &v) const { return P(*this) *= v; }
    P operator/(const P &r) const { return P(*this) /= r; }
    P operator/(const T &v) const { return P(*this) /= v; }
    P operator%(const P &r) const { return P(*this) %= r; }

    P &operator+=(const P &r) {
        if (r.size() > this->size()) this->resize(r.size());
        for (int i = 0; i < (int)r.size(); i++) (*this)[i] += r[i];
        shrink();
        return *this;
    }
    P &operator+=(const T &v) {
        if (this->empty()) this->resize(1);
        (*this)[0] += v;
        shrink();
        return *this;
    }
    P &operator-=(const P &r) {
        if (r.size() > this->size()) this->resize(r.size());
        for (int i = 0; i < (int)r.size(); i++) (*this)[i] -= r[i];
        shrink();
        return *this;
    }
    P &operator-=(const T &v) {
        if (this->empty()) this->resize(1);
        (*this)[0] -= v;
        shrink();
        return *this;
    }
    P &operator*=(const T &v) {
        for (auto &x : (*this)) x *= v;
        shrink();
        return *this;
    }
    P &operator*=(const P &r) {
        if (this->empty() || r.empty())
            this->clear();
        else {
            auto ret = nttconv(*this, r);
            *this = P(ret.begin(), ret.end());
        }
        return *this;
    }
    P &operator%=(const P &r) {
        *this -= *this / r * r;
        shrink();
        return *this;
    }
    P operator-() const {
        P ret = *this;
        for (auto &v : ret) v = -v;
        return ret;
    }
    P &operator/=(const T &v) {
        assert(v != T(0));
        for (auto &x : (*this)) x /= v;
        return *this;
    }
    // Polynomial (Euclidean) quotient, computed on the reversed polynomials.
    P &operator/=(const P &r) {
        if (this->size() < r.size()) {
            this->clear();
            return *this;
        }
        int n = (int)this->size() - r.size() + 1;
        return *this = (reversed().pre(n) * r.reversed().inv(n)).pre(n).reversed(n);
    }
    // First `sz` coefficients (shrunk).
    P pre(int sz) const {
        P ret(this->begin(), this->begin() + std::min((int)this->size(), sz));
        ret.shrink();
        return ret;
    }
    // Divide by x^sz (drop the low coefficients).
    P operator>>(int sz) const {
        if ((int)this->size() <= sz) return {};
        return P(this->begin() + sz, this->end());
    }
    // Multiply by x^sz.
    P operator<<(int sz) const {
        if (this->empty()) return {};
        P ret(*this);
        ret.insert(ret.begin(), sz, T(0));
        return ret;
    }

    P reversed(int deg = -1) const {
        assert(deg >= -1);
        P ret(*this);
        if (deg != -1) ret.resize(deg, T(0));
        reverse(ret.begin(), ret.end());
        ret.shrink();
        return ret;
    }

    P differential() const { // formal derivative (differential) of f.p.s.
        const int n = (int)this->size();
        P ret(std::max(0, n - 1));
        for (int i = 1; i < n; i++) ret[i - 1] = (*this)[i] * T(i);
        return ret;
    }

    P integral() const {
        const int n = (int)this->size();
        P ret(n + 1);
        ret[0] = T(0);
        for (int i = 0; i < n; i++) ret[i + 1] = (*this)[i] / T(i + 1);
        return ret;
    }

    // Multiplicative inverse modulo x^deg, by doubling the known precision each round.
    P inv(int deg) const {
        assert(deg >= -1);
        assert(this->size() and ((*this)[0]) != T(0)); // Requirement: F(0) != 0
        const int n = this->size();
        if (deg == -1) deg = n;
        P ret({T(1) / (*this)[0]});
        for (int i = 1; i < deg; i <<= 1) {
            auto h = (pre(i << 1) * ret).pre(i << 1) >> i;
            auto tmp = (-h * ret).pre(i);
            ret.insert(ret.end(), tmp.begin(), tmp.end());
            ret.resize(i << 1);
        }
        ret = ret.pre(deg);
        ret.shrink();
        return ret;
    }

    P log(int deg = -1) const {
        assert(deg >= -1);
        assert(this->size() and ((*this)[0]) == T(1)); // Requirement: F(0) = 1
        const int n = (int)this->size();
        if (deg == 0) return {};
        if (deg == -1) deg = n;
        return (this->differential() * this->inv(deg)).pre(deg - 1).integral();
    }

    // Square root modulo x^deg; returns an empty series when no square root exists.
    P sqrt(int deg = -1) const {
        assert(deg >= -1);
        const int n = (int)this->size();
        if (deg == -1) deg = n;
        if (this->empty()) return {};
        if ((*this)[0] == T(0)) {
            for (int i = 1; i < n; i++)
                if ((*this)[i] != T(0)) {
                    if ((i & 1) or deg - i / 2 <= 0) return {};
                    return (*this >> i).sqrt(deg - i / 2) << (i / 2);
                }
            return {};
        }
        T sqrtf0 = (*this)[0].sqrt();
        if (sqrtf0 == T(0)) return {};

        P y = (*this) / (*this)[0], ret({T(1)});
        T inv2 = T(1) / T(2);
        for (int i = 1; i < deg; i <<= 1) ret = (ret + y.pre(i << 1) * ret.inv(i << 1)) * inv2;
        return ret.pre(deg) * sqrtf0;
    }

    P exp(int deg = -1) const {
        assert(deg >= -1);
        assert(this->empty() or ((*this)[0]) == T(0)); // Requirement: F(0) = 0
        const int n = (int)this->size();
        if (deg == -1) deg = n;
        P ret({T(1)});
        for (int i = 1; i < deg; i <<= 1) {
            ret = (ret * (pre(i << 1) + T(1) - ret.log(i << 1))).pre(i << 1);
        }
        return ret.pre(deg);
    }

    // k-th power modulo x^deg, via exp(k * log(.)) on the series normalised by its lowest term.
    P pow(long long k, int deg = -1) const {
        assert(deg >= -1);
        const int n = (int)this->size();
        if (deg == -1) deg = n;

        if (k == 0) {
            P ret(deg);
            if (deg >= 1) ret[0] = T(1);
            ret.shrink();
            return ret;
        }

        for (int i = 0; i < n; i++) {
            if ((*this)[i] != T(0)) {
                T rev = T(1) / (*this)[i];
                P C = (*this) * rev, D(n - i);
                for (int j = i; j < n; j++) D[j - i] = C.coeff(j);
                D = (D.log(deg) * T(k)).exp(deg) * (*this)[i].pow(k);
                if (__int128(k) * i > deg) return {};
                P E(deg);
                long long S = i * k;
                for (int j = 0; j + S < deg and j < (int)D.size(); j++) E[j + S] = D[j];
                E.shrink();
                return E;
            }
        }
        return *this;
    }

    // Calculate f(X + c) from f(X), O(NlogN)
    P shift(T c) const {
        const int n = (int)this->size();
        P ret = *this;
        for (int i = 0; i < n; i++) ret[i] *= T(i).fac();
        std::reverse(ret.begin(), ret.end());
        P exp_cx(n, 1);
        for (int i = 1; i < n; i++) exp_cx[i] = exp_cx[i - 1] * c / i;
        ret = (ret * exp_cx), ret.resize(n);
        std::reverse(ret.begin(), ret.end());
        for (int i = 0; i < n; i++) ret[i] /= T(i).fac();
        return ret;
    }

    // Coefficient of x^i (0 when out of range).
    T coeff(int i) const {
        if ((int)this->size() <= i or i < 0) return T(0);
        return (*this)[i];
    }

    T eval(T x) const {
        T ret = 0, w = 1;
        for (auto &v : *this) ret += w * v, w *= x;
        return ret;
    }
};

// Berlekamp–Massey algorithm
// https://en.wikipedia.org/wiki/Berlekamp%E2%80%93Massey_algorithm
// Complexity: O(N^2)
// input: S = sequence from field K
// return: L          = degree of minimal polynomial,
//         C_reversed = monic min. polynomial (size = L + 1, reversed order, C_reversed[0] = 1))
// Formula: convolve(S, C_reversed)[i] = 0 for i >= L
// Example:
// - [1, 2, 4, 8, 16]   -> (1, [1, -2])
// - [1, 1, 2, 3, 5, 8] -> (2, [1, -1, -1])
// - [0, 0, 0, 0, 1]    -> (5, [1, 0, 0, 0, 0, 998244352]) (mod 998244353)
// - []                 -> (0, [1])
// - [0, 0, 0]          -> (0, [1])
// - [-2]               -> (1, [1, 2])
template <typename Tfield>
std::pair<int, std::vector<Tfield>> find_linear_recurrence(const std::vector<Tfield> &S) {
    int N = S.size();
    using poly = std::vector<Tfield>;
    poly C_reversed{1}, B{1};
    int L = 0, m = 1;
    Tfield b = 1;

    // adjust: C(x) <- C(x) - (d / b) x^m B(x)
    auto adjust = [](poly C, const poly &B, Tfield d, Tfield b, int m) -> poly {
        C.resize(std::max(C.size(), B.size() + m));
        Tfield a = d / b;
        for (unsigned i = 0; i < B.size(); i++) C[i + m] -= a * B[i];
        return C;
    };

    for (int n = 0; n < N; n++) {
        Tfield d = S[n]; // discrepancy of the current recurrence at position n
        for (int i = 1; i <= L; i++) d += C_reversed[i] * S[n - i];

        if (d == 0)
            m++;
        else if (2 * L <= n) {
            poly T = C_reversed;
            C_reversed = adjust(C_reversed, B, d, b, m);
            L = n + 1 - L;
            B = T;
            b = d;
            m = 1;
        } else
            C_reversed = adjust(C_reversed, B, d, b, m++);
    }
    return std::make_pair(L, C_reversed);
}

// Calculate $x^N \bmod f(x)$
// Known as `Kitamasa method`
// Input: N >= 0; f_reversed: monic, reversed (f_reversed[0] = 1)
// Return: coefficients of the remainder in ascending order (size K); empty if K = 0
// Complexity: $O(K^2 \log N)$ ($K$: deg. of $f$)
// Example: (4, [1, -1, -1]) -> [2, 3]
//          ( x^4 = (x^2 + x + 2)(x^2 - x - 1) + 3x + 2 )
// Reference: http://misawa.github.io/others/fast_kitamasa_method.html
//            http://sugarknri.hatenablog.com/entry/2017/11/18/233936
template <typename Tfield>
std::vector<Tfield> monomial_mod_polynomial(long long N, const std::vector<Tfield> &f_reversed) {
    assert(N >= 0);
    assert(!f_reversed.empty() and f_reversed[0] == 1);
    int K = f_reversed.size() - 1;
    if (!K) return {};
    // Bit length of N. __builtin_clzll(0) is undefined, so N == 0 is handled explicitly
    // (zero bits to process; the result is the constant polynomial 1).
    int D = N > 0 ? 64 - __builtin_clzll(N) : 0;
    std::vector<Tfield> ret(K, 0);
    ret[0] = 1;
    for (int d = D; d--;) {
        // Square, then reduce modulo f.
        ret = nttconv(ret, ret);
        for (int i = 2 * K - 2; i >= K; i--) {
            for (int j = 1; j <= K; j++) ret[i - j] -= ret[i] * f_reversed[j];
        }
        ret.resize(K);
        if ((N >> d) & 1) {
            // Multiply by x and reduce modulo f.
            std::vector<Tfield> c(K);
            c[0] = -ret[K - 1] * f_reversed[K];
            for (int i = 1; i < K; i++) { c[i] = ret[i - 1] - ret[K - 1] * f_reversed[K - i]; }
            ret = c;
        }
    }
    return ret;
}

// Guess k-th element of the sequence, assuming linear recurrence
// initial_elements: 0-ORIGIN
// Verify: abc198f https://atcoder.jp/contests/abc198/submissions/21837815
template <typename Tfield> Tfield guess_kth_term(const std::vector<Tfield> &initial_elements, long long k) {
    assert(k >= 0);
    if (k < static_cast<long long>(initial_elements.size())) return initial_elements[k];
    const auto f = find_linear_recurrence<Tfield>(initial_elements).second;
    const auto g = monomial_mod_polynomial<Tfield>(k, f);
    Tfield ret = 0;
    for (unsigned i = 0; i < g.size(); i++) ret += g[i] * initial_elements[i];
    return ret;
}

#include <chrono>
#include <random>
#include <vector>

// Vector of `len` values drawn uniformly from [1, mod - 1] (i.e. nonzero residues),
// using a generator seeded once from the steady clock.
template <typename ModInt> std::vector<ModInt> gen_random_vector(int len) {
    static std::mt19937 mt(std::chrono::steady_clock::now().time_since_epoch().count());
    static std::uniform_int_distribution<int> rnd(1, ModInt::mod() - 1);
    std::vector<ModInt> ret(len);
    for (auto &x : ret) x = rnd(mt);
    return ret;
}

// Probabilistic algorithm to find a solution of linear equation Ax = b if exists.
// Complexity: O(n T(n) + n^2)
// Reference:
// [1] W. Eberly, E. Kaltofen, "On randomized Lanczos algorithms," Proc. of international symposium on
//     Symbolic and algebraic computation, 176-183, 1997.
template std::vector linear_system_solver_lanczos(const Matrix &A, const std::vector &b) { assert(A.height() == int(b.size())); const int M = A.height(), N = A.width(); const std::vector D1 = gen_random_vector(N), D2 = gen_random_vector(M), v = gen_random_vector(N); auto applyD1 = [&D1](std::vector v) { for (int i = 0; i < int(v.size()); i++) v[i] *= D1[i]; return v; }; auto applyD2 = [&D2](std::vector v) { for (int i = 0; i < int(v.size()); i++) v[i] *= D2[i]; return v; }; auto applyAtilde = [&](std::vector v) -> std::vector { v = applyD1(v); v = A.prod(v); v = applyD2(v); v = A.prod_left(v); v = applyD1(v); return v; }; auto dot = [&](const std::vector &vl, const std::vector &vr) -> T { return std::inner_product(vl.begin(), vl.end(), vr.begin(), T(0)); }; auto scalar_vec = [&](const T &x, std::vector vec) -> std::vector { for (auto &v : vec) v *= x; return vec; }; auto btilde1 = applyD1(A.prod_left(applyD2(b))), btilde2 = applyAtilde(v); std::vector btilde(N); for (int i = 0; i < N; i++) btilde[i] = btilde1[i] + btilde2[i]; std::vector w0 = btilde, v1 = applyAtilde(w0); std::vector wm1(w0.size()), v0(v1.size()); T t0 = dot(v1, w0), gamma = dot(btilde, w0) / t0, tm1 = 1; std::vector x = scalar_vec(gamma, w0); while (true) { if (!t0 or !std::count_if(w0.begin(), w0.end(), [](T x) { return x != T(0); })) break; T alpha = dot(v1, v1) / t0, beta = dot(v1, v0) / tm1; std::vector w1(N); for (int i = 0; i < N; i++) w1[i] = v1[i] - alpha * w0[i] - beta * wm1[i]; std::vector v2 = applyAtilde(w1); T t1 = dot(w1, v2); gamma = dot(btilde, w1) / t1; for (int i = 0; i < N; i++) x[i] += gamma * w1[i]; wm1 = w0, w0 = w1; v0 = v1, v1 = v2; tm1 = t0, t0 = t1; } for (int i = 0; i < N; i++) x[i] -= v[i]; return applyD1(x); } // Probabilistic algorithm to calculate determinant of matrices // Complexity: O(n T(n) + n^2) // Reference: // [1] D. H. Wiedmann, "Solving sparse linear equations over finite fields," // IEEE Trans. on Information Theory, 32(1), 54-62, 1986. 
template Tp blackbox_determinant(const Matrix &M) { assert(M.height() == M.width()); const int N = M.height(); std::vector b = gen_random_vector(N), u = gen_random_vector(N), D = gen_random_vector(N); std::vector uMDib(2 * N); for (int i = 0; i < 2 * N; i++) { uMDib[i] = std::inner_product(u.begin(), u.end(), b.begin(), Tp(0)); for (int j = 0; j < N; j++) b[j] *= D[j]; b = M.prod(b); } auto ret = find_linear_recurrence(uMDib); Tp det = ret.second.back() * (N % 2 ? -1 : 1); Tp ddet = 1; for (auto d : D) ddet *= d; return det / ddet; } // Complexity: O(n T(n) + n^2) template std::vector reversed_minimal_polynomial_of_matrix(const Matrix &M) { assert(M.height() == M.width()); const int N = M.height(); std::vector b = gen_random_vector(N), u = gen_random_vector(N); std::vector uMb(2 * N); for (int i = 0; i < 2 * N; i++) { uMb[i] = std::inner_product(u.begin(), u.end(), b.begin(), Tp()); b = M.prod(b); } auto ret = find_linear_recurrence(uMb); return ret.second; } // Calculate A^k b // Complexity: O(n^2 log k + n T(n)) // Verified: https://www.codechef.com/submit/COUNTSEQ2 template std::vector blackbox_matrix_pow_vec(const Matrix &A, long long k, std::vector b) { assert(A.width() == int(b.size())); assert(k >= 0); std::vector rev_min_poly = reversed_minimal_polynomial_of_matrix(A); std::vector remainder = monomial_mod_polynomial(k, rev_min_poly); std::vector ret(b.size()); for (Tp c : remainder) { for (int d = 0; d < int(b.size()); ++d) ret[d] += b[d] * c; b = A.prod(b); } return ret; } // Sparse matrix template struct sparse_matrix { int H, W; std::vector>> vals; sparse_matrix(int H = 0, int W = 0) : H(H), W(W), vals(H) {} int height() const { return H; } int width() const { return W; } void add_element(int i, int j, Tp val) { assert(i >= 0 and i < H); assert(j >= 0 and i < W); vals[i].emplace_back(j, val); } std::vector prod(const std::vector &vec) const { assert(W == int(vec.size())); std::vector ret(H); for (int i = 0; i < H; i++) { for (const auto &p : vals[i]) 
ret[i] += p.second * vec[p.first]; } return ret; } std::vector prod_left(const std::vector &vec) const { assert(H == int(vec.size())); std::vector ret(W); for (int i = 0; i < H; i++) { for (const auto &p : vals[i]) ret[p.first] += p.second * vec[i]; } return ret; } std::vector> vecvec() const { std::vector> ret(H, std::vector(W)); for (int i = 0; i < H; i++) { for (auto p : vals[i]) ret[i][p.first] += p.second; } return ret; } }; int main() { int N; cin >> N; vector P(N + 1, -1); vector> child(N + 1); FOR(i, 1, N + 1) cin >> P.at(i), child.at(P.at(i)).push_back(i); vector W(N + 1); FOR(i, 1, N + 1) cin >> W.at(i); int Q; cin >> Q; sparse_matrix trans(N + 1 + Q, N + 1 + Q); REP(i, N + 1) { if (child.at(i).empty()) { trans.add_element(0, i, 1); } else { mint wsum = 0; for (int j : child.at(i)) wsum += W.at(j); for (int j : child.at(i)) trans.add_element(j, i, W.at(j) / wsum); } } vector> kaqs; REP(q, Q) { int a, k; cin >> a >> k; kaqs.emplace_back(k, a, q); trans.add_element(N + 1 + q, a, 1); trans.add_element(N + 1 + q, N + 1 + q, 1); } vector init(N + 1); init.front() = 1; for (auto [k, a, q] : kaqs) init.push_back(a == 0 ? 
-1 : 0); vector ret(Q); sort(ALL(kaqs)); int klast = -1; const auto rev_min_poly = reversed_minimal_polynomial_of_matrix(trans); vector> rems; REP(d, 1) { rems.push_back(monomial_mod_polynomial(1 << d, rev_min_poly)); } REP(_, 29) { auto v = nttconv(rems.back(), rems.back()); IFOR(d, rev_min_poly.size() - 1, v.size()) { const mint a = -v.at(d); REP(i, rev_min_poly.size()) v.at(d - i) += a * rev_min_poly.at(i); v.pop_back(); } rems.push_back(v); } for (auto [k_, a, q] : kaqs) { auto dk = k_ - klast; dbg(make_tuple(k_, dk, a, q)); klast = k_; if (dk) { // auto remainder = monomial_mod_polynomial(dk, rev_min_poly); REP(d, 30) { if ((dk >> d) & 1) { std::vector tmp(init.size()); for (auto c : rems.at(d)) { for (int i = 0; i < int(init.size()); ++i) tmp[i] += init[i] * c; init = trans.prod(init); } init = tmp; } } } ret.at(q) = init.at(N + 1 + q); } for (auto x : ret) cout << x << endl; }