#pragma region Macros #ifdef noimi #include "my_template.hpp" #else #pragma GCC target("avx2") #pragma GCC optimize("O3") #pragma GCC optimize("unroll-loops") #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef noimi #define oj_local(a, b) b #else #define oj_local(a, b) a #endif #define LOCAL if(oj_local(0, 1)) #define OJ if(oj_local(1, 0)) using namespace std; using ll = long long; using ull = unsigned long long int; using i128 = __int128_t; using pii = pair; using pll = pair; using ld = long double; template using vc = vector; template using vvc = vector>; template using vvvc = vector>; using vi = vc; using vl = vc; using vpi = vc; using vpl = vc; template using pq = priority_queue; template using pqg = priority_queue, greater>; template int si(const T &x) { return x.size(); } template inline bool chmax(T &a, const S &b) { return (a < b ? a = b, 1 : 0); } template inline bool chmin(T &a, const S &b) { return (a > b ? a = b, 1 : 0); } vi iota(int n) { vi a(n); return iota(a.begin(), a.end(), 0), a; } template vi iota(const vector &a, bool greater = false) { vi res(a.size()); iota(res.begin(), res.end(), 0); sort(res.begin(), res.end(), [&](int i, int j) { if(greater) return a[i] > a[j]; return a[i] < a[j]; }); return res; } // macros #define overload5(a, b, c, d, e, name, ...) name #define overload4(a, b, c, d, name, ...) name #define endl '\n' #define REP0(n) for(ll jidlsjf = 0; jidlsjf < n; ++jidlsjf) #define REP1(i, n) for(ll i = 0; i < (n); ++i) #define REP2(i, a, b) for(ll i = (a); i < (b); ++i) #define REP3(i, a, b, c) for(ll i = (a); i < (b); i += (c)) #define rep(...) overload4(__VA_ARGS__, REP3, REP2, REP1, REP0)(__VA_ARGS__) #define per0(n) for(int jidlsjf = 0; jidlsjf < (n); ++jidlsjf) #define per1(i, n) for(ll i = (n)-1; i >= 0; --i) #define per2(i, a, b) for(ll i = (a)-1; i >= b; --i) #define per3(i, a, b, c) for(ll i = (a)-1; i >= (b); i -= (c)) #define per(...) overload4(__VA_ARGS__, per3, per2, per1, per0)(__VA_ARGS__) #define fore0(a) rep(a.size()) #define fore1(i, a) for(auto &&i : a) #define fore2(a, b, v) for(auto &&[a, b] : v) #define fore3(a, b, c, v) for(auto &&[a, b, c] : v) #define fore4(a, b, c, d, v) for(auto &&[a, b, c, d] : v) #define fore(...) overload5(__VA_ARGS__, fore4, fore3, fore2, fore1, fore0)(__VA_ARGS__) #define setbits(j, n) for(ll iiiii = (n), j = lowbit(iiiii); iiiii; iiiii ^= 1 << j, j = lowbit(iiiii)) #define perm(v) for(bool permrepflag = true; (permrepflag ? exchange(permrepflag, false) : next_permutation(all(v)));) #define fi first #define se second #define pb push_back #define ppb pop_back #define ppf pop_front #define eb emplace_back #define drop(s) cout << #s << endl, exit(0) #define si(c) (int)(c).size() #define lb(c, x) distance((c).begin(), lower_bound(all(c), (x))) #define lbg(c, x) distance((c).begin(), lower_bound(all(c), (x), greater{})) #define ub(c, x) distance((c).begin(), upper_bound(all(c), (x))) #define ubg(c, x) distance((c).begin(), upper_bound(all(c), (x), greater{})) #define rng(v, l, r) v.begin() + (l), v.begin() + (r) #define all(c) begin(c), end(c) #define rall(c) rbegin(c), rend(c) #define SORT(v) sort(all(v)) #define REV(v) reverse(all(v)) #define UNIQUE(x) SORT(x), x.erase(unique(all(x)), x.end()) template T SUM(const S &v) { return accumulate(all(v), T(0)); } #define MIN(v) *min_element(all(v)) #define MAX(v) *max_element(all(v)) #define overload2(_1, _2, name, ...) name #define vec(type, name, ...) vector name(__VA_ARGS__) #define vv(type, name, h, ...) vector> name(h, vector(__VA_ARGS__)) #define vvv(type, name, h, w, ...) vector>> name(h, vector>(w, vector(__VA_ARGS__))) #define vvvv(type, name, a, b, c, ...) \ vector>>> name(a, vector>>(b, vector>(c, vector(__VA_ARGS__)))) constexpr pii dx4[4] = {pii{1, 0}, pii{0, 1}, pii{-1, 0}, pii{0, -1}}; constexpr pii dx8[8] = {{1, 0}, {1, 1}, {0, 1}, {-1, 1}, {-1, 0}, {-1, -1}, {0, -1}, {1, -1}}; namespace yesno_impl { const string YESNO[2] = {"NO", "YES"}; const string YesNo[2] = {"No", "Yes"}; const string yesno[2] = {"no", "yes"}; const string firstsecond[2] = {"second", "first"}; const string FirstSecond[2] = {"Second", "First"}; const string possiblestr[2] = {"impossible", "possible"}; const string Possiblestr[2] = {"Impossible", "Possible"}; void YES(bool t = 1) { cout << YESNO[t] << endl; } void NO(bool t = 1) { YES(!t); } void Yes(bool t = 1) { cout << YesNo[t] << endl; } void No(bool t = 1) { Yes(!t); } void yes(bool t = 1) { cout << yesno[t] << endl; } void no(bool t = 1) { yes(!t); } void first(bool t = 1) { cout << firstsecond[t] << endl; } void First(bool t = 1) { cout << FirstSecond[t] << endl; } void possible(bool t = 1) { cout << possiblestr[t] << endl; } void Possible(bool t = 1) { cout << Possiblestr[t] << endl; } }; // namespace yesno_impl using namespace yesno_impl; #define INT(...) \ int __VA_ARGS__; \ IN(__VA_ARGS__) #define INTd(...) \ int __VA_ARGS__; \ IN2(__VA_ARGS__) #define LL(...) \ ll __VA_ARGS__; \ IN(__VA_ARGS__) #define LLd(...) \ ll __VA_ARGS__; \ IN2(__VA_ARGS__) #define STR(...) \ string __VA_ARGS__; \ IN(__VA_ARGS__) #define CHR(...) \ char __VA_ARGS__; \ IN(__VA_ARGS__) #define DBL(...) \ double __VA_ARGS__; \ IN(__VA_ARGS__) #define VEC(type, name, size) \ vector name(size); \ IN(name) #define VECd(type, name, size) \ vector name(size); \ IN2(name) #define VEC2(type, name1, name2, size) \ vector name1(size), name2(size); \ for(int i = 0; i < size; i++) IN(name1[i], name2[i]) #define VEC2d(type, name1, name2, size) \ vector name1(size), name2(size); \ for(int i = 0; i < size; i++) IN2(name1[i], name2[i]) #define VEC3(type, name1, name2, name3, size) \ vector name1(size), name2(size), name3(size); \ for(int i = 0; i < size; i++) IN(name1[i], name2[i], name3[i]) #define VEC3d(type, name1, name2, name3, size) \ vector name1(size), name2(size), name3(size); \ for(int i = 0; i < size; i++) IN2(name1[i], name2[i], name3[i]) #define VEC4(type, name1, name2, name3, name4, size) \ vector name1(size), name2(size), name3(size), name4(size); \ for(int i = 0; i < size; i++) IN(name1[i], name2[i], name3[i], name4[i]); #define VEC4d(type, name1, name2, name3, name4, size) \ vector name1(size), name2(size), name3(size), name4(size); \ for(int i = 0; i < size; i++) IN2(name1[i], name2[i], name3[i], name4[i]); #define VV(type, name, h, w) \ vector> name(h, vector(w)); \ IN(name) #define VVd(type, name, h, w) \ vector> name(h, vector(w)); \ IN2(name) int scan() { return getchar(); } void scan(int &a) { cin >> a; } void scan(long long &a) { cin >> a; } void scan(char &a) { cin >> a; } void scan(double &a) { cin >> a; } void scan(string &a) { cin >> a; } template void scan(pair &p) { scan(p.first), scan(p.second); } template void scan(vector &); template void scan(vector &a) { for(auto &i : a) scan(i); } template void scan(T &a) { cin >> a; } void IN() {} void IN2() {} template void IN(Head &head, Tail &...tail) { scan(head); IN(tail...); } template void IN2(Head &head, Tail &...tail) { scan(head); --head; IN2(tail...); } template void pat() {} template void pat(Head &h, Tail &...tail) { h += p; pat

(tail...); } template T ceil(T x, S y) { assert(y); return (y < 0 ? ceil(-x, -y) : (x > 0 ? (x + y - 1) / y : x / y)); } template T floor(T x, S y) { assert(y); return (y < 0 ? floor(-x, -y) : (x > 0 ? x / y : x / y - (x % y == 0 ? 0 : 1))); } template U bigmul(const T &x, const S &y, const U &lim) { // clamp(x * y, -lim, lim) if(x < 0 and y < 0) return bigmul(-x, -y, lim); if(x < 0) return -bigmul(-x, y, lim); if(y < 0) return -bigmul(x, -y, lim); return y == 0 or x <= lim / y ? x * y : lim; } template T POW(T x, int n) { T res = 1; for(; n; n >>= 1, x *= x) if(n & 1) res *= x; return res; } template T POW(T x, S n, const ll &mod) { T res = 1; x %= mod; for(; n; n >>= 1, x = x * x % mod) if(n & 1) res = res * x % mod; return res; } vector factor(ll x) { vector ans; for(ll i = 2; i * i <= x; i++) if(x % i == 0) { ans.push_back({i, 1}); while((x /= i) % i == 0) ans.back().second++; } if(x != 1) ans.push_back({x, 1}); return ans; } template vector divisor(T x) { vector ans; for(T i = 1; i * i <= x; i++) if(x % i == 0) { ans.pb(i); if(i * i != x) ans.pb(x / i); } return ans; } template void zip(vector &x) { vector y = x; UNIQUE(y); for(int i = 0; i < x.size(); ++i) { x[i] = lb(y, x[i]); } } template void fold_in(vector &v) {} template void fold_in(vector &v, Head &&a, Tail &&...tail) { for(auto e : a) v.emplace_back(e); fold_in(v, tail...); } template void renumber(vector &v) {} template void renumber(vector &v, Head &&a, Tail &&...tail) { for(auto &&e : a) e = lb(v, e); renumber(v, tail...); } template vector zip(vector &head, Args &&...args) { vector v; fold_in(v, head, args...); sort(all(v)), v.erase(unique(all(v)), v.end()); renumber(v, head, args...); return v; } template void rearrange(const vector &id) {} template void rearrange_exec(const vector &id, vector &v) { vector w(v.size()); rep(i, si(id)) w[i] = v[id[i]]; v.swap(w); } // 並び替える順番, 並び替える vector 達 template void rearrange(const vector &id, Head &a, Tail &...tail) { rearrange_exec(id, a); rearrange(id, tail...); } template vector RUI(const vector &v) { vector res(v.size() + 1); for(int i = 0; i < v.size(); i++) res[i + 1] = res[i] + v[i]; return res; } template void zeta_supersetsum(vector &f) { int n = f.size(); for(int i = 1; i < n; i <<= 1) rep(b, n) if(!(i & b)) f[b] += f[b | i]; } template void zeta_subsetsum(vector &f) { int n = f.size(); for(int i = 1; i < n; i <<= 1) rep(b, n) if(!(i & b)) f[b | i] += f[b]; } template void mobius_subset(vector &f) { int n = f.size(); for(int i = 1; i < n; i <<= 1) rep(b, n) if(!(i & b)) f[b] -= f[b | i]; } template void mobius_superset(vector &f) { int n = f.size(); for(int i = 1; i < n; i <<= 1) rep(b, n) if(!(i & b)) f[b | i] -= f[b]; } // 反時計周りに 90 度回転 template void rot(vector> &v) { if(empty(v)) return; int n = v.size(), m = v[0].size(); vector> res(m, vector(n)); rep(i, n) rep(j, m) res[m - 1 - j][i] = v[i][j]; v.swap(res); } vector counter(const vector &v, int max_num = -1) { if(max_num == -1) max_num = MAX(v); vector res(max_num + 1); fore(e, v) res[e]++; return res; } // x in [l, r) template bool inc(const T &x, const S &l, const S &r) { return l <= x and x < r; } template bool inc(const T &x, const pair &p) { return p.first <= x and x < p.second; } // 便利関数 constexpr ll ten(int n) { return n == 0 ? 1 : ten(n - 1) * 10; } constexpr ll tri(ll n) { return n * (n + 1) / 2; } // l + ... + r constexpr ll tri(ll l, ll r) { return (l + r) * (r - l + 1) / 2; } ll max(int x, ll y) { return max((ll)x, y); } ll max(ll x, int y) { return max(x, (ll)y); } int min(int x, ll y) { return min((ll)x, y); } int min(ll x, int y) { return min(x, (ll)y); } // bit 演算系 #define bit(i) (1LL << i) // (1 << i) #define test(b, i) (b >> i & 1) // b の i bit 目が立っているか ll pow2(int i) { return 1LL << i; } int topbit(signed t) { return t == 0 ? -1 : 31 - __builtin_clz(t); } int topbit(ll t) { return t == 0 ? -1 : 63 - __builtin_clzll(t); } int lowbit(signed a) { return a == 0 ? 32 : __builtin_ctz(a); } int lowbit(ll a) { return a == 0 ? 64 : __builtin_ctzll(a); } // int allbit(int n) { return (1 << n) - 1; } constexpr ll mask(int n) { return (1LL << n) - 1; } // int popcount(signed t) { return __builtin_popcount(t); } // int popcount(ll t) { return __builtin_popcountll(t); } int popcount(uint64_t t) { return __builtin_popcountll(t); } static inline uint64_t popcount64(uint64_t x) { uint64_t m1 = 0x5555555555555555ll; uint64_t m2 = 0x3333333333333333ll; uint64_t m4 = 0x0F0F0F0F0F0F0F0Fll; uint64_t h01 = 0x0101010101010101ll; x -= (x >> 1) & m1; x = (x & m2) + ((x >> 2) & m2); x = (x + (x >> 4)) & m4; return (x * h01) >> 56; } bool ispow2(int i) { return i && (i & -i) == i; } ll rnd(ll l, ll r) { //[l, r) #ifdef noimi static mt19937_64 gen; #else static mt19937_64 gen(chrono::steady_clock::now().time_since_epoch().count()); #endif return uniform_int_distribution(l, r - 1)(gen); } ll rnd(ll n) { return rnd(0, n); } template void random_shuffle(vc &a) { rep(i, si(a)) swap(a[i], a[rnd(0, i + 1)]); } int in() { int x; cin >> x; return x; } ll lin() { unsigned long long x; cin >> x; return x; } template pair operator-(const pair &x) { return pair(-x.first, -x.second); } template pair operator-(const pair &x, const pair &y) { return pair(x.fi - y.fi, x.se - y.se); } template pair operator+(const pair &x, const pair &y) { return pair(x.fi + y.fi, x.se + y.se); } template pair operator&(const pair &l, const pair &r) { return pair(max(l.fi, r.fi), min(l.se, r.se)); } template pair operator+=(pair &l, const pair &r) { return l = l + r; } template pair operator-=(pair &l, const pair &r) { return l = l - r; } template bool intersect(const pair &l, const pair &r) { return (l.se < r.se ? r.fi < l.se : l.fi < r.se); } template vector &operator++(vector &v) { fore(e, v) e++; return v; } template vector operator++(vector &v, int) { auto res = v; fore(e, v) e++; return res; } template vector &operator--(vector &v) { fore(e, v) e--; return v; } template vector operator--(vector &v, int) { auto res = v; fore(e, v) e--; return res; } template void connect(vector &l, const vector &r) { fore(e, r) l.eb(e); } template vector operator+(const vector &l, const vector &r) { vector res(max(si(l), si(r))); rep(i, si(l)) res[i] += l[i]; rep(i, si(r)) res[i] += r[i]; return res; } template vector operator-(const vector &l, const vector &r) { vector res(max(si(l), si(r))); rep(i, si(l)) res[i] += l[i]; rep(i, si(r)) res[i] -= r[i]; return res; } template vector &operator+=(const vector &l, const vector &r) { if(si(l) < si(r)) l.resize(si(r)); rep(i, si(r)) l[i] += r[i]; return l; } template vector &operator-=(const vector &l, const vector &r) { if(si(l) < si(r)) l.resize(si(r)); rep(i, si(r)) l[i] -= r[i]; return l; } template vector &operator+=(vector &v, const T &x) { fore(e, v) e += x; return v; } template vector &operator-=(vector &v, const T &x) { fore(e, v) e -= x; return v; } template struct edge { int from, to; T cost; int id; edge(int to, T cost) : from(-1), to(to), cost(cost) {} edge(int from, int to, T cost) : from(from), to(to), cost(cost) {} edge(int from, int to, T cost, int id) : from(from), to(to), cost(cost), id(id) {} constexpr bool operator<(const edge &rhs) const noexcept { return cost < rhs.cost; } edge &operator=(const int &x) { to = x; return *this; } operator int() const { return to; } friend ostream operator<<(ostream &os, const edge &e) { return os << e.to; } }; template using Edges = vector>; template Edges read_edges(int m, bool weighted = false) { Edges res; res.reserve(m); for(int i = 0; i < m; i++) { int u, v, c = 0; scan(u), scan(v), u--, v--; if(weighted) scan(c); res.eb(u, v, c, i); } return res; } using Tree = vector>; using Graph = vector>; template using Wgraph = vector>>; Graph getG(int n, int m = -1, bool directed = false, int margin = 1) { Tree res(n); if(m == -1) m = n - 1; while(m--) { int a, b; cin >> a >> b; a -= margin, b -= margin; res[a].emplace_back(b); if(!directed) res[b].emplace_back(a); } return res; } Graph getTreeFromPar(int n, int margin = 1) { Graph res(n); for(int i = 1; i < n; i++) { int a; cin >> a; res[a - margin].emplace_back(i); } return res; } template Wgraph getWg(int n, int m = -1, bool directed = false, int margin = 1) { Wgraph res(n); if(m == -1) m = n - 1; while(m--) { int a, b; T c; scan(a), scan(b), scan(c); a -= margin, b -= margin; res[a].emplace_back(b, c); if(!directed) res[b].emplace_back(a, c); } return res; } void add(Graph &G, int x, int y) { G[x].eb(y), G[y].eb(x); } template void add(Wgraph &G, int x, int y, T c) { G[x].eb(y, c), G[y].eb(x, c); } #define TEST \ INT(testcases); \ while(testcases--) i128 abs(const i128 &x) { return x > 0 ? x : -x; } istream &operator>>(istream &is, i128 &v) { string s; is >> s; v = 0; for(int i = 0; i < (int)s.size(); i++) { if(isdigit(s[i])) { v = v * 10 + s[i] - '0'; } } if(s[0] == '-') { v *= -1; } return is; } ostream &operator<<(ostream &os, const i128 &v) { if(v == 0) { return (os << "0"); } i128 num = v; if(v < 0) { os << '-'; num = -num; } string s; for(; num > 0; num /= 10) { s.push_back((char)(num % 10) + '0'); } reverse(s.begin(), s.end()); return (os << s); } namespace aux { template struct tp { static void output(std::ostream &os, const T &v) { os << std::get(v) << (&os == &cerr ? ", " : " "); tp::output(os, v); } }; template struct tp { static void output(std::ostream &os, const T &v) { os << std::get(v); } }; } // namespace aux template std::ostream &operator<<(std::ostream &os, const std::tuple &t) { if(&os == &cerr) { os << '('; } aux::tp, 0, sizeof...(Ts) - 1>::output(os, t); if(&os == &cerr) { os << ')'; } return os; } template std::ostream &operator<<(std::ostream &os, const priority_queue &_pq) { auto pq = _pq; vector res; while(!empty(pq)) res.emplace_back(pq.top()), pq.pop(); return os << res; } template ostream &operator<<(ostream &os, const pair &p) { if(&os == &cerr) { return os << "(" << p.first << ", " << p.second << ")"; } return os << p.first << " " << p.second; } template std::basic_ostream &operator<<(std::basic_ostream &os, const Container &x) { bool f = true; if(&os == &cerr) os << "["; for(auto &y : x) { if(&os == &cerr) os << (f ? "" : ", ") << y; else os << (f ? "" : " ") << y; f = false; } if(&os == &cerr) os << "]"; return os; } #define dump(...) static_cast(0) #define dbg(...) static_cast(0) void OUT() { cout << endl; } template void OUT(const Head &head, const Tail &...tail) { cout << head; if(sizeof...(tail)) cout << ' '; OUT(tail...); } template static constexpr T inf = numeric_limits::max() / 10; template constexpr pair inf> = {inf, inf}; template void OUT2(const T &t, T INF = inf, T res = -1) { OUT(t != INF ? t : res); } template void OUT2(vector &v, T INF = inf, T res = -1) { fore(e, v) if(e == INF) e = res; OUT(v); fore(e, v) if(e == res) e = INF; } template struct REC { F f; REC(F &&f_) : f(forward(f_)) {} template auto operator()(Args &&...args) const { return f(*this, forward(args)...); } }; template vector> runLength(const vector &v) { vector> res; for(auto &e : v) { if(res.empty() or res.back().fi != e) res.eb(e, 1); else res.back().se++; } return res; } vector> runLength(const string &v) { vector> res; for(auto &e : v) { if(res.empty() or res.back().fi != e) res.eb(e, 1); else res.back().se++; } return res; } struct string_converter { char start = 0; char type(const char &c) const { return (islower(c) ? 'a' : isupper(c) ? 'A' : isdigit(c) ? '0' : 0); } int convert(const char &c) { if(!start) start = type(c); return c - start; } int convert(const char &c, const string &chars) { return chars.find(c); } template auto convert(const T &v) { vector ret; ret.reserve(size(v)); for(auto &&e : v) ret.emplace_back(convert(e)); return ret; } template auto convert(const T &v, const string &chars) { vector ret; ret.reserve(size(v)); for(auto &&e : v) ret.emplace_back(convert(e, chars)); return ret; } int operator()(const char &v, char s = 0) { start = s; return convert(v); } int operator()(const char &v, const string &chars) { return convert(v, chars); } template auto operator()(const T &v, char s = 0) { start = s; return convert(v); } template auto operator()(const T &v, const string &chars) { return convert(v, chars); } } toint; template T bin_search(T ok, T ng, const F &f) { while(abs(ok - ng) > 1) { T mid = ok + ng >> 1; (f(mid) ? ok : ng) = mid; } return ok; } template T bin_search_double(T ok, T ng, const F &f, int iter = 80) { while(iter--) { T mid = (ok + ng) / 2; (f(mid) ? ok : ng) = mid; } return ok; } struct Setup_io { Setup_io() { ios_base::sync_with_stdio(0), cin.tie(0), cout.tie(0); cout << fixed << setprecision(11); } } setup_io; #endif #pragma endregion namespace Modular998 { template vector BerlekampMassey(const vector &s) { const int N = (int)s.size(); vector b, c; b.reserve(N + 1); c.reserve(N + 1); b.push_back(mint(1)); c.push_back(mint(1)); mint y = mint(1); for(int ed = 1; ed <= N; ed++) { int l = int(c.size()), m = int(b.size()); mint x = 0; for(int i = 0; i < l; i++) x += c[i] * s[ed - l + i]; b.emplace_back(mint(0)); m++; if(x == mint(0)) continue; mint freq = x / y; if(l < m) { auto tmp = c; c.insert(begin(c), m - l, mint(0)); for(int i = 0; i < m; i++) c[m - 1 - i] -= freq * b[m - 1 - i]; b = tmp; y = x; } else { for(int i = 0; i < m; i++) c[l - 1 - i] -= freq * b[m - 1 - i]; } } reverse(begin(c), end(c)); return c; } template struct LazyMontgomeryModInt { using mint = LazyMontgomeryModInt; using i32 = int32_t; using u32 = uint32_t; using u64 = uint64_t; static constexpr u32 get_r() { u32 ret = mod; for(i32 i = 0; i < 4; ++i) ret *= 2 - mod * ret; return ret; } static constexpr u32 r = get_r(); static constexpr u32 n2 = -u64(mod) % mod; static_assert(r * mod == 1, "invalid, r * mod != 1"); static_assert(mod < (1 << 30), "invalid, mod >= 2 ^ 30"); static_assert((mod & 1) == 1, "invalid, mod % 2 == 0"); u32 a; constexpr LazyMontgomeryModInt() : a(0) {} constexpr LazyMontgomeryModInt(const int64_t &b) : a(reduce(u64(b % mod + mod) * n2)){}; static constexpr u32 reduce(const u64 &b) { return (b + u64(u32(b) * u32(-r)) * mod) >> 32; } constexpr mint &operator+=(const mint &b) { if(i32(a += b.a - 2 * mod) < 0) a += 2 * mod; return *this; } constexpr mint &operator-=(const mint &b) { if(i32(a -= b.a) < 0) a += 2 * mod; return *this; } constexpr mint &operator*=(const mint &b) { a = reduce(u64(a) * b.a); return *this; } constexpr mint &operator/=(const mint &b) { *this *= b.inverse(); return *this; } constexpr mint operator+(const mint &b) const { return mint(*this) += b; } constexpr mint operator-(const mint &b) const { return mint(*this) -= b; } constexpr mint operator*(const mint &b) const { return mint(*this) *= b; } constexpr mint operator/(const mint &b) const { return mint(*this) /= b; } constexpr bool operator==(const mint &b) const { return (a >= mod ? a - mod : a) == (b.a >= mod ? b.a - mod : b.a); } constexpr bool operator!=(const mint &b) const { return (a >= mod ? a - mod : a) != (b.a >= mod ? b.a - mod : b.a); } constexpr mint operator-() const { return mint() - mint(*this); } constexpr mint pow(u64 n) const { mint ret(1), mul(*this); while(n > 0) { if(n & 1) ret *= mul; mul *= mul; n >>= 1; } return ret; } constexpr mint inverse() const { return pow(mod - 2); } friend ostream &operator<<(ostream &os, const mint &b) { return os << b.get(); } friend istream &operator>>(istream &is, mint &b) { int64_t t; is >> t; b = LazyMontgomeryModInt(t); return (is); } constexpr u32 get() const { u32 ret = reduce(a); return ret >= mod ? ret - mod : ret; } static constexpr u32 get_mod() { return mod; } }; template struct NTT { static constexpr uint32_t get_pr() { uint32_t _mod = mint::get_mod(); using u64 = uint64_t; u64 ds[32] = {}; int idx = 0; u64 m = _mod - 1; for(u64 i = 2; i * i <= m; ++i) { if(m % i == 0) { ds[idx++] = i; while(m % i == 0) m /= i; } } if(m != 1) ds[idx++] = m; uint32_t _pr = 2; while(1) { int flg = 1; for(int i = 0; i < idx; ++i) { u64 a = _pr, b = (_mod - 1) / ds[i], r = 1; while(b) { if(b & 1) r = r * a % _mod; a = a * a % _mod; b >>= 1; } if(r == 1) { flg = 0; break; } } if(flg == 1) break; ++_pr; } return _pr; }; static constexpr uint32_t mod = mint::get_mod(); static constexpr uint32_t pr = get_pr(); static constexpr int level = __builtin_ctzll(mod - 1); mint dw[level], dy[level]; void setwy(int k) { mint w[level], y[level]; w[k - 1] = mint(pr).pow((mod - 1) / (1 << k)); y[k - 1] = w[k - 1].inverse(); for(int i = k - 2; i > 0; --i) w[i] = w[i + 1] * w[i + 1], y[i] = y[i + 1] * y[i + 1]; dw[1] = w[1], dy[1] = y[1], dw[2] = w[2], dy[2] = y[2]; for(int i = 3; i < k; ++i) { dw[i] = dw[i - 1] * y[i - 2] * w[i]; dy[i] = dy[i - 1] * w[i - 2] * y[i]; } } NTT() { setwy(level); } void fft4(vector &a, int k) { if((int)a.size() <= 1) return; if(k == 1) { mint a1 = a[1]; a[1] = a[0] - a[1]; a[0] = a[0] + a1; return; } if(k & 1) { int v = 1 << (k - 1); for(int j = 0; j < v; ++j) { mint ajv = a[j + v]; a[j + v] = a[j] - ajv; a[j] += ajv; } } int u = 1 << (2 + (k & 1)); int v = 1 << (k - 2 - (k & 1)); mint one = mint(1); mint imag = dw[1]; while(v) { // jh = 0 { int j0 = 0; int j1 = v; int j2 = j1 + v; int j3 = j2 + v; for(; j0 < v; ++j0, ++j1, ++j2, ++j3) { mint t0 = a[j0], t1 = a[j1], t2 = a[j2], t3 = a[j3]; mint t0p2 = t0 + t2, t1p3 = t1 + t3; mint t0m2 = t0 - t2, t1m3 = (t1 - t3) * imag; a[j0] = t0p2 + t1p3, a[j1] = t0p2 - t1p3; a[j2] = t0m2 + t1m3, a[j3] = t0m2 - t1m3; } } // jh >= 1 mint ww = one, xx = one * dw[2], wx = one; for(int jh = 4; jh < u;) { ww = xx * xx, wx = ww * xx; int j0 = jh * v; int je = j0 + v; int j2 = je + v; for(; j0 < je; ++j0, ++j2) { mint t0 = a[j0], t1 = a[j0 + v] * xx, t2 = a[j2] * ww, t3 = a[j2 + v] * wx; mint t0p2 = t0 + t2, t1p3 = t1 + t3; mint t0m2 = t0 - t2, t1m3 = (t1 - t3) * imag; a[j0] = t0p2 + t1p3, a[j0 + v] = t0p2 - t1p3; a[j2] = t0m2 + t1m3, a[j2 + v] = t0m2 - t1m3; } xx *= dw[__builtin_ctzll((jh += 4))]; } u <<= 2; v >>= 2; } } void ifft4(vector &a, int k) { if((int)a.size() <= 1) return; if(k == 1) { mint a1 = a[1]; a[1] = a[0] - a[1]; a[0] = a[0] + a1; return; } int u = 1 << (k - 2); int v = 1; mint one = mint(1); mint imag = dy[1]; while(u) { // jh = 0 { int j0 = 0; int j1 = v; int j2 = v + v; int j3 = j2 + v; for(; j0 < v; ++j0, ++j1, ++j2, ++j3) { mint t0 = a[j0], t1 = a[j1], t2 = a[j2], t3 = a[j3]; mint t0p1 = t0 + t1, t2p3 = t2 + t3; mint t0m1 = t0 - t1, t2m3 = (t2 - t3) * imag; a[j0] = t0p1 + t2p3, a[j2] = t0p1 - t2p3; a[j1] = t0m1 + t2m3, a[j3] = t0m1 - t2m3; } } // jh >= 1 mint ww = one, xx = one * dy[2], yy = one; u <<= 2; for(int jh = 4; jh < u;) { ww = xx * xx, yy = xx * imag; int j0 = jh * v; int je = j0 + v; int j2 = je + v; for(; j0 < je; ++j0, ++j2) { mint t0 = a[j0], t1 = a[j0 + v], t2 = a[j2], t3 = a[j2 + v]; mint t0p1 = t0 + t1, t2p3 = t2 + t3; mint t0m1 = (t0 - t1) * xx, t2m3 = (t2 - t3) * yy; a[j0] = t0p1 + t2p3, a[j2] = (t0p1 - t2p3) * ww; a[j0 + v] = t0m1 + t2m3, a[j2 + v] = (t0m1 - t2m3) * ww; } xx *= dy[__builtin_ctzll(jh += 4)]; } u >>= 4; v <<= 2; } if(k & 1) { u = 1 << (k - 1); for(int j = 0; j < u; ++j) { mint ajv = a[j] - a[j + u]; a[j] += a[j + u]; a[j + u] = ajv; } } } void ntt(vector &a) { if((int)a.size() <= 1) return; fft4(a, __builtin_ctz(a.size())); } void intt(vector &a) { if((int)a.size() <= 1) return; ifft4(a, __builtin_ctz(a.size())); mint iv = mint(a.size()).inverse(); for(auto &x : a) x *= iv; } vector multiply(const vector &a, const vector &b) { int l = a.size() + b.size() - 1; if(min(a.size(), b.size()) <= 40) { vector s(l); for(int i = 0; i < (int)a.size(); ++i) for(int j = 0; j < (int)b.size(); ++j) s[i + j] += a[i] * b[j]; return s; } int k = 2, M = 4; while(M < l) M <<= 1, ++k; setwy(k); vector s(M), t(M); for(int i = 0; i < (int)a.size(); ++i) s[i] = a[i]; for(int i = 0; i < (int)b.size(); ++i) t[i] = b[i]; fft4(s, k); fft4(t, k); for(int i = 0; i < M; ++i) s[i] *= t[i]; ifft4(s, k); s.resize(l); mint invm = mint(M).inverse(); for(int i = 0; i < l; ++i) s[i] *= invm; return s; } void ntt_doubling(vector &a) { int M = (int)a.size(); auto b = a; intt(b); mint r = 1, zeta = mint(pr).pow((mint::get_mod() - 1) / (M << 1)); for(int i = 0; i < M; i++) b[i] *= r, r *= zeta; ntt(b); copy(begin(b), end(b), back_inserter(a)); } }; template struct FormalPowerSeries : vector { using vector::vector; using FPS = FormalPowerSeries; FPS &operator+=(const FPS &r) { if(r.size() > this->size()) this->resize(r.size()); for(int i = 0; i < (int)r.size(); i++) (*this)[i] += r[i]; return *this; } FPS &operator+=(const mint &r) { if(this->empty()) this->resize(1); (*this)[0] += r; return *this; } FPS &operator-=(const FPS &r) { if(r.size() > this->size()) this->resize(r.size()); for(int i = 0; i < (int)r.size(); i++) (*this)[i] -= r[i]; return *this; } FPS &operator-=(const mint &r) { if(this->empty()) this->resize(1); (*this)[0] -= r; return *this; } FPS &operator*=(const mint &v) { for(int k = 0; k < (int)this->size(); k++) (*this)[k] *= v; return *this; } FPS &operator/=(const FPS &r) { if(this->size() < r.size()) { this->clear(); return *this; } int n = this->size() - r.size() + 1; if((int)r.size() <= 64) { FPS f(*this), g(r); g.shrink(); mint coeff = g.back().inverse(); for(auto &x : g) x *= coeff; int deg = (int)f.size() - (int)g.size() + 1; int gs = g.size(); FPS quo(deg); for(int i = deg - 1; i >= 0; i--) { quo[i] = f[i + gs - 1]; for(int j = 0; j < gs; j++) f[i + j] -= quo[i] * g[j]; } *this = quo * coeff; this->resize(n, mint(0)); return *this; } return *this = ((*this).rev().pre(n) * r.rev().inv(n)).pre(n).rev(); } FPS &operator%=(const FPS &r) { *this -= *this / r * r; shrink(); return *this; } FPS operator+(const FPS &r) const { return FPS(*this) += r; } FPS operator+(const mint &v) const { return FPS(*this) += v; } FPS operator-(const FPS &r) const { return FPS(*this) -= r; } FPS operator-(const mint &v) const { return FPS(*this) -= v; } FPS operator*(const FPS &r) const { return FPS(*this) *= r; } FPS operator*(const mint &v) const { return FPS(*this) *= v; } FPS operator/(const FPS &r) const { return FPS(*this) /= r; } FPS operator%(const FPS &r) const { return FPS(*this) %= r; } FPS operator-() const { FPS ret(this->size()); for(int i = 0; i < (int)this->size(); i++) ret[i] = -(*this)[i]; return ret; } void shrink() { while(this->size() && this->back() == mint(0)) this->pop_back(); } FPS rev() const { FPS ret(*this); reverse(begin(ret), end(ret)); return ret; } FPS dot(FPS r) const { FPS ret(min(this->size(), r.size())); for(int i = 0; i < (int)ret.size(); i++) ret[i] = (*this)[i] * r[i]; return ret; } FPS pre(int sz) const { return FPS(begin(*this), begin(*this) + min((int)this->size(), sz)); } FPS operator>>(int sz) const { if((int)this->size() <= sz) return {}; FPS ret(*this); ret.erase(ret.begin(), ret.begin() + sz); return ret; } FPS operator<<(int sz) const { FPS ret(*this); ret.insert(ret.begin(), sz, mint(0)); return ret; } FPS diff() const { const int n = (int)this->size(); FPS ret(max(0, n - 1)); mint one(1), coeff(1); for(int i = 1; i < n; i++) { ret[i - 1] = (*this)[i] * coeff; coeff += one; } return ret; } FPS integral() const { const int n = (int)this->size(); FPS ret(n + 1); ret[0] = mint(0); if(n > 0) ret[1] = mint(1); auto mod = mint::get_mod(); for(int i = 2; i <= n; i++) ret[i] = (-ret[mod % i]) * (mod / i); for(int i = 0; i < n; i++) ret[i + 1] *= (*this)[i]; return ret; } mint eval(mint x) const { mint r = 0, w = 1; for(auto &v : *this) r += w * v, w *= x; return r; } FPS log(int deg = -1) const { assert((*this)[0] == mint(1)); if(deg == -1) deg = (int)this->size(); return (this->diff() * this->inv(deg)).pre(deg - 1).integral(); } FPS pow(int64_t k, int deg = -1) const { const int n = (int)this->size(); if(deg == -1) deg = n; for(int i = 0; i < n; i++) { if((*this)[i] != mint(0)) { if(i * k > deg) return FPS(deg, mint(0)); mint rev = mint(1) / (*this)[i]; FPS ret = (((*this * rev) >> i).log(deg) * k).exp(deg) * ((*this)[i].pow(k)); ret = (ret << (i * k)).pre(deg); if((int)ret.size() < deg) ret.resize(deg, mint(0)); return ret; } } return FPS(deg, mint(0)); } static void *ntt_ptr; static void set_fft(); FPS &operator*=(const FPS &r); void ntt(); void intt(); void ntt_doubling(); static int ntt_pr(); FPS inv(int deg = -1) const; FPS exp(int deg = -1) const; }; template void *FormalPowerSeries::ntt_ptr = nullptr; template void FormalPowerSeries::set_fft() { if(!ntt_ptr) ntt_ptr = new NTT; } template FormalPowerSeries &FormalPowerSeries::operator*=(const FormalPowerSeries &r) { if(this->empty() || r.empty()) { this->clear(); return *this; } set_fft(); auto ret = static_cast *>(ntt_ptr)->multiply(*this, r); return *this = FormalPowerSeries(ret.begin(), ret.end()); } template void FormalPowerSeries::ntt() { set_fft(); static_cast *>(ntt_ptr)->ntt(*this); } template void FormalPowerSeries::intt() { set_fft(); static_cast *>(ntt_ptr)->intt(*this); } template void FormalPowerSeries::ntt_doubling() { set_fft(); static_cast *>(ntt_ptr)->ntt_doubling(*this); } template int FormalPowerSeries::ntt_pr() { set_fft(); return static_cast *>(ntt_ptr)->pr; } template FormalPowerSeries FormalPowerSeries::inv(int deg) const { assert((*this)[0] != mint(0)); if(deg == -1) deg = (int)this->size(); FormalPowerSeries res(deg); res[0] = {mint(1) / (*this)[0]}; for(int d = 1; d < deg; d <<= 1) { FormalPowerSeries f(2 * d), g(2 * d); for(int j = 0; j < min((int)this->size(), 2 * d); j++) f[j] = (*this)[j]; for(int j = 0; j < d; j++) g[j] = res[j]; f.ntt(); g.ntt(); for(int j = 0; j < 2 * d; j++) f[j] *= g[j]; f.intt(); for(int j = 0; j < d; j++) f[j] = 0; f.ntt(); for(int j = 0; j < 2 * d; j++) f[j] *= g[j]; f.intt(); for(int j = d; j < min(2 * d, deg); j++) res[j] = -f[j]; } return res.pre(deg); } template FormalPowerSeries FormalPowerSeries::exp(int deg) const { using fps = FormalPowerSeries; assert((*this).size() == 0 || (*this)[0] == mint(0)); if(deg == -1) deg = this->size(); fps inv; inv.reserve(deg + 1); inv.push_back(mint(0)); inv.push_back(mint(1)); auto inplace_integral = [&](fps &F) -> void { const int n = (int)F.size(); auto mod = mint::get_mod(); while((int)inv.size() <= n) { int i = inv.size(); inv.push_back((-inv[mod % i]) * (mod / i)); } F.insert(begin(F), mint(0)); for(int i = 1; i <= n; i++) F[i] *= inv[i]; }; auto inplace_diff = [](fps &F) -> void { if(F.empty()) return; F.erase(begin(F)); mint coeff = 1, one = 1; for(int i = 0; i < (int)F.size(); i++) { F[i] *= coeff; coeff += one; } }; fps b{1, 1 < (int)this->size() ? (*this)[1] : 0}, c{1}, z1, z2{1, 1}; for(int m = 2; m < deg; m *= 2) { auto y = b; y.resize(2 * m); y.ntt(); z1 = z2; fps z(m); for(int i = 0; i < m; ++i) z[i] = y[i] * z1[i]; z.intt(); fill(begin(z), begin(z) + m / 2, mint(0)); z.ntt(); for(int i = 0; i < m; ++i) z[i] *= -z1[i]; z.intt(); c.insert(end(c), begin(z) + m / 2, end(z)); z2 = c; z2.resize(2 * m); z2.ntt(); fps x(begin(*this), begin(*this) + min(this->size(), m)); x.resize(m); inplace_diff(x); x.push_back(mint(0)); x.ntt(); for(int i = 0; i < m; ++i) x[i] *= y[i]; x.intt(); x -= b.diff(); x.resize(2 * m); for(int i = 0; i < m - 1; ++i) x[m + i] = x[i], x[i] = mint(0); x.ntt(); for(int i = 0; i < 2 * m; ++i) x[i] *= z2[i]; x.intt(); x.pop_back(); inplace_integral(x); for(int i = m; i < min(this->size(), 2 * m); ++i) x[i] += (*this)[i]; fill(begin(x), begin(x) + m, mint(0)); x.ntt(); for(int i = 0; i < 2 * m; ++i) x[i] *= y[i]; x.intt(); b.insert(end(b), begin(x) + m, end(x)); } return fps{begin(b), begin(b) + deg}; } /** * @brief 平方根 * @docs docs/fps/fps-sqrt.md */ template mint LinearRecurrence(long long k, FormalPowerSeries Q, FormalPowerSeries P) { Q.shrink(); mint ret = 0; if(P.size() >= Q.size()) { auto R = P / Q; P -= R * Q; P.shrink(); if(k < (int)R.size()) ret += R[k]; } if((int)P.size() == 0) return ret; FormalPowerSeries::set_fft(); if(FormalPowerSeries::ntt_ptr == nullptr) { P.resize((int)Q.size() - 1); while(k) { auto Q2 = Q; for(int i = 1; i < (int)Q2.size(); i += 2) Q2[i] = -Q2[i]; auto S = P * Q2; auto T = Q * Q2; if(k & 1) { for(int i = 1; i < (int)S.size(); i += 2) P[i >> 1] = S[i]; for(int i = 0; i < (int)T.size(); i += 2) Q[i >> 1] = T[i]; } else { for(int i = 0; i < (int)S.size(); i += 2) P[i >> 1] = S[i]; for(int i = 0; i < (int)T.size(); i += 2) Q[i >> 1] = T[i]; } k >>= 1; } return ret + P[0]; } else { int N = 1; while(N < (int)Q.size()) N <<= 1; P.resize(2 * N); Q.resize(2 * N); P.ntt(); Q.ntt(); vector S(2 * N), T(2 * N); vector btr(N); for(int i = 0, logn = __builtin_ctz(N); i < (1 << logn); i++) { btr[i] = (btr[i >> 1] >> 1) + ((i & 1) << (logn - 1)); } mint dw = mint(FormalPowerSeries::ntt_pr()).inverse().pow((mint::get_mod() - 1) / (2 * N)); while(k) { mint inv2 = mint(2).inverse(); // even degree of Q(x)Q(-x) T.resize(N); for(int i = 0; i < N; i++) T[i] = Q[(i << 1) | 0] * Q[(i << 1) | 1]; S.resize(N); if(k & 1) { // odd degree of P(x)Q(-x) for(auto &i : btr) { S[i] = (P[(i << 1) | 0] * Q[(i << 1) | 1] - P[(i << 1) | 1] * Q[(i << 1) | 0]) * inv2; inv2 *= dw; } } else { // even degree of P(x)Q(-x) for(int i = 0; i < N; i++) { S[i] = (P[(i << 1) | 0] * Q[(i << 1) | 1] + P[(i << 1) | 1] * Q[(i << 1) | 0]) * inv2; } } swap(P, S); swap(Q, T); k >>= 1; if(k < N) break; P.ntt_doubling(); Q.ntt_doubling(); } P.intt(); Q.intt(); return ret + (P * (Q.inv()))[k]; } } template mint kitamasa(long long N, FormalPowerSeries Q, FormalPowerSeries a) { assert(!Q.empty() && Q[0] != 0); if(N < (int)a.size()) return a[N]; assert((int)a.size() >= int(Q.size()) - 1); auto P = a.pre((int)Q.size() - 1) * Q; P.resize(Q.size() - 1); return LinearRecurrence(N, Q, P); } template mint nth_term(long long n, const vector &s) { using fps = FormalPowerSeries; auto bm = BerlekampMassey(s); return kitamasa(n, fps{begin(bm), end(bm)}, fps{begin(s), end(s)}); } /** * @brief 線形回帰数列の高速計算(Berlekamp-Massey/Bostan-Mori) * @docs docs/fps/nth-term.md */ using mint = LazyMontgomeryModInt<998244353>; using fps = FormalPowerSeries; using vmint = vector; template struct Binomial { vector f, g, h; Binomial(int MAX = 0) : f(1, T(1)), g(1, T(1)), h(1, T(1)) { while(MAX >= (int)f.size()) extend(); } void extend() { int n = f.size(); int m = n * 2; f.resize(m); g.resize(m); h.resize(m); for(int i = n; i < m; i++) f[i] = f[i - 1] * T(i); g[m - 1] = f[m - 1].inverse(); h[m - 1] = g[m - 1] * f[m - 2]; for(int i = m - 2; i >= n; i--) { g[i] = g[i + 1] * T(i + 1); h[i] = g[i] * f[i - 1]; } } T fac(int i) { if(i < 0) return T(0); while(i >= (int)f.size()) extend(); return f[i]; } T finv(int i) { if(i < 0) return T(0); while(i >= (int)g.size()) extend(); return g[i]; } T inv(int i) { if(i < 0) return -inv(-i); while(i >= (int)h.size()) extend(); return h[i]; } T C(int n, int r) { if(n < 0 || n < r || r < 0) return T(0); return fac(n) * finv(n - r) * finv(r); } inline T operator()(int n, int r) { return C(n, r); } template T multinomial(const vector &r) { static_assert(is_integral::value == true); int n = 0; for(auto &x : r) { if(x < 0) return T(0); n += x; } T res = fac(n); for(auto &x : r) res *= finv(x); return res; } template T operator()(const vector &r) { return multinomial(r); } T C_naive(int n, int r) { if(n < 0 || n < r || r < 0) return T(0); T ret = T(1); r = min(r, n - r); for(int i = 1; i <= r; ++i) ret *= inv(i) * (n--); return ret; } T P(int n, int r) { if(n < 0 || n < r || r < 0) return T(0); return fac(n) * finv(n - r); } T H(int n, int r) { if(n < 0 || r < 0) return T(0); return r == 0 ? 1 : C(n + r - 1, r); } }; Binomial binomial; mint inv(int i) { return binomial.inv(i); } mint C(int r, int c) { return binomial.C(r, c); } mint P(int r, int c) { return binomial.P(r, c); } mint fact(int r) { return binomial.fac(r); } mint ifact(int r) { return binomial.finv(r); } } // namespace Modular998 using namespace Modular998; template FormalPowerSeries mod_pow(int64_t k, const FormalPowerSeries &base, const FormalPowerSeries &d) { using fps = FormalPowerSeries; assert(!d.empty()); auto inv = d.rev().inv(); auto quo = [&](const fps &poly) { if(poly.size() < d.size()) return fps{}; int n = poly.size() - d.size() + 1; return (poly.rev().pre(n) * inv.pre(n)).pre(n).rev(); }; fps res{1}, b(base); while(k) { if(k & 1) { res *= b; res -= quo(res) * d; res.shrink(); } b *= b; b -= quo(b) * d; b.shrink(); k >>= 1; assert(b.size() + 1 <= d.size()); assert(res.size() + 1 <= d.size()); } return res; } /** * @brief Mod-Pow ($f(x)^k \mod g(x)$) */ pair, fps> extgcd(fps a, fps b) { fps x1 = {1}, y1 = {}, d1 = a; fps x2 = {}, y2 = {1}, d2 = b; while(true) { fps ret = d1 % d2; fps k = (d1 - ret) / d2; ret.shrink(); if(empty(ret)) return {{x2, y2}, d2}; // ax1 + by1 = d1 // ax2 + by2 = d2 // a(x1 - x2 * k) + b(y1 - y2 * k) = d1 - d2 * k = ret d1 = ret; x1 -= x2 * k, y1 -= y2 * k; x1.shrink(), y1.shrink(); swap(x1, x2), swap(y1, y2), swap(d1, d2); dump(si(x1), si(y1), si(d1)); dump(si(x2), si(y2), si(d2)); } // ax + by = s } int main() { INT(m, k, r); assert(inc(m, 2, 998244353)); assert(inc(k, 1, min(10000, m))); assert(inc(r, 0, m)); fps h(k + 1); mint im = -mint(m).inverse(), ik = mint(k).inverse(); h[0] = im; rep(i, 1, k + 1) h[i] = -ik * im; fps h2 = h * fps{1, -1}; fps t_over_h = fps{1} - mod_pow(m, fps{0, 1}, h2); t_over_h /= fps{1, -1}; auto [F, s] = extgcd(t_over_h, h); auto [p, y] = F; p /= s; p += {-1, 1}; h2 = -h2; dump(p); dump(h2); OUT(LinearRecurrence(r, h2, p) - LinearRecurrence(0, h2, p)); }