#line 1 "No_1145_Sums_of_Powers.cpp" #define YRSD // #include "YRS/aa/fast.hpp" #line 2 "YRS/all.hpp" #line 2 "YRS/aa/head.hpp" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define TE template #define TES template #define Z auto #define ep emplace_back #define eb emplace #define fi first #define se second #define all(x) (x).begin(), (x).end() #define OV4(a, b, c, d, e, ...) e #define FOR1(a) for (int _ = 0; _ < (a); ++_) #define FOR2(i, a) for (int i = 0; i < (a); ++i) #define FOR3(i, a, b) for (int i = (a); i < (b); ++i) #define FOR4(i, a, b, c) for (int i = (a); i < (b); i += (c)) #define FOR(...) OV4(__VA_ARGS__, FOR4, FOR3, FOR2, FOR1)(__VA_ARGS__) #define FOR1_R(a) for (int _ = (a) - 1; _ >= 0; --_) #define FOR2_R(i, a) for (int i = (a) - 1; i >= 0; --i) #define FOR3_R(i, a, b) for (int i = (b) - 1; i >= (a); --i) #define FOR4_R(i, a, b, c) for (int i = (b) - 1; i >= (a); i -= (c)) #define FOR_R(...) OV4(__VA_ARGS__, FOR4_R, FOR3_R, FOR2_R, FOR1_R)(__VA_ARGS__) #define FOR_subset(t, s) for (int t = (s); t > -1; t = (t == 0 ? -1 : (t - 1) & s)) #define sort ranges::sort using namespace std; TE using vc = vector; TE using vvc = vc>; TE using T1 = tuple; TE using T2 = tuple; TE using T3 = tuple; TE using T4 = tuple; TE using max_heap = priority_queue; TE using min_heap = priority_queue, greater>; using u8 = unsigned char; using uint = unsigned int; using ll = long long; using ull = unsigned long long; using ld = long double; using i128 = __int128; using u128 = __uint128_t; using f128 = __float128; using u16 = uint16_t; using PII = pair; using PLL = pair; #ifdef YRSD constexpr bool dbg = 1; #else constexpr bool dbg = 0; #endif #line 2 "YRS/IO/IO.hpp" istream &operator>>(istream &I, i128 &x) { static string s; I >> s; int f = s[0] == '-'; x = 0; const int N = (int)s.size(); FOR(i, f, N) x = x * 10 + s[i] - '0'; if (f) x = -x; return I; } ostream &operator<<(ostream &O, i128 x) { static string s; s.clear(); bool f = x < 0; if (f) x = -x; while (x) s += '0' + x % 10, x /= 10; if (s.empty()) s += '0'; if (f) s += '-'; reverse(all(s)); return O << s; } istream &operator>>(istream &I, f128 &x) { static string s; I >> s, x = stold(s); return I; } ostream &operator<<(ostream &O, const f128 x) { return O << ld(x); } template istream &operator>>(istream &I, tuple &t) { return apply([&I](Z &...s) { ((I >> s), ...); }, t), I; } template istream &operator>>(istream &I, pair &x) { return I >> x.fi >> x.se; } template ostream &operator<<(ostream &O, const pair &x) { return O << x.fi << ' ' << x.se; } TE requires requires(T &c) { begin(c); end(c); } and (not is_same_v, string>) istream &operator>>(istream &I, T &c) { for (Z &e : c) I >> e; return I; } TE requires requires(const T &c) { begin(c); end(c); } and (not is_same_v, const char*>) and (not is_same_v, string>) and (not is_array_v> or not is_same_v>, char>) ostream &operator<<(ostream &O, const T &a) { if (a.empty()) return O; Z i = a.begin(); O << *i++; for (; i != a.end(); ++i) O << ' ' << *i; return O; } void IN() {} TE void IN(T &x, Z &...s) { cin >> x, IN(s...); } void print() { cout << '\n'; } TES void print(T &&x, S &&...y) { cout << x; if constexpr (sizeof...(S)) cout << ' '; print(forward(y)...); } void put() {} TES void put(T &&x, S &&...y) { cout << x; put(forward(y)...); } #define INT(...) int __VA_ARGS__; IN(__VA_ARGS__) #define UINT(...) uint __VA_ARGS__; IN(__VA_ARGS__) #define LL(...) ll __VA_ARGS__; IN(__VA_ARGS__) #define ULL(...) ull __VA_ARGS__; IN(__VA_ARGS__) #define I128(...) i128 __VA_ARGS__; IN(__VA_ARGS__) #define STR(...) string __VA_ARGS__; IN(__VA_ARGS__) #define CH(...) char __VA_ARGS__; IN(__VA_ARGS__) #define REAL(...) re __VA_ARGS__; IN(__VA_ARGS__) #define VEC(T, a, n) vc a(n); IN(a) void YES(bool o = 1) { print(o ? "YES" : "NO"); } void Yes(bool o = 1) { print(o ? "Yes" : "No"); } void yes(bool o = 1) { print(o ? "yes" : "no"); } void NO(bool o = 1) { YES(not o); } void No(bool o = 1) { Yes(not o); } void no(bool o = 1) { yes(not o); } void ALICE(bool o = 1) { print(o ? "ALICE" : "BOB"); } void Alice(bool o = 1) { print(o ? "Alice" : "Bob"); } void alice(bool o = 1) { print(o ? "alice" : "bob"); } void BOB(bool o = 1) { ALICE(not o); } void Bob(bool o = 1) { Alice(not o); } void bob(bool o = 1) { alice(not o); } void POSSIBLE(bool o = 1) { print(o ? "POSSIBLE" : "IMPOSSIBLE"); } void Possible(bool o = 1) { print(o ? "Possible" : "Impossible"); } void possible(bool o = 1) { print(o ? "possible" : "impossible"); } void IMPOSSIBLE(bool o = 1) { POSSIBLE(not o); } void Impossible(bool o = 1) { Possible(not o); } void impossible(bool o = 1) { possible(not o); } void TAK(bool o = 1) { print(o ? "TAK" : "NIE"); } void NIE(bool o = 1) { TAK(not o); } #line 5 "YRS/all.hpp" #if (__cplusplus >= 202002L) #include constexpr ld pi = numbers::pi_v; #endif TE constexpr T inf = numeric_limits::max(); template <> constexpr i128 inf = i128(inf) * 2'000'000'000'000'000'000; template constexpr pair inf> = {inf, inf}; TE constexpr static inline int pc(T x) { return popcount(make_unsigned_t(x)); } constexpr static inline ll len(const Z &a) { return a.size(); } void reverse(Z &a) { reverse(all(a)); } void unique(Z &a) { sort(a); a.erase(unique(all(a)), a.end()); } TE vc inverse(const vc &a) { int N = len(a); vc b(N, -1); FOR(i, N) if (a[i] != -1) b[a[i]] = i; return b; } Z QMAX(const Z &a) { return *max_element(all(a)); } Z QMIN(const Z &a) { return *min_element(all(a)); } TE Z QMAX(T l, T r) { return *max_element(l, r); } TE Z QMIN(T l, T r) { return *min_element(l, r); } constexpr bool chmax(Z &a, const Z &b) { return (a < b ? a = b, 1 : 0); } constexpr bool chmin(Z &a, const Z &b) { return (a > b ? a = b, 1 : 0); } vc argsort(const Z &a) { vc I(len(a)); iota(all(I), 0); sort(I, [&](int i, int k) { return a[i] < a[k] or (a[i] == a[k] and i < k); }); return I; } TE vc rearrange(const vc &a, const vc &I) { int N = len(I); vc b(N); FOR(i, N) b[i] = a[I[i]]; return b; } template vc pre_sum(const vc &a) { int N = len(a); vc c(N + 1); FOR(i, N) c[i + 1] = c[i] + a[i]; if (of == 0) c.erase(c.begin()); return c; } TE constexpr static int topbit(T x) { if (x == 0) return - 1; if constexpr (sizeof(T) <= 4) return 31 - __builtin_clz(x); else return 63 - __builtin_clzll(x); } TE constexpr static int lowbit(T x) { if (x == 0) return -1; if constexpr (sizeof(T) <= 4) return __builtin_ctz(x); else return __builtin_ctzll(x); } TE constexpr T floor(T x, T y) { return x / y - (x % y and (x ^ y) < 0); } TE constexpr T ceil(T x, T y) { return floor(x + y - 1, y); } TE constexpr T bmod(T x, T y) { return x - floor(x, y) * y; } TE constexpr pair divmod(T x, T y) { T q = floor(x, y); return pair{q, x - q * y}; } template T SUM(const Z &v) { return accumulate(all(v), T(0)); } int lb(const Z &a, Z x) { return lower_bound(all(a), x) - a.begin(); } TE int lb(T l, T r, Z x) { return lower_bound(l, r, x) - l; } int ub(const Z &a, Z x) { return upper_bound(all(a), x) - a.begin(); } TE int ub(T l, T r, Z x) { return upper_bound(l, r, x) - l; } template ll bina(Z f, ll l, ll r) { if constexpr (ck) assert(f(l)); while (abs(l - r) > 1) { ll x = (r + l) >> 1; (f(x) ? l : r) = x; } return l; } TE T bina_real(Z f, T l, T r, int c = 100) { while (c--) { T x = (l + r) / 2; (f(x) ? l : r) = x; } return (l + r) / 2; } Z pop(Z &s) { if constexpr (requires { s.pop_back(); }) { Z x = s.back(); return s.pop_back(), x; } else if constexpr (requires { s.top(); }) { Z x = s.top(); return s.pop(), x; } else { Z x = s.front(); return s.pop(), x; } } void setp(int x) { cout << fixed << setprecision(x); } TE inline void sh(vc &a, int N, T b = {}) { a.resize(N, b); } #line 1 "YRS/debug.hpp" #ifdef YRSD void DBG() { cerr << "]" << endl; } TES void DBG(T &&x, S &&...y) { cerr << x; if constexpr (sizeof...(S)) cerr << ", "; DBG(forward(y)...); } #define debug(...) cerr << "[" << __LINE__ << "]: [" #__VA_ARGS__ "] = [", DBG(__VA_ARGS__) void ERR() { cerr << endl; } TES void ERR(T &&x, S &&...y) { cerr << x; if constexpr (sizeof...(S)) cerr << ", "; ERR(forward(y)...); } #define err(...) cerr << "[" << __LINE__ << "]: ", ERR(__VA_ARGS__) #define asser assert #else #define debug(...) void(0721) #define err(...) void(0721) #endif #line 2 "YRS/IO/fast_io.hpp" #define FIO static constexpr uint SZ = 1 << 17; char ibuf[SZ]; char obuf[SZ]; char out[100]; // pointer of ibuf, obuf uint pil = 0, pir = 0, por = 0; struct Pre { char num[10000][4]; constexpr Pre() : num() { for (int i = 0; i < 10000; i++) { int n = i; for (int j = 3; j >= 0; j--) { num[i][j] = n % 10 | '0'; n /= 10; } } } } constexpr pre; inline void load() { memcpy(ibuf, ibuf + pil, pir - pil); pir = pir - pil + fread(ibuf + pir - pil, 1, SZ - pir + pil, stdin); pil = 0; if (pir < SZ) ibuf[pir++] = '\n'; } inline void flush() { fwrite(obuf, 1, por, stdout); por = 0; } inline void rd(char &c) { do { if (pil + 1 > pir) load(); c = ibuf[pil++]; } while (isspace(c)); } inline void rd(string &x) { x.clear(); char c; do { if (pil + 1 > pir) load(); c = ibuf[pil++]; } while (isspace(c)); do { x += c; if (pil == pir) load(); c = ibuf[pil++]; } while (!isspace(c)); } TE inline void rd_real(T &x) { string s; rd(s); x = stod(s); } TE inline void rd_integer(T &x) { if (pil + 100 > pir) load(); char c; do c = ibuf[pil++]; while (c < '-'); bool minus = 0; if constexpr (is_signed::value || is_same_v) { if (c == '-') { minus = 1, c = ibuf[pil++]; } } x = 0; while ('0' <= c) { x = x * 10 + (c & 15), c = ibuf[pil++]; } if constexpr (is_signed::value || is_same_v) { if (minus) x = -x; } } inline void rd(int16_t &x) { rd_integer(x); } inline void rd(uint16_t &x) { rd_integer(x); } inline void rd(int &x) { rd_integer(x); } inline void rd(long &x) { rd_integer(x); } inline void rd(ll &x) { rd_integer(x); } inline void rd(i128 &x) { rd_integer(x); } inline void rd(uint &x) { rd_integer(x); } inline void rd(ull &x) { rd_integer(x); } inline void rd(u128 &x) { rd_integer(x); } inline void rd(double &x) { rd_real(x); } inline void rd(long double &x) { rd_real(x); } inline void rd(f128 &x) { rd_real(x); } template inline void rd(pair &p) { return rd(p.fi), rd(p.se); } template inline void rd_tuple(T &t) { if constexpr (N < tuple_size::value) { Z &x = get(t); rd(x); rd_tuple(t); } } template inline void rd(tuple &tpl) { rd_tuple(tpl); } template inline void rd(array &x) { for (Z &e : x) rd(e); } TE inline void rd(vc &x) { for (Z &e : x) rd(e); } inline void read() {} template inline void read(H &h, T &...t) { rd(h), read(t...); } inline void wt(const char c) { if (por == SZ) flush(); obuf[por++] = c; } inline void wt(const string s) { for (char c : s) wt(c); } inline void wt(const char *s) { size_t len = strlen(s); for (size_t i = 0; i < len; i++) wt(s[i]); } TE inline void wt_integer(T x) { if (por > SZ - 100) flush(); if (x < 0) { obuf[por++] = '-', x = -x; } int outi; for (outi = 96; x >= 10000; outi -= 4) { memcpy(out + outi, pre.num[x % 10000], 4); x /= 10000; } if (x >= 1000) { memcpy(obuf + por, pre.num[x], 4); por += 4; } else if (x >= 100) { memcpy(obuf + por, pre.num[x] + 1, 3); por += 3; } else if (x >= 10) { int q = (x * 103) >> 10; obuf[por] = q | '0'; obuf[por + 1] = (x - q * 10) | '0'; por += 2; } else obuf[por++] = x | '0'; memcpy(obuf + por, out + outi + 4, 96 - outi); por += 96 - outi; } TE inline void wt_real(T x) { ostringstream oss; oss << fixed << setprecision(10) << double(x); string s = oss.str(); wt(s); } inline void wt(int x) { wt_integer(x); } inline void wt(long x) { wt_integer(x); } inline void wt(ll x) { wt_integer(x); } inline void wt(i128 x) { wt_integer(x); } inline void wt(uint x) { wt_integer(x); } inline void wt(ull x) { wt_integer(x); } inline void wt(u128 x) { wt_integer(x); } inline void wt(double x) { wt_real(x); } inline void wt(long double x) { wt_real(x); } inline void wt(f128 x) { wt_real(x); } template inline void wt(const pair &val) { wt(val.fi); wt(' '); wt(val.se); } template inline void wt_tuple(const T &t) { if constexpr (N < tuple_size::value) { if constexpr (N > 0) { wt(' '); } const Z x = get(t); wt(x); wt_tuple(t); } } template inline void wt(tuple &tpl) { wt_tuple(tpl); } template inline void wt(const array &val) { Z n = val.size(); for (size_t i = 0; i < n; i++) { if (i) wt(' '); wt(val[i]); } } TE inline void wt(const vc &a) { int N = len(a); FOR(i, N) { if (i) wt(' '); wt(a[i]); } } TE inline void wt(const vc> &v) { int N = len(v); FOR(i, N) { wt(v[i]); if (i + 1 != N) wt('\n'); } } template inline void wt(const vc> &v) { int N = len(v); FOR(i, N) { wt(v[i]); if (i + 1 != N) wt('\n'); } } // gcc expansion. called automaticall after main. inline void __attribute__((destructor)) _d() { flush(); } inline void println() { wt('\n'); } template inline void println(Head &&head, Tail &&...tail) { wt(head); if (sizeof...(Tail)) wt(' '); println(forward(tail)...); } #define IN(...) read(__VA_ARGS__) #define print(...) println(__VA_ARGS__) #define FLUSH() flush() #line 6 "No_1145_Sums_of_Powers.cpp" // #include "YRS/random/rng.hpp" // #include "YRS/ds/basic/retsu.hpp" // #include "YRS/mod/mint.hpp" // #include "YRS/aa/def.hpp" #line 2 "YRS/poly/sum_of_pow.hpp" #line 2 "YRS/poly/conv_all.hpp" #line 2 "YRS/poly/c/bs.hpp" #line 2 "YRS/poly/c/fps_t.hpp" #line 2 "YRS/mod/mint_t.hpp" #define c constexpr template struct mint_t { using T = mint_t; static c uint m = mod; uint x; c inline uint val() const { return x; } c mint_t() : x(0) {} c mint_t(uint x) : x(x % m) {} c mint_t(ull x) : x(x % m) {} c mint_t(u128 x) : x(x % m) {} c mint_t(int x) : x((x %= mod) < 0 ? x + mod : x) {} c mint_t(ll x) : x((x %= mod) < 0 ? x + mod : x) {} c mint_t(i128 x) : x((x %= mod) < 0 ? x + mod : x) {} c T &operator+=(T p) { if ((x += p.x) >= m) x -= m; return *this; } c T &operator-=(T p) { if ((x += m - p.x) >= m) x -= m; return *this; } c T operator+(T p) const { return T(*this) += p; } c T operator-(T p) const { return T(*this) -= p; } c T &operator*=(T p) { x = ull(x) * p.x % m; return *this; } c T operator*(T p) const { return T(*this) *= p; } c T &operator/=(T p) { return *this *= p.inv(); } c T operator/(T p) const { return T(*this) /= p; } c T operator-() const { return T::gen(x ? mod - x : 0); } c T inv() const { int a = x, b = mod, x = 1, y = 0; while (b > 0) { int t = a / b; swap(a -= t * b, b); swap(x -= t * y, y); } return T(x); } c T pow(ll k) const { if (k < 0) return inv().pow(-k); T s(1), a(x); for (; k; k >>= 1, a *= a) if (k & 1) s *= a; return s; } c bool operator<(T p) const { return x < p.x; } c bool operator==(T p) const { return x == p.x; } c bool operator!=(T p) const { return x != p.x; } static c T gen(uint x) { T s; s.x = x; return s; } friend istream &operator>>(istream &cin, T &p) { ll t; cin >> t; p = t; return cin; } friend ostream &operator<<(ostream &cout, T p) { return cout << p.x; } static c int get_mod() { return mod; } static c PII ntt_info() { if (mod == 167772161) return {25, 17}; if (mod == 469762049) return {26, 30}; if (mod == 754974721) return {24, 362}; if (mod == 998244353) return {23, 31}; if (mod == 120586241) return {20, 74066978}; if (mod == 880803841) return {23, 211}; if (mod == 943718401) return {22, 663003469}; if (mod == 1004535809) return {21, 582313106}; if (mod == 1012924417) return {21, 368093570}; return {-1, -1}; } static c bool can_ntt() { return ntt_info().fi != -1; } }; #undef c using M99 = mint_t<998244353>; using M17 = mint_t<1000000007>; #ifdef FIO template void rd(mint_t &x) { LL(y); x = y; } template void wt(mint_t x) { wt(x.x); } #endif #line 2 "YRS/poly/c/binom.hpp" TE struct binom { const int p = T::get_mod(); vc fa{1, 1}, ifa{1, 1}, in{0, 1}; T inv(int n) { assert(0 <= n); while (len(in) <= n) { int k = len(in); int q = (p + k - 1) / k; int r = k * q - p; in.ep(in[r] * T(q)); } return in[n]; } T fac(int n) { if (n >= p) return 0; while (len(fa) <= n) { int k = len(fa); fa.ep(fa[k - 1] * T(k)); } return fa[n]; } T ifac(int n) { if (n < 0) return T(0); while (len(ifa) <= n) ifa.ep(ifa.back() * inv(len(ifa))); return ifa[n]; } T C(int N, int K) { assert(N >= 0); if (K < 0 or N < K) return 0; return fac(N) * ifac(K) * ifac(N - K); } T lucas(ll N, ll K) { if (K > N) return 0; if (K == 0) return 1; return C(N % p, K % p) * lucas(N / p, K / p); } T C_naive(ll N, ll K) { assert(N >= 0); if (K < 0 or N < K) return 0; chmin(K, N - K); T x = 1; FOR(i, K) x *= (N - i); return x * ifac(K); } }; #line 5 "YRS/poly/c/fps_t.hpp" // 动态模数需要在设置模数后进行构造 TE struct fps_t : binom { using fps = vc; using cf = const fps; using U = binom; using U::inv, U::fac, U::ifac, U::C; static constexpr int p0 = 167'772'161, p1 = 469'762'049, p2 = 754'974'721; using f0 = fps_t>; using f1 = fps_t>; using f2 = fps_t>; static void sh(fps &a, int N); // 非0项数量 static int count_terms(cf &f); T crt(ull a, ull b, ull c); static void ntt(fps &a, bool in); static void trans_ntt(fps &a, bool in); static void ntt_db(fps &f, bool transed = 0); fps conv_naive(cf &a, cf &b); fps conv_kara(cf &f, cf &g); static fps conv_ntt(fps a, fps b); fps conv_mtt(cf &a, cf &b); fps conv(cf &a, cf &b); static fps sq_ntt(fps a); fps sq_mtt(cf &a); fps sq(cf &a); // 微分 fps diff(cf &f); // 积分 fps inte(cf &f); // 定积分 T inte(cf &f, T l, T r); fps inv_sp(cf &f); fps inv_ntt(cf &a); fps inv_mtt(cf &a); fps inv(cf &f); fps div_sp(fps f, fps g); fps div_ntt(cf &f, cf &g); fps div_mtt(fps f, fps g); fps div_dense(cf &f, cf &g); fps div(cf &f, cf &g); fps log_sp(cf &f); fps log_dense(cf &f); fps log(cf &f); fps exp_sp(cf &f); fps exp_ntt(cf &f); fps exp_mtt(cf &e); fps exp_dense(cf &f); fps exp(cf &f); fps pw_sp(cf &f, T k); fps pw_dense(cf &f, T k); fps pw(cf &f, T k); fps pow(cf &f, ll k); // O(Nlog^2N) N 为总度数 fps conv_all(vc f); // prod 1 - f[i]x fps conv_all(fps f); fps sum_of_pow(cf &a, int N); fps sum_of_pow(ll l, ll r, int N); fps sum_of_pow(cf &a, cf &c, int N); pair sum_of_rationals_1(cf &a, cf &b); }; #line 4 "YRS/poly/c/bs.hpp" TE Z fps_t::sh(fps &a, int N) -> void { a.resize(N); } TE Z fps_t::count_terms(cf &f) -> int { int s = 0, N = len(f); FOR(i, N) s += f[i].val() != 0; return s; } TE Z fps_t::crt(ull a, ull b, ull c) -> T { constexpr ull x = 104'391'568, xx = 190'329'765; ull t = (b - a + p1) * x % p1, s = a + t * p0; t = (c - s % p2 + p2) * xx % p2; return T(s) + T(t) * T(ull(p0) * p1); } TE Z fps_t::ntt(fps &a, bool in) -> void { assert(T::can_ntt()); const int p = T::ntt_info().fi; const uint m = T::get_mod(); static array r, ir, ra, ira, rat, irat; assert(p != -1 and len(a) <= (1 << max(0, p))); static bool ok = 0; if (not ok) { ok = 1; r[p] = T::ntt_info().se; ir[p] = T(1) / r[p]; FOR_R(i, p) { r[i] = r[i + 1] * r[i + 1]; ir[i] = ir[i + 1] * ir[i + 1]; } T s = 1, in = 1; FOR(i, p - 1) { ra[i] = r[i + 2] * s; ira[i] = ir[i + 2] * in; s *= ir[i + 2]; in *= r[i + 2]; } s = 1, in = 1; FOR(i, p - 2) { rat[i] = r[i + 3] * s; irat[i] = ir[i + 3] * in; s *= ir[i + 3]; in *= r[i + 3]; } } int N = len(a), n = topbit(N); if (not in) { int sz = 0; while (sz < n) { if (n - sz == 1) { int p = 1 << (n - sz - 1); T c = 1; FOR(s, 1 << sz) { int of = s << (n - sz); FOR(i, p) { T l = a[i + of], r = a[i + of + p] * c; a[i + of] = l + r, a[i + of + p] = l - r; } c *= ra[topbit(~s & -~s)]; } ++sz; } else { int p = 1 << (n - sz - 2); T c = 1, in = r[2]; FOR(s, 1 << sz) { T r2 = c * c, r3 = r2 * c; int of = s << (n - sz); FOR(i, p) { const ull mm = ull(m) * m; ull a0 = a[i + of].val(), a1 = ull(a[i + of + p].val()) * c.val(); ull aa = ull(a[i + of + 2 * p].val()) * r2.val(); ull bb = ull(a[i + of + 3 * p].val()) * r3.val(); ull t = (a1 + mm - bb) % m * in.val(); ull na = mm - aa; a[i + of] = a0 + a1 + aa + bb; a[i + of + p] = a0 + aa + mm * 2 - a1 - bb; a[i + of + 2 * p] = a0 + na + t; a[i + of + 3 * p] = a0 + na + mm - t; } c *= rat[topbit(~s & -~s)]; } sz += 2; } } } else { T c = T(1) / T(N); FOR(i, N) a[i] *= c; int sz = n; while (sz) { if (sz == 1) { int p = 1 << (n - sz); T c = 1; FOR(s, 1 << (sz - 1)) { int of = s << (n - sz + 1); FOR(i, p) { ull l = a[i + of].val(), r = a[i + of + p].val(); a[i + of] = l + r; a[i + of + p] = (m + l - r) * c.val(); } c *= ira[topbit(~s & -~s)]; } --sz; } else { int p = 1 << (n - sz); T c = 1, in = ir[2]; FOR(s, 1 << (sz - 2)) { T r2 = c * c, r3 = r2 * c; int of = s << (n - sz + 2); FOR(i, p) { ull a0 = a[i + of].val(), a1 = a[i + of + p].val(); ull aa = a[i + of + 2 * p].val(); ull bb = a[i + of + 3 * p].val(); ull x = (m + aa - bb) * in.val() % m; a[i + of] = a0 + a1 + aa + bb; a[i + of + p] = (a0 + m - a1 + x) * c.val(); a[i + of + 2 * p] = (a0 + a1 + 2 * m - aa - bb) * r2.val(); a[i + of + 3 * p] = (a0 + 2 * m - a1 - x) * r3.val(); } c *= irat[topbit(~s & -~s)]; } sz -= 2; } } } } TE Z fps_t::conv_naive(cf &a, cf &b) -> fps { int N = len(a), M = len(b), sz = N + M - 1; if (not N or not M) return {}; if (N > M) return conv_naive(b, a); fps c(sz); FOR(i, N) FOR(k, M) c[i + k] += a[i] * b[k]; return c; } TE Z fps_t::conv_kara(cf &f, cf &g) -> fps { constexpr int lm = 30; if (min(len(f), len(g)) <= lm) return conv_naive(f, g); int N = max(len(f), len(g)), M = ceil(N, 2); fps f1, f2, g1, g2; if (len(f) < M) f1 = f; if (len(f) >= M) f1 = {f.begin(), f.begin() + M}; if (len(f) >= M) f2 = {f.begin() + M, f.end()}; if (len(g) < M) g1 = g; if (len(g) >= M) g1 = {g.begin(), g.begin() + M}; if (len(g) >= M) g2 = {g.begin() + M, g.end()}; fps a = conv_kara(f1, g1); fps b = conv_kara(f2, g2); FOR(i, len(f2)) f1[i] += f2[i]; FOR(i, len(g2)) g1[i] += g2[i]; fps c = conv_kara(f1, g1); fps F(len(f) + len(g) - 1); FOR(i, len(a)) F[i] += a[i], c[i] -= a[i]; FOR(i, len(b)) F[2 * M + i] += b[i], c[i] -= b[i]; if (c.back() == T(0)) c.pop_back(); FOR(i, len(c)) if (c[i] != T(0)) F[M + i] += c[i]; return F; } TE Z fps_t::conv_ntt(fps a, fps b) -> fps { assert(T::can_ntt()); int N = len(a), M = len(b), sz = 1; if (min(N, M) == 0) return {}; while (sz < N + M - 1) sz <<= 1; sh(a, sz), sh(b, sz); ntt(a, 0); ntt(b, 0); FOR(i, sz) a[i] *= b[i]; ntt(a, 1); sh(a, N + M - 1); return a; } TE Z fps_t::conv_mtt(cf &a, cf &b) -> fps { int N = len(a), M = len(b); if (not N or not M) return {}; f0::fps a0(N), b0(M); f1::fps a1(N), b1(M); f2::fps a2(N), b2(M); FOR(i, N) a0[i] = a[i].val(), a1[i] = a[i].val(), a2[i] = a[i].val(); FOR(i, M) b0[i] = b[i].val(), b1[i] = b[i].val(), b2[i] = b[i].val(); Z c0 = f0::conv_ntt(a0, b0); Z c1 = f1::conv_ntt(a1, b1); Z c2 = f2::conv_ntt(a2, b2); fps c(len(c0)); FOR(i, N + M - 1) c[i] = crt(c0[i].val(), c1[i].val(), c2[i].val()); return c; } TE Z fps_t::conv(cf &a, cf &b) -> fps { int N = len(a), M = len(b); if (min(N, M) == 0) return {}; if (T::can_ntt()) { if (min(N, M) <= 50) return conv_kara(a, b); return conv_ntt(a, b); } if (min(N, M) <= 200) return conv_kara(a, b); return conv_mtt(a, b); } TE Z fps_t::sq_ntt(fps a) -> fps { assert(T::can_ntt()); int N = len(a), sz = 1; if (N == 0) return {}; while (sz < N + N - 1) sz <<= 1; sh(a, sz); ntt(a, 0); FOR(i, sz) a[i] *= a[i]; ntt(a, 1); sh(a, N + N - 1); return a; } TE Z fps_t::sq_mtt(cf &a) -> fps { int N = len(a); if (N == 0) return {}; f0::fps a0(N); f1::fps a1(N); f2::fps a2(N); FOR(i, N) a0[i] = a[i].val(), a1[i] = a[i].val(), a2[i] = a[i].val(); Z c0 = f0::sq_ntt(a0); Z c1 = f1::sq_ntt(a1); Z c2 = f2::sq_ntt(a2); fps c(len(c0)); FOR(i, N + N - 1) c[i] = crt(c0[i].val(), c1[i].val(), c2[i].val()); return c; } TE Z fps_t::sq(cf &a) -> fps { int N = len(a); if (T::can_ntt()) { if (N <= 50) return conv_naive(a, a); return sq_ntt(a); } if (N <= 150) return conv_kara(a, a); return sq_mtt(a); } TE Z fps_t::diff(cf &f) -> fps { int N = len(f); if (N <= 1) return {}; fps g(N - 1); FOR(i, N - 1) g[i] = f[i + 1] * T(i + 1); return g; } TE Z fps_t::inte(cf &f) -> fps { int N = len(f); fps g(N + 1); FOR(i, 1, N + 1) g[i] = f[i - 1] * inv(i); return g; } TE Z fps_t::inte(cf &f, T l, T r) -> T { T s = 0, L = 1, R = 1; int N = len(f); FOR(i, N) { L *= l, R *= r; s += inv(i + 1) * f[i] * (L - R); } return s; } #line 2 "YRS/poly/c/ntt_db.hpp" #line 2 "YRS/poly/c/trans_mtt.hpp" #line 4 "YRS/poly/c/trans_mtt.hpp" TE Z fps_t::trans_ntt(vc &a, bool in) -> void { assert(T::can_ntt()); const int p = T::ntt_info().fi; const uint mod = T::get_mod(); static array r, ir, rt, irt, rat, irat; assert(p != -1 and len(a) <= (1 << max(0, p))); static bool ok = 0; if (not ok) { ok = 1; r[p] = T::ntt_info().se; ir[p] = T(1) / r[p]; FOR_R(i, p) { r[i] = r[i + 1] * r[i + 1]; ir[i] = ir[i + 1] * ir[i + 1]; } T s = 1, in = 1; FOR(i, p - 1) { rt[i] = r[i + 2] * s; irt[i] = ir[i + 2] * in; s *= ir[i + 2]; in *= r[i + 2]; } s = 1, in = 1; FOR(i, p - 2) { rat[i] = r[i + 3] * s; irat[i] = ir[i + 3] * in; s *= ir[i + 3]; in *= r[i + 3]; } } int N = len(a), n = topbit(N); assert(N == 1 << n); if (not in) { int sz = n; while (sz > 0) { if (sz == 1) { int p = 1 << (n - sz); T c = 1; FOR(s, 1 << (sz - 1)) { int of = s << (n - sz + 1); FOR(i, p) { ull l = a[i + of].val(), r = a[i + of + p].val(); a[i + of] = l + r, a[i + of + p] = (mod + l - r) * c.val(); } c *= rt[topbit(~s & -~s)]; } --sz; } else { int p = 1 << (n - sz); T c = 1, in = r[2]; FOR(s, 1 << (sz - 2)) { int of = s << (n - sz + 2); T r2 = c * c, r3 = r2 * c; FOR(i, p) { ull a0 = a[i + of + 0 * p].val(); ull a1 = a[i + of + 1 * p].val(); ull a2 = a[i + of + 2 * p].val(); ull a3 = a[i + of + 3 * p].val(); ull x = (mod + a2 - a3) * in.val() % mod; a[i + of] = a0 + a1 + a2 + a3; a[i + of + 1 * p] = (a0 + mod - a1 + x) * c.val(); a[i + of + 2 * p] = (a0 + a1 + 2 * mod - a2 - a3) * r2.val(); a[i + of + 3 * p] = (a0 + 2 * mod - a1 - x) * r3.val(); } c *= rat[topbit(~s & -~s)]; } sz -= 2; } } } else { T c = T(1) / T(len(a)); FOR(i, len(a)) a[i] *= c; int sz = 0; while (sz < n) { if (sz == n - 1) { int p = 1 << (n - sz - 1); T c = 1; FOR(s, 1 << sz) { int of = s << (n - sz); FOR(i, p) { T l = a[i + of], r = a[i + of + p] * c; a[i + of] = l + r, a[i + of + p] = l - r; } c *= irt[topbit(~s & -~s)]; } ++sz; } else { int p = 1 << (n - sz - 2); T c = 1, in = ir[2]; FOR(s, 1 << sz) { T r2 = c * c, r3 = r2 * c; int of = s << (n - sz); FOR(i, p) { ull m2 = ull(mod) * mod; ull a0 = a[i + of].val(); ull a1 = ull(a[i + of + p].val()) * c.val(); ull a2 = ull(a[i + of + 2 * p].val()) * r2.val(); ull a3 = ull(a[i + of + 3 * p].val()) * r3.val(); ull t = (a1 + m2 - a3) % mod * in.val(); ull na = m2 - a2; a[i + of] = a0 + a1 + a2 + a3; a[i + of + 1 * p] = a0 + a2 + (2 * m2 - a1 - a3); a[i + of + 2 * p] = a0 + na + t; a[i + of + 3 * p] = a0 + na + m2 - t; } c *= irat[topbit(~s & -~s)]; } sz += 2; } } } } #line 5 "YRS/poly/c/ntt_db.hpp" TE Z fps_t::ntt_db(fps &a, bool transed) -> void { static array rt; static bool ok = 0; if (not ok) { ok = 1; const int s = T::ntt_info().fi; rt[s] = T::ntt_info().se; FOR_R(i, s) rt[i] = rt[i + 1] * rt[i + 1]; } if (not transed) { int N = len(a); Z b = a; ntt(b, 1); T r = 1, z = rt[topbit(N << 1)]; FOR(i, N) b[i] *= r, r *= z; ntt(b, 0); copy(all(b), back_inserter(a)); } else { int N = len(a) >> 1; vc t{a.begin(), a.begin() + N}; a = {a.begin() + N, a.end()}; trans_ntt(a, 0); T r = 1, z = rt[topbit(N << 1)]; FOR(i, N) a[i] *= r, r *= z; trans_ntt(a, 1); FOR(i, N) a[i] += t[i]; } } #line 5 "YRS/poly/conv_all.hpp" TE Z fps_t::conv_all(vc f) -> fps { if (f.empty()) return {T(1)}; while (1) { int n = len(f); if (n == 1) break; int m = (n + 1) >> 1; FOR(i, m) { if (i + i + 1 == n) f[i] = f[i << 1]; else f[i] = conv(f[i << 1], f[i << 1 | 1]); } f.resize(m); } return f[0]; } TE Z fps_t::conv_all(fps f) -> fps { if (not T::can_ntt()) { vc g; for (T x : f) g.ep(fps{T(1), -x}); return conv_all(g); } constexpr int D = 6; int N = 1, sz = len(f); while (N < sz) N <<= 1; int k = topbit(N); vc s(N), nx(N); FOR(i, sz) s[i] = -f[i]; FOR(d, k) { int b = 1 << d; if (d < D) { fill(all(nx), T(0)); FOR(L, 0, N, b << 1) { FOR(i, b) FOR(j, b) nx[L + i + j] += s[L + i] * s[L + b + j]; FOR(i, b) nx[L + b + i] += s[L + i] + s[L + b + i]; } } else if (d == D) { FOR(L, 0, N, b << 1) { vc sl{s.begin() + L, s.begin() + L + b}; vc sr{s.begin() + L + b, s.begin() + L + 2 * b}; sh(sl, b << 1); sh(sr, b << 1); ntt(sl, 0); ntt(sr, 0); FOR(i, b) nx[L + i] = sl[i] * sr[i] + sl[i] + sr[i]; FOR(i, b, b << 1) nx[L + i] = sl[i] * sr[i] - sl[i] - sr[i]; } } else { FOR(L, 0, N, b << 1) { vc sl{s.begin() + L, s.begin() + L + b}; vc sr{s.begin() + L + b, s.begin() + L + 2 * b}; ntt_db(sl); ntt_db(sr); FOR(i, b) nx[L + i] = sl[i] * sr[i] + sl[i] + sr[i]; FOR(i, b, b << 1) nx[L + i] = sl[i] * sr[i] - sl[i] - sr[i]; } } s.swap(nx); } if (k - 1 >= D) ntt(s, 1); s.ep(1); reverse(s); sh(s, sz + 1); return s; } #line 2 "YRS/poly/fps_log.hpp" #line 2 "YRS/poly/fps_div.hpp" #line 2 "YRS/poly/fps_inv.hpp" #line 4 "YRS/poly/fps_inv.hpp" TE Z fps_t::inv_sp(cf &f) -> fps { int N = len(f); vc> a; FOR(i, 1, N) if (f[i] != T(0)) a.ep(i, f[i]); fps g(N); T t = T(1) / f[0]; g[0] = t; FOR(i, 1, N) { T s = 0; for (Z &&[x, y] : a) { if (x > i) break; s -= y * g[i - x]; } g[i] = s * t; } return g; } TE Z fps_t::inv_ntt(cf &a) -> fps { fps s{T(1) / a[0]}; int N = len(a), n = 1; s.reserve(N); for (; n < N; n <<= 1) { fps f(n << 1), g(n << 1); int sz = min(N, n << 1); FOR(i, sz) f[i] = a[i]; FOR(i, n) g[i] = s[i]; ntt(f, 0); ntt(g, 0); FOR(i, n << 1) f[i] *= g[i]; ntt(f, 1); FOR(i, n) f[i] = 0; ntt(f, 0); FOR(i, n << 1) f[i] *= g[i]; ntt(f, 1); FOR(i, n, sz) s.ep(-f[i]); } return s; } TE Z fps_t::inv_mtt(cf &a) -> fps { int N = len(a), n = 1; fps c{a[0].inv()}, p; for (; n < N; n <<= 1) { p = sq(c); sh(p, n << 1); fps f(begin(a), begin(a) + min(n << 1, N)); p = conv(p, f); sh(c, n << 1); FOR(i, n << 1) c[i] = c[i] + c[i] - p[i]; } sh(c, N); return c; } TE Z fps_t::inv(cf &f) -> fps { int t = count_terms(f), c = T::can_ntt() ? 160 : 820; if (t < c) return inv_sp(f); return T::can_ntt() ? inv_ntt(f) : inv_mtt(f); } #line 5 "YRS/poly/fps_div.hpp" TE Z fps_t::div_sp(fps f, fps g) -> fps { if (g[0].val() != 1) { T c = g[0].inv(); for (T &x : f) x *= c; for (T &x : g) x *= c; } vc> a; int N = len(g); FOR(i, 1, N) if (g[i].val() != 0) a.ep(i, -g[i]); N = len(f); FOR(i, N) for (Z &&[x, y] : a) f[i] += y * f[i - x]; return f; } TE Z fps_t::div_ntt(cf &f, cf &g) -> fps { int N = len(f), M = len(g); if (N == 1) return {f[0] / g[0]}; int m = 1; while (m + m < N) m <<= 1; fps a(m << 1), b(m << 1), c(g); sh(c, m); c = inv(c); sh(c, m << 1); ntt(c, 0); FOR(i, m) a[i] = f[i]; FOR(i, m, N) a[i] = 0; ntt(a, 0); FOR(i, m << 1) a[i] *= c[i]; ntt(a, 1); fps s(N); FOR(i, m) s[i] = a[i]; FOR(i, m, m << 1) a[i] = 0; ntt(a, 0); FOR(i, min(m << 1, M)) b[i] = g[i]; FOR(i, min(m << 1, M), m << 1) b[i] = 0; ntt(b, 0); FOR(i, m << 1) a[i] *= b[i]; ntt(a, 1); FOR(i, m) a[i] = 0; FOR(i, m, min(m << 1, N)) a[i] -= f[i]; ntt(a, 0); FOR(i, m << 1) a[i] *= c[i]; ntt(a, 1); FOR(i, m, N) s[i] -= a[i]; return s; } TE Z fps_t::div_mtt(fps f, fps g) -> fps { int N = len(f); sh(g, N); g = inv(g); f = conv(f, g); sh(f, N); return f; } TE Z fps_t::div_dense(cf &f, cf &g) -> fps { return T::can_ntt() ? div_ntt(f, g) : div_mtt(f, g); } TE Z fps_t::div(cf &f, cf &g) -> fps { if (count_terms(g) < 50) return div_sp(f, g); return T::can_ntt() ? div_ntt(f, g) : div_mtt(f, g); } #line 5 "YRS/poly/fps_log.hpp" TE Z fps_t::log_sp(cf &f) -> fps { int N = len(f); vc> a; FOR(i, 1, N) if (f[i].val() != 0) a.ep(i, f[i]); fps b(N), c(N - 1); FOR(i, N - 1) { T s = f[i + 1] * T(i + 1); for (Z &&[x, y] : a) { if (x > i) break; s -= y * c[i - x]; } c[i] = s; b[i + 1] = s * inv(i + 1); } return b; } TE Z fps_t::log_dense(cf &f) -> fps { assert(f[0] == T(1)); int N = len(f); fps c(f); FOR(i, N) c[i] *= i; c = div_dense(c, f); FOR(i, N) c[i] *= inv(i); return c; } TE Z fps_t::log(cf &f) -> fps { assert(f[0] == T(1)); int c = count_terms(f), t = T::can_ntt() ? 200 : 1200; return c <= t ? log_sp(f) : log_dense(f); } #line 5 "YRS/poly/sum_of_pow.hpp" // sum of ai^n {n = 0, 1, ... N} TE Z fps_t::sum_of_pow(cf &a, int N) -> fps { Z f = conv_all(a); sh(f, N + 1); f = log(f); FOR(i, N + 1) f[i] = -f[i] * T(i); f[0] = len(a); return f; } // sum of i^n (i in [L,R)) return vec[N + 1] TE Z fps_t::sum_of_pow(ll l, ll r, int N) -> fps { vc f(N + 1), g(N + 1); T ls = 1, rs = 1; FOR(i, 1, N + 2) { ls *= T(l), rs *= T(r); f[i - 1] = (rs - ls) * ifac(i); g[i - 1] = ifac(i); } f = div(f, g); FOR(i, N + 1) f[i] *= fac(i); return f; } // sum of ci * ai^n TE Z fps_t::sum_of_pow(cf &a, cf &c, int N) -> fps { Z [x, y] = sum_of_rationals_1(a, c); sh(x, N + 1); sh(y, N + 1); fps f = conv(inv(y), x); sh(f, N + 1); return f; } #line 11 "No_1145_Sums_of_Powers.cpp" using mint = M99; using fps = vc; fps_t X; void Yorisou() { INT(N, K); VEC(mint, a, N); a = X.sum_of_pow(a, K); a.erase(begin(a)); print(a); } constexpr int tests = 0, fl = 0, DB = 10; #line 1 "YRS/aa/main.hpp" int main() { cin.tie(0)->sync_with_stdio(0); int T = 1; if (fl) cerr.tie(0); if (tests and not fl) IN(T); for (int i = 0; i < T or fl; ++i) { Yorisou(); if (fl and i % DB == 0) cerr << "Case: " << i << '\n'; } return 0; } #line 24 "No_1145_Sums_of_Powers.cpp"