/*
 * Bundled single-file solution "No_1145_Sums_of_Powers.cpp" (name taken from
 * the #line directives): the YRS library headers it depends on are pasted in
 * ahead of the solution code, each introduced by its own #line directive.
 *
 * NOTE(review): in this copy the original line breaks are collapsed onto a
 * few very long lines, the #include directives carry no header names, and
 * the template parameter / argument lists look stripped (e.g.
 * `template mint inv(int n)`, `TE using vc = vector;`). The text will not
 * compile as-is -- presumably an extraction artifact; restore it from the
 * original sources before building. In this collapsed form every `//`
 * comment also swallows whatever code follows it on the same physical line.
 *
 * Contents, in order of appearance:
 *   - YRS/aa/head.hpp, YRS/IO/IO.hpp, YRS/all.hpp, YRS/debug.hpp:
 *     loop macros (FOR, FOR_R, FOR_subset), using-declarations and type
 *     aliases, stream operators for i128 / f128 / pair / tuple / ranges,
 *     IN / print / put helpers, answer printers (YES, Alice, ...), small
 *     utilities (chmax, chmin, floor, ceil, bina, LB, UB, ...), debug macros.
 *     NOTE(review): the REAL(...) macro expands to `RE`, which is not defined
 *     anywhere in the visible text; it is not used in this file.
 *   - YRS/mod/modint_common.hpp, YRS/mod/modint.hpp:
 *     is_mint concept; inv / fact / fact_inv backed by function-local static
 *     tables that grow on demand; multinomial, C, C_dense, C_inv,
 *     C_negative; struct modint (value kept in `val`, reduced by `umod`),
 *     with ntt_info() returning a pair per supported modulus and {-1, -1}
 *     otherwise. M99 = modint<998244353>, M17 = modint<1000000007>.
 *   - YRS/po/c/ntt.hpp, transposed_ntt.hpp, ntt_db.hpp:
 *     ntt(a, in) transforms `a` in place; with in == true it first scales
 *     every element by 1 / len(a) (conv_ntt uses in == 0 before the pointwise
 *     product and in == 1 after it). The loops combine either two or four
 *     elements per butterfly. ntt_db(a) appends len(a) further values to `a`
 *     (non-transposed branch) or folds the upper half into the lower half
 *     (transposed branch).
 *   - YRS/mod/crt3.hpp, YRS/po/convolution.hpp:
 *     pow_constexpr, CRT2 .. CRT5; conv_ntt; conv_mtt (convolves under the
 *     three moduli 167772161, 469762049, 754974721 and merges with CRT3);
 *     convolution() picks conv_ntt when mint::can_ntt() and conv_mtt
 *     otherwise.
 *   - YRS/po/conv_all.hpp: conv_all multiplies a list of polynomials pairwise
 *     until one remains; conv_all_1 is commented as "product 1 - f[i]x".
 *     NOTE(review): conv_all_1 contains a stray `;;`.
 *   - YRS/po/c/count_terms.hpp, fps_inv.hpp, fps_log.hpp, fps_div.hpp:
 *     count_terms counts nonzero coefficients; fps_inv and fps_log choose a
 *     sparse or dense routine from that count. NOTE(review): fps_div guards
 *     its sparse path with `and 0`, so that path is never taken.
 *   - YRS/po/f/sum_of_pow.hpp: sum_of_pow(a, N) takes conv_all_1(a), resizes
 *     it to N + 1 coefficients, applies fps_log, replaces coefficient i by
 *     -f[i] * i and finally sets coefficient 0 to len(a).
 *   - Solution: Yorisou() reads N, K and N values of type mint (= M99), calls
 *     sum_of_pow(a, K), erases the first element and prints the rest.
 *   - YRS/aa/main.hpp: main() unties cin, disables stdio sync and runs
 *     Yorisou() once (tests and fl are both defined as 0).
 */
#line 1 "No_1145_Sums_of_Powers.cpp" #define YRSD #line 2 "YRS/all.hpp" #line 2 "YRS/aa/head.hpp" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define TE template #define TN typename #define Z auto #define ep emplace_back #define eb emplace #define fi first #define se second #define all(x) (x).begin(), (x).end() #define OV4(a, b, c, d, e, ...) e #define FOR1(a) for (int _ = 0; _ < (a); ++_) #define FOR2(i, a) for (int i = 0; i < (a); ++i) #define FOR3(i, a, b) for (int i = (a); i < (b); ++i) #define FOR4(i, a, b, c) for (int i = (a); i < (b); i += (c)) #define FOR(...) OV4(__VA_ARGS__, FOR4, FOR3, FOR2, FOR1)(__VA_ARGS__) #define FOR1_R(a) for (int _ = (a) - 1; _ >= 0; --_) #define FOR2_R(i, a) for (int i = (a) - 1; i >= 0; --i) #define FOR3_R(i, a, b) for (int i = (b) - 1; i >= (a); --i) #define FOR4_R(i, a, b, c) for (int i = (b) - 1; i >= (a); i -= (c)) #define FOR_R(...) OV4(__VA_ARGS__, FOR4_R, FOR3_R, FOR2_R, FOR1_R)(__VA_ARGS__) #define FOR_subset(t, s) for (int t = (s); t > -1; t = (t == 0 ? 
-1 : (t - 1) & s)) using std::array, std::bitset, std::deque, std::greater, std::less, std::map, std::multiset, std::pair, std::priority_queue, std::set, std::istream, std::ostream, std::string, std::vector, std::tuple, std::function, std::cerr; using std::cin, std::cout, std::swap, std::iota, std::endl, std::prev, std::next, std::min, std::max, std::tie, std::move, std::reverse; TE using vc = vector; TE using vvc = vector>; TE using T1 = tuple; TE using T2 = tuple; TE using T3 = tuple; TE using T4 = tuple; TE using max_heap = priority_queue; TE using min_heap = priority_queue, greater>; using u8 = unsigned char; using uint = unsigned int; using ll = long long; using ull = unsigned long long; using ld = long double; using i128 = __int128; using u128 = __uint128_t; using f128 = __float128; using PII = pair; using PLL = pair; #ifdef YRSD constexpr bool dbg = 1; #else constexpr bool dbg = 0; #endif #line 2 "YRS/IO/IO.hpp" istream &operator>>(istream &I, i128 &x) { static string s; I >> s; int f = s[0] == '-'; x = 0; const int N = (int)s.size(); FOR(i, f, N) x = x * 10 + s[i] - '0'; if (f) x = -x; return I; } ostream &operator<<(ostream &O, i128 x) { static string s; s.clear(); bool f = x < 0; if (f) x = -x; while (x) s += '0' + x % 10, x /= 10; if (s.empty()) s += '0'; if (f) s += '-'; return std::reverse(all(s)), O << s; } istream &operator>>(istream &I, f128 &x) { static string s; return I >> s, x = std::stold(s), I; } ostream &operator<<(ostream &O, const f128 x) { return O << ld(x); } TE istream &operator>>(istream &I, tuple &t) { return std::apply([&I](Z &...args) { ((I >> args), ...); }, t), I; } TE istream &operator>>(istream &I, pair &x) { return I >> x.fi >> x.se; } TE ostream &operator<<(ostream &O, const pair &x) { return O << x.fi << ' ' << x.se; } TE requires requires(V &c) { std::begin(c); std::end(c); } and (not std::is_same_v, string>) istream &operator>>(istream &I, V &c) { for (Z &e : c) I >> e; return I; } TE requires requires(const V &c) { 
std::begin(c); std::end(c); } and (not std::is_same_v, const char*>) and (not std::is_same_v, string>) and (not std::is_array_v> or not std::is_same_v>, char>) ostream &operator<<(ostream &O, const V &c) { if (c.empty()) return O; Z it = c.begin(); O << *it++; std::for_each(it, c.end(), [&O](const Z &e) { O << ' ' << e; }); return O; } bool IN() { return true; } TE bool IN(T &x, S &...y) { if (not(cin >> x)) return false; return IN(y...); } void print() { cout << '\n'; } TE void print(T &&x, S &&...y) { cout << x; if constexpr (sizeof...(S)) cout << ' '; print(std::forward(y)...); } void put() { cout << ' '; } TE void put(T &&x, S &&...y) { cout << x; if constexpr (sizeof...(S)) cout << ' '; put(std::forward(y)...); } #define INT(...) int __VA_ARGS__; IN(__VA_ARGS__) #define LL(...) ll __VA_ARGS__; IN(__VA_ARGS__) #define ULL(...) ull __VA_ARGS__; IN(__VA_ARGS__) #define I128(...) i128 __VA_ARGS__; IN(__VA_ARGS__) #define STR(...) string __VA_ARGS__; IN(__VA_ARGS__) #define CH(...) char __VA_ARGS__; IN(__VA_ARGS__) #define REAL(...) RE __VA_ARGS__; IN(__VA_ARGS__) #define VEC(T, a, n) vector a(n); IN(a) #define VVEC(T, a, n, m) vector a(n, vector(m)); IN(a) void YES(bool o = 1) { print(o ? "YES" : "NO"); } void Yes(bool o = 1) { print(o ? "Yes" : "No"); } void yes(bool o = 1) { print(o ? "yes" : "no"); } void NO(bool o = 1) { YES(not o); } void No(bool o = 1) { Yes(not o); } void no(bool o = 1) { yes(not o); } void ALICE(bool o = 1) { print(o ? "ALICE" : "BOB"); } void Alice(bool o = 1) { print(o ? "Alice" : "Bob"); } void alice(bool o = 1) { print(o ? "alice" : "bob"); } void BOB(bool o = 1) { ALICE(not o); } void Bob(bool o = 1) { Alice(not o); } void bob(bool o = 1) { alice(not o); } void POSSIBLE(bool o = 1) { print(o ? "POSSIBLE" : "IMPOSSIBLE"); } void Possible(bool o = 1) { print(o ? "Possible" : "Impossible"); } void possible(bool o = 1) { print(o ? 
"possible" : "impossible"); } void IMPOSSIBLE(bool o = 1) { POSSIBLE(not o); } void Impossible(bool o = 1) { Possible(not o); } void impossible(bool o = 1) { possible(not o); } void TAK(bool o = 1) { print(o ? "TAK" : "NIE"); } void NIE(bool o = 1) { TAK(not o); } #line 5 "YRS/all.hpp" constexpr ld pi = 3.141592653589793L; TE constexpr T inf = std::numeric_limits::max(); TE<> constexpr i128 inf = i128(std::numeric_limits::max()) * 2'000'000'000'000'000'000; TE constexpr pair inf> = {inf, inf}; TE constexpr static int popcount(T x) { using U = std::make_unsigned_t; return std::__popcount(static_cast(x)); } TE constexpr static int pc(T x) { return popcount(x); } TE constexpr static ll len(const T &a) { return a.size(); } TE constexpr static string to_s(T x) { return std::to_string(x); } TE void reverse(T &a) { reverse(all(a)); } TE void sort(T &a) { std::sort(all(a)); } TE void sort(T &a, Z cmp) { std::sort(all(a), cmp); } TE void unique(T &a) { std::sort(all(a)); a.erase(std::unique(all(a)), a.end()); } TE vc inverse(const vc &A) { int N = len(A); vc B(N, -1); FOR(i, N) if (A[i] != -1) B[A[i]] = i; return B; } Z QMAX(const Z &A) { return *std::max_element(all(A)); } Z QMIN(const Z &A) { return *std::min_element(all(A)); } constexpr bool chmax(Z &a, const Z &b) { return (a < b ? a = b, true : false); } constexpr bool chmin(Z &a, const Z &b) { return (a > b ? 
a = b, true : false); } TE constexpr static pair operator-(const pair &p) { return pair(-p.fi, -p.se); } TE vc argsort(const T &A) { vc I(A.size()); iota(all(I), 0); std::sort(all(I), [&](int i, int k) { return A[i] < A[k] or (A[i] == A[k] and i < k); }); return I; } TE vc rearrange(const vc &A, const vc &I) { int N = len(I); vc B(N); FOR(i, N) B[i] = A[I[i]]; return B; } TE vc pre_sum(const vc &v) { int N = v.size(); vc A(N + 1); FOR(i, N) A[i + 1] = A[i] + v[i]; if constexpr (off == 0) A.erase(A.begin()); return A; } TE vc s_to_vec(const string &s, char off) { int N = len(s); vc A(N); FOR(i, N) A[i] = (s[i] != '?' ? s[i] - off : -1); return A; } TE constexpr static int topbit(T x) { if (x == 0) return - 1; if constexpr (sizeof(T) <= 4) return 31 - __builtin_clz(x); else return 63 - __builtin_clzll(x); } TE constexpr static int lowbit(T x) { if (x == 0) return -1; if constexpr (sizeof(T) <= 4) return __builtin_ctz(x); else return __builtin_ctzll(x); } TE constexpr T floor(T x, T y) { return x / y - (x % y and (x ^ y) < 0); } TE constexpr T ceil(T x, T y) { return floor(x + y - 1, y); } TE pair divmod(T x, T y) { T q = floor(x, y); return pair{q, x - q * y}; } TE T SUM(const Z &v) { return std::accumulate(all(v), T(0)); } Z LB(const Z &a, Z x) { return std::lower_bound(all(a), x); } Z UB(const Z &a, Z x) { return std::upper_bound(all(a), x); } int lower_bound(const Z &a, Z x) { return LB(a, x) - a.begin(); } int upper_bound(const Z &a, Z x) { return UB(a, x) - a.begin(); } int lb(const Z &a, Z x) { return LB(a, x) - a.begin(); } int ub(const Z &a, Z x) { return UB(a, x) - a.begin(); } TE ll bina(const Z &F, ll L, ll R) { if constexpr (ck) assert(F(L)); while (std::abs(L - R) > 1) { ll x = (R + L) >> 1; (F(x) ? L : R) = x; } return L; } TE T bina_real(const Z &F, T L, T R, int c = 100) { while (c--) { T m = (L + R) / 2; (F(m) ? 
L : R) = m; } return (L + R) / 2; } TE Z pop(T &s) { if constexpr (requires { s.back(); }) { Z x = s.back(); return s.pop_back(), x; } else { Z x = s.top(); return s.pop(), x; } } void setp(int x) { cout << std::fixed << std::setprecision(x); } #line 1 "YRS/debug.hpp" #ifdef YRSD void DBG() { cerr << ']' << std::endl; } TE void DBG(T &&x, S &&...y) { cerr << x; if constexpr (sizeof...(S)) cerr << ", "; DBG(std::forward(y)...); } void DBG_ERR() { cerr << std::endl; } TE void DBG_ERR(T &&x, S &&...y) { cerr << x; if constexpr (sizeof...(S)) cerr << ", "; DBG_ERR(std::forward(y)...); } #define debug(...) cerr << '[' << __LINE__ << ']' << ": [" #__VA_ARGS__ "] = [", DBG(__VA_ARGS__) #define err(...) cerr << '[' << __LINE__ << ']' << ": ", DBG_ERR(__VA_ARGS__) #define asser assert #else #define debug(...) void(0721) #define err(...) void(0721) #define asser(...) void(0721) #endif #line 4 "No_1145_Sums_of_Powers.cpp" // #include "YRS/IO/fast_io.hpp" // #include "YRS/random/rng.hpp" #line 2 "YRS/po/f/sum_of_pow.hpp" #line 2 "YRS/po/conv_all.hpp" #line 2 "YRS/po/c/ntt_db.hpp" #line 2 "YRS/po/c/ntt.hpp" #line 2 "YRS/mod/modint.hpp" #line 2 "YRS/mod/modint_common.hpp" template concept is_mint = requires(T x) { { T::get_mod() }; { T::gen(0ull) } -> std::same_as; x.val; }; template mint inv(int n) { static constexpr int mod = mint::get_mod(); static vector dat = {0, 1}; assert(0 <= n); if (n >= mod) n %= mod; while (len(dat) <= n) { int k = len(dat); Z q = (mod + k - 1) / k; int r = k * q - mod; dat.ep(dat[r] * mint(q)); } return dat[n]; } template mint fact(int n) { static constexpr int mod = mint::get_mod(); static vector dat = {1, 1}; assert(0 <= n); if (n >= mod) return 0; while (len(dat) <= n) { int k = len(dat); dat.ep(dat[k - 1] * mint(k)); } return dat[n]; } template mint fact_inv(int n) { static vector dat = {1, 1}; if (n < 0) return mint(0); while (len(dat) <= n) dat.ep(dat[len(dat) - 1] * inv(len(dat))); return dat[n]; } template mint fact_invs(Ts... 
xs) { return(mint(1) * ... * fact_inv(xs)); } template mint multinomial(Head&& head, Tail&&... tail) { return fact(head) * fact_invs(std::forward(tail)...); } template mint C_dense(int n, int k) { assert(n >= 0); if (k < 0 or n < k) return 0; static vector> C; static int H = 0, W = 0; Z calc = [&](int i, int j) -> mint { if (i == 0) return(j == 0 ? mint(1) : mint(0)); return C[i - 1][j] + (j ? C[i - 1][j - 1] : 0); }; if (W <= k) { for (int i = 0; i < H; ++i) { C[i].resize(k + 1); for (int j = W; j < k + 1; ++j) { C[i][j] = calc(i, j); } } W = k + 1; } if (H <= n) { C.resize(n + 1); for (int i = H; i < n + 1; ++i) { C[i].resize(W); for (int j = 0; j < W; ++j) { C[i][j] = calc(i, j); } } H = n + 1; } return C[n][k]; } template mint C(ll n, ll k) { assert(n >= 0); if (k < 0 or n < k) return 0; if constexpr (dense) return C_dense(n, k); if constexpr (not large) return multinomial(n, k, n - k); k = std::min(k, n - k); mint x(1); for (int i = 0; i < k; ++i) x *= mint(n - i); return x * fact_inv(k); } template mint C_inv(ll n, ll k) { assert(n >= 0); assert(0 <= k and k <= n); if (not large) return fact_inv(n) * fact(k) * fact(n - k); return mint(1) / C(n, k); } // [x^d](1-x)^{-n} template mint C_negative(ll n, ll d) { assert(n >= 0); if (d < 0) return mint(0); if (n == 0) { return(d == 0 ? mint(1) : mint(0)); } return C(n + d - 1, d); } #line 4 "YRS/mod/modint.hpp" template struct modint { static constexpr uint umod = uint(mod); static_assert(umod < uint(1) << 31); uint val; static constexpr modint raw(uint v) { modint x; x.val = v; return x; } static constexpr modint gen(uint x) { modint s; s.val = x; return s; } constexpr modint() : val(0) {} constexpr modint(uint x) : val(x % umod) {} constexpr modint(ull x) : val(x % umod) {} constexpr modint(u128 x) : val(x % umod) {} constexpr modint(int x) : val((x %= mod) < 0 ? x + mod : x) {} constexpr modint(ll x) : val((x %= mod) < 0 ? x + mod : x) {} constexpr modint(i128 x) : val((x %= mod) < 0 ? 
x + mod : x) {} bool operator<(const modint &p) const { return val < p.val; } constexpr modint &operator+=(const modint &p) { if ((val += p.val) >= umod) val -= umod; return *this; } constexpr modint &operator-=(const modint &p) { if ((val += umod - p.val) >= umod) val -= umod; return *this; } constexpr modint &operator*=(const modint &p) { val = ull(val) * p.val % umod; return *this; } constexpr modint &operator/=(const modint &p) { *this *= p.inv(); return *this; } constexpr modint operator-() const { return modint::gen(val ? mod - val : uint(0)); } constexpr modint operator+(const modint &p) const { return modint(*this) += p; } constexpr modint operator-(const modint &p) const { return modint(*this) -= p; } constexpr modint operator*(const modint &p) const { return modint(*this) *= p; } constexpr modint operator/(const modint &p) const { return modint(*this) /= p; } bool operator==(const modint &p) const { return val == p.val; } bool operator!=(const modint &p) const { return val != p.val; } friend istream &operator>>(istream &is, modint &p) { ll x; is >> x; p = x; return is; } friend ostream &operator<<(ostream &os, modint p) { return os << p.val; } constexpr modint inv() const { int a = val, b = mod, x = 1, y = 0, t; while (b > 0) { t = a / b; swap(a -= t * b, b); swap(x -= t * y, y); } return modint(x); } constexpr modint pow(ll k) const { modint r(1), a(val); for (; k; k >>= 1, a *= a) if (k & 1) r *= a; return r; } static constexpr int get_mod() { return mod; } static constexpr PII ntt_info() { if constexpr (mod == 120586241) return {20, 74066978}; if (mod == 167772161) return {25, 17}; if (mod == 469762049) return {26, 30}; if (mod == 754974721) return {24, 362}; if (mod == 880803841) return {23, 211}; if (mod == 943718401) return {22, 663003469}; if (mod == 998244353) return {23, 31}; if (mod == 1004535809) return {21, 582313106}; if (mod == 1012924417) return {21, 368093570}; return {-1, -1}; } static constexpr bool can_ntt() { return ntt_info().fi != 
-1; } }; using M99 = modint<998244353>; using M17 = modint<1000000007>; #ifdef FIO template void rd(modint &x) { LL(y); x = y; } template void wt(modint x) { wt(x.val); } #endif #line 4 "YRS/po/c/ntt.hpp" template void ntt(vc &a, bool in) { asser(mint::can_ntt()); constexpr int p = mint::ntt_info().fi; constexpr uint mod = mint::get_mod(); static array r, ir, ra, ira, rat, irat; assert(p != -1 and len(a) <= (1 << max(0, p))); static bool ok = 0; if (not ok) { ok = 1; r[p] = mint::ntt_info().se; ir[p] = mint(1) / r[p]; FOR_R(i, p) { r[i] = r[i + 1] * r[i + 1]; ir[i] = ir[i + 1] * ir[i + 1]; } mint s = 1, in = 1; FOR(i, p - 1) { ra[i] = r[i + 2] * s; ira[i] = ir[i + 2] * in; s *= ir[i + 2]; in *= r[i + 2]; } s = 1, in = 1; FOR(i, p - 2) { rat[i] = r[i + 3] * s; irat[i] = ir[i + 3] * in; s *= ir[i + 3]; in *= r[i + 3]; } } int N = len(a), n = topbit(N); if (not in) { int sz = 0; while (sz < n) { if (n - sz == 1) { int p = 1 << (n - sz - 1); mint c = 1; FOR(s, 1 << sz) { int of = s << (n - sz); FOR(i, p) { mint l = a[i + of], r = a[i + of + p] * c; a[i + of] = l + r, a[i + of + p] = l - r; } c *= ra[topbit(~s & -~s)]; } ++sz; } else { int p = 1 << (n - sz - 2); mint c = 1, in = r[2]; FOR(s, 1 << sz) { mint r2 = c * c, r3 = r2 * c; int of = s << (n - sz); FOR(i, p) { constexpr ull m2 = ull(mod) * mod; ull a0 = a[i + of].val, a1 = ull(a[i + of + p].val) * c.val; ull a2 = ull(a[i + of + 2 * p].val) * r2.val; ull a3 = ull(a[i + of + 3 * p].val) * r3.val; ull t = (a1 + m2 - a3) % mod * in.val; ull na = m2 - a2; a[i + of] = a0 + a1 + a2 + a3; a[i + of + p] = a0 + a2 + m2 * 2 - a1 - a3; a[i + of + 2 * p] = a0 + na + t; a[i + of + 3 * p] = a0 + na + m2 - t; } c *= rat[topbit(~s & -~s)]; } sz += 2; } } } else { mint c = mint(1) / mint(len(a)); FOR(i, len(a)) a[i] *= c; int sz = n; while (sz) { if (sz == 1) { int p = 1 << (n - sz); mint c = 1; FOR(s, 1 << (sz - 1)) { int of = s << (n - sz + 1); FOR(i, p) { ull l = a[i + of].val, r = a[i + of + p].val; a[i + of] = l + r; a[i + of 
+ p] = (mod + l - r) * c.val; } c *= ira[topbit(~s & -~s)]; } --sz; } else { int p = 1 << (n - sz); mint c = 1, in = ir[2]; FOR(s, 1 << (sz - 2)) { mint r2 = c * c, r3 = r2 * c; int of = s << (n - sz + 2); FOR(i, p) { ull a0 = a[i + of].val, a1 = a[i + of + p].val; ull a2 = a[i + of + 2 * p].val; ull a3 = a[i + of + 3 * p].val; ull x = (mod + a2 - a3) * in.val % mod; a[i + of] = a0 + a1 + a2 + a3; a[i + of + p] = (a0 + mod - a1 + x) * c.val; a[i + of + 2 * p] = (a0 + a1 + 2 * mod - a2 - a3) * r2.val; a[i + of + 3 * p] = (a0 + 2 * mod - a1 - x) * r3.val; } c *= irat[topbit(~s & -~s)]; } sz -= 2; } } } } #line 2 "YRS/po/c/transposed_ntt.hpp" template void transposed_ntt(vc &a, bool in) { static_assert(mint::can_ntt()); constexpr int p = mint::ntt_info().fi; constexpr uint mod = mint::get_mod(); static array r, ir, rt, irt, rat, irat; assert(p != -1 and len(a) <= (1 << max(0, p))); static bool ok = 0; if (not ok) { ok = 1; r[p] = mint::ntt_info().se; ir[p] = mint(1) / r[p]; FOR_R(i, p) { r[i] = r[i + 1] * r[i + 1]; ir[i] = ir[i + 1] * ir[i + 1]; } mint s = 1, in = 1; FOR(i, p - 1) { rt[i] = r[i + 2] * s; irt[i] = ir[i + 2] * in; s *= ir[i + 2]; in *= r[i + 2]; } s = 1, in = 1; FOR(i, p - 2) { rat[i] = r[i + 3] * s; irat[i] = ir[i + 3] * in; s *= ir[i + 3]; in *= r[i + 3]; } } int N = len(a), n = topbit(N); assert(N == 1 << n); if (not in) { int sz = n; while (sz > 0) { if (sz == 1) { int p = 1 << (n - sz); mint c = 1; FOR(s, 1 << (sz - 1)) { int of = s << (n - sz + 1); FOR(i, p) { ull l = a[i + of].val, r = a[i + of + p].val; a[i + of] = l + r, a[i + of + p] = (mod + l - r) * c.val; } c *= rt[topbit(~s & -~s)]; } --sz; } else { int p = 1 << (n - sz); mint c = 1, in = r[2]; FOR(s, 1 << (sz - 2)) { int of = s << (n - sz + 2); mint r2 = c * c, r3 = r2 * c; FOR(i, p) { ull a0 = a[i + of + 0 * p].val; ull a1 = a[i + of + 1 * p].val; ull a2 = a[i + of + 2 * p].val; ull a3 = a[i + of + 3 * p].val; ull x = (mod + a2 - a3) * in.val % mod; a[i + of] = a0 + a1 + a2 + a3; a[i + 
of + 1 * p] = (a0 + mod - a1 + x) * c.val; a[i + of + 2 * p] = (a0 + a1 + 2 * mod - a2 - a3) * r2.val; a[i + of + 3 * p] = (a0 + 2 * mod - a1 - x) * r3.val; } c *= rat[topbit(~s & -~s)]; } sz -= 2; } } } else { mint c = mint(1) / mint(len(a)); FOR(i, len(a)) a[i] *= c; int sz = 0; while (sz < n) { if (sz == n - 1) { int p = 1 << (n - sz - 1); mint c = 1; FOR(s, 1 << sz) { int of = s << (n - sz); FOR(i, p) { mint l = a[i + of], r = a[i + of + p] * c; a[i + of] = l + r, a[i + of + p] = l - r; } c *= irt[topbit(~s & -~s)]; } ++sz; } else { int p = 1 << (n - sz - 2); mint c = 1, in = ir[2]; FOR(s, 1 << sz) { mint r2 = c * c, r3 = r2 * c; int of = s << (n - sz); FOR(i, p) { ull m2 = ull(mod) * mod; ull a0 = a[i + of].val; ull a1 = ull(a[i + of + p].val) * c.val; ull a2 = ull(a[i + of + 2 * p].val) * r2.val; ull a3 = ull(a[i + of + 3 * p].val) * r3.val; ull t = (a1 + m2 - a3) % mod * in.val; ull na = m2 - a2; a[i + of] = a0 + a1 + a2 + a3; a[i + of + 1 * p] = a0 + a2 + (2 * m2 - a1 - a3); a[i + of + 2 * p] = a0 + na + t; a[i + of + 3 * p] = a0 + na + m2 - t; } c *= irat[topbit(~s & -~s)]; } sz += 2; } } } } #line 5 "YRS/po/c/ntt_db.hpp" template void ntt_db(vc &a) { static array rt; static bool ok = 0; if (not ok) { ok = 1; constexpr int s = mint::ntt_info().fi; rt[s] = mint::ntt_info().se; FOR_R(i, s) rt[i] = rt[i + 1] * rt[i + 1]; } if constexpr (not transposed) { int N = len(a); Z b = a; ntt(b, 1); mint r = 1, z = rt[topbit(N << 1)]; FOR(i, N) b[i] *= r, r *= z; ntt(b, 0); std::copy(all(b), std::back_inserter(a)); } else { int N = len(a) >> 1; vc t{a.begin(), a.begin() + N}; a = {a.begin() + N, a.end()}; transposed_ntt(a, 0); mint r = 1, z = rt[topbit(N << 1)]; FOR(i, N) a[i] *= r, r *= z; transposed_ntt(a, 1); FOR(i, N) a[i] += t[i]; } } #line 2 "YRS/po/convolution.hpp" #line 2 "YRS/mod/crt3.hpp" constexpr uint pow_constexpr(ull a, ull b, uint mod) { a %= mod; ull res = 1; FOR(32) { if (b & 1) res = res * a % mod; a = a * a % mod, b >>= 1; } return res; } template T 
CRT2(ull a0, ull a1) { static_assert(p0 < p1); static constexpr ull x0_1 = pow_constexpr(p0, p1 - 2, p1); ull c = (a1 - a0 + p1) * x0_1 % p1; return a0 + c * p0; } template T CRT3(ull a0, ull a1, ull a2) { static_assert(p0 < p1 and p1 < p2); static constexpr ull x1 = pow_constexpr(p0, p1 - 2, p1); static constexpr ull x2 = pow_constexpr(ull(p0) * p1 % p2, p2 - 2, p2); static constexpr ull p01 = ull(p0) * p1; ull c = (a1 - a0 + p1) * x1 % p1; ull ans_1 = a0 + c * p0; c = (a2 - ans_1 % p2 + p2) * x2 % p2; return T(ans_1) + T(c) * T(p01); } template T CRT4(ull a0, ull a1, ull a2, ull a3) { static_assert(p0 < p1 and p1 < p2 and p2 < p3); static constexpr ull x1 = pow_constexpr(p0, p1 - 2, p1); static constexpr ull x2 = pow_constexpr(ull(p0) * p1 % p2, p2 - 2, p2); static constexpr ull x3 = pow_constexpr(ull(p0) * p1 % p3 * p2 % p3, p3 - 2, p3); static constexpr ull p01 = ull(p0) * p1; ull c = (a1 - a0 + p1) * x1 % p1; ull ans_1 = a0 + c * p0; c = (a2 - ans_1 % p2 + p2) * x2 % p2; u128 ans_2 = ans_1 + c * static_cast(p01); c = (a3 - ans_2 % p3 + p3) * x3 % p3; return T(ans_2) + T(c) * T(p01) * T(p2); } template T CRT5(ull a0, ull a1, ull a2, ull a3, ull a4) { static_assert(p0 < p1 and p1 < p2 and p2 < p3 and p3 < p4); static constexpr ull x1 = pow_constexpr(p0, p1 - 2, p1); static constexpr ull x2 = pow_constexpr(ull(p0) * p1 % p2, p2 - 2, p2); static constexpr ull x3 = pow_constexpr(ull(p0) * p1 % p3 * p2 % p3, p3 - 2, p3); static constexpr ull x4 = pow_constexpr(ull(p0) * p1 % p4 * p2 % p4 * p3 % p4, p4 - 2, p4); static constexpr ull p01 = ull(p0) * p1; static constexpr ull p23 = ull(p2) * p3; ull c = (a1 - a0 + p1) * x1 % p1; ull ans_1 = a0 + c * p0; c = (a2 - ans_1 % p2 + p2) * x2 % p2; u128 ans_2 = ans_1 + c * static_cast(p01); c = static_cast(a3 - ans_2 % p3 + p3) * x3 % p3; u128 ans_3 = ans_2 + static_cast(c * p2) * p01; c = static_cast(a4 - ans_3 % p4 + p4) * x4 % p4; return T(ans_3) + T(c) * T(p01) * T(p23); } #line 5 "YRS/po/convolution.hpp" template vc 
conv_ntt(vc a, vc b) { static_assert(mint::can_ntt()); if (a.empty() or b.empty()) return {}; int N = len(a), M = len(b), sz = 1; while (sz < N + M - 1) sz <<= 1; a.resize(sz), b.resize(sz); bool ok = a == b; ntt(a, 0); if (ok) b = a; else ntt(b, 0); FOR(i, sz) a[i] *= b[i]; ntt(a, 1); a.resize(N + M - 1); return a; } template vc conv_mtt(const vc &a, const vc &b) { int N = len(a), M = len(b); if (not N or not M) return {}; static constexpr int p0 = 167772161; static constexpr int p1 = 469762049; static constexpr int p2 = 754974721; using M0 = modint; using M1 = modint; using M2 = modint; vc a0(N), b0(M); vc a1(N), b1(M); vc a2(N), b2(M); FOR(i, N) a0[i] = a[i].val, a1[i] = a[i].val, a2[i] = a[i].val; FOR(i, M) b0[i] = b[i].val, b1[i] = b[i].val, b2[i] = b[i].val; vc c0 = conv_ntt(a0, b0); vc c1 = conv_ntt(a1, b1); vc c2 = conv_ntt(a2, b2); vc c(len(c0)); FOR(i, N + M - 1) c[i] = CRT3(c0[i].val, c1[i].val, c2[i].val); return c; } template vc convolution(const vc &a, const vc &b) { int N = len(a), M = len(b); if (not N or not M) return {}; if constexpr (mint::can_ntt()) return conv_ntt(a, b); return conv_mtt(a, b); } #line 5 "YRS/po/conv_all.hpp" // O(Nlog^2N) where N is the total degree; even when each fi has low degree, log fi may have large degree, so trying to compute this via exp|log would cost NMlogN template vc conv_all(vc> &f) { if (f.empty()) return {{mint(1)}}; while (1) { int N = len(f); if (N == 1) break; int m = (N + 1) >> 1; FOR(i, m) { if (i + i + 1 == N) f[i] = f[i << 1]; else f[i] = convolution(f[i << 1], f[i << 1 | 1]); } f.resize(m); } return f[0]; } // product 1 - f[i]x template vc conv_all_1(vc f) { if (not mint::can_ntt()) { vc> g; for (Z &x : f) g.ep(vc({mint(1), -x})); return conv_all(g); } int D = 6, N = 1, sz = len(f);; while (N < sz) N <<= 1; int k = topbit(N); vc F(N), nx(N); FOR(i, sz) F[i] = -f[i]; FOR(d, k) { int b = 1 << d; if (d < D) { fill(all(nx), mint(0)); FOR(L, 0, N, b << 1) { FOR(i, b) FOR(j, b) nx[L + i + j] += F[L + i] * F[L + b + j]; FOR(i, b) nx[L + b + i] += F[L + i] + F[L + b + i]; } } else if (d == D) { FOR(L, 0, N, 
b << 1) { vc f1 = {F.begin() + L, F.begin() + L + b}; vc f2 = {F.begin() + L + b, F.begin() + L + 2 * b}; f1.resize(b << 1), f2.resize(b << 1); ntt(f1, 0), ntt(f2, 0); FOR(i, b) nx[L + i] = f1[i] * f2[i] + f1[i] + f2[i]; FOR(i, b, b << 1) nx[L + i] = f1[i] * f2[i] - f1[i] - f2[i]; } } else { FOR(L, 0, N, b << 1) { vc f1 = {F.begin() + L, F.begin() + L + b}; vc f2 = {F.begin() + L + b, F.begin() + L + 2 * b}; ntt_db(f1), ntt_db(f2); FOR(i, b) nx[L + i] = f1[i] * f2[i] + f1[i] + f2[i]; FOR(i, b, b << 1) nx[L + i] = f1[i] * f2[i] - f1[i] - f2[i]; } } swap(F, nx); } if (k - 1 >= D) ntt(F, 1); F.ep(1), reverse(all(F)); F.resize(sz + 1); return F; } #line 2 "YRS/po/fps_log.hpp" #line 2 "YRS/po/fps_inv.hpp" #line 2 "YRS/po/c/count_terms.hpp" // number of nonzero terms template int count_terms(const vc &f){ int t = 0, N = len(f); FOR(i, N) if(f[i] != mint(0)) ++t; return t; } #line 5 "YRS/po/fps_inv.hpp" // O(NK) template vc fps_inv_sparse(const vc &f) { int N = len(f); vc> dat; FOR(i, 1, N) if (f[i] != mint(0)) dat.ep(i, f[i]); vc g(N); mint t = mint(1) / f[0]; g[0] = t; FOR(i, 1, N) { mint s = 0; for (Z &&[x, y] : dat) { if (x > i) break; s -= y * g[i - x]; } g[i] = s * t; } return g; } template vc fps_inv_dense_ntt(const vc &F) { vc G{mint(1) / F[0]}; int N = len(F), n = 1; G.reserve(N); while (n < N) { vc f(n << 1), g(n << 1); int L = min(N, n << 1); FOR(i, L) f[i] = F[i]; FOR(i, n) g[i] = G[i]; ntt(f, 0), ntt(g, 0); FOR(i, n << 1) f[i] *= g[i]; ntt(f, 1); FOR(i, n) f[i] = 0; ntt(f, 0); FOR(i, n << 1) f[i] *= g[i]; ntt(f, 1); FOR(i, n, L) G.ep(-f[i]); n <<= 1; } return G; } template vc fps_inv_dense(const vc &F) { if constexpr (mint::can_ntt()) return fps_inv_dense_ntt(F); int N = len(F); vc R = {mint(1) / F[0]}, p; int n = 1; while (n < N) { p = convolution(R, R); p.resize(n << 1); vc f = {F.begin(), F.begin() + min(n << 1, N)}; p = convolution(p, f); R.resize(n << 1); FOR(i, n + n) R[i] = R[i] + R[i] - p[i]; n <<= 1; } R.resize(N); return R; } template vc fps_inv(const vc &f) { 
assert(f[0] != mint(0)); int sz = count_terms(f), c = mint::can_ntt() ? 160 : 820; return sz <= c ? fps_inv_sparse(f) : fps_inv_dense(f); } #line 5 "YRS/po/fps_log.hpp" template vc fps_log_sparse(const vc &a) { int N = len(a); vc> dat; FOR(i, 1, N) if (a[i] != mint(0)) dat.ep(i, a[i]); vc f(N), g(N - 1); FOR(i, N - 1) { mint s = a[i + 1] * mint(i + 1); for (Z &&[x, y] : dat) { if (x > i) break; s -= y * g[i - x]; } g[i] = s; f[i + 1] = s * inv(i + 1); } return f; } template vc fps_log_dense(const vc &f) { assert(f[0] == mint(1)); int N = len(f); vc df = f; FOR(i, N) df[i] *= mint(i); df.erase(df.begin()); vc inf = fps_inv(f), g = convolution(df, inf); g.resize(N - 1); g.insert(g.begin(), 0); FOR(i, N) g[i] *= inv(i); return g; } template vc fps_log(const vc &f) { assert(f[0] == mint(1)); int n = count_terms(f), t = mint::can_ntt() ? 200 : 1200; return n <= t ? fps_log_sparse(f) : fps_log_dense(f); } #line 2 "YRS/po/fps_div.hpp" #line 5 "YRS/po/fps_div.hpp" template vc fps_div_sprase(vc f, vc &g) { if (g[0] != mint(1)) { mint c = g[0].inv(); for (Z &x : f) x *= c; for (Z &x : g) x *= c; } vc> dat; int N = len(g); FOR(i, 1, N) if (g[i] != mint(0)) dat.ep(i, -g[i]); N = len(f); FOR(i, N) for (Z[x, y] : dat) if (i >= x) f[i] += y * f[i - x]; return f; } // truncated quotient f/g template vc fps_div(vc f, vc g) { if (count_terms(f) < 100 and 0) return fps_div_sprase(f, g); int N = len(f); g.resize(N); g = fps_inv(g); f = convolution(f, g); f.resize(N); return f; } #line 6 "YRS/po/f/sum_of_pow.hpp" // sum of fi^n {n = 0, 1, ... 
N} template vc sum_of_pow(const vc &a, int N) { Z f = conv_all_1(a); f.resize(N + 1); f = fps_log(f); FOR(i, N + 1) f[i] = -f[i] * mint(i); f[0] = len(a); return f; } #line 7 "No_1145_Sums_of_Powers.cpp" #define tests 0 #define fl 0 #define DB 10 using mint = M99; void Yorisou() { INT(N, K); VEC(mint, a, N); vc r = sum_of_pow(a, K); r.erase(r.begin()); print(r); } #line 1 "YRS/aa/main.hpp" int main() { std::cin.tie(nullptr)->sync_with_stdio(false); int T = 1; if (fl) cerr.tie(0); if (tests and not fl) IN(T); for (int i = 0; i < T or fl; ++i) { Yorisou(); if (fl and i % DB == 0) cerr << "Case: " << i << '\n'; } return 0; } #line 20 "No_1145_Sums_of_Powers.cpp"