#1150856 (C++23) No.1145 Sums of Powers

提出ソース
結果

問題	No.1145 Sums of Powers
コンテスト
ユーザー	zeta
提出日時	2026-03-01 02:12:01
言語	C++23 (gcc 15.2.0 + boost 1.89.0) コンパイル: `g++-15 -O2 -lm -std=c++23 -Wuninitialized -DONLINE_JUDGE -o a.out _filename_` 実行: `./a.out`
結果	AC
実行時間	147 ms / 2,000 ms
コード長	33,774 bytes
記録（記録タグの例: 初AC、ショートコード、純ショートコード、純主流ショートコード、最速実行時間）
コンパイル時間	4,909 ms
コンパイル使用メモリ	314,056 KB
実行使用メモリ	10,124 KB
最終ジャッジ日時	2026-03-01 02:12:08
合計ジャッジ時間	5,020 ms
ジャッジサーバーID （参考情報）	judge2 / judge4
このコードへのチャレンジ
（要ログイン）
ファイルパターン	結果
other	AC * 6
権限があれば一括ダウンロードができます
ソースコード

raw source code
#line 1 "No_1145_Sums_of_Powers.cpp"
#define YRSD
// #include "YRS/aa/fast.hpp"
#line 2 "YRS/all.hpp"

#line 2 "YRS/aa/head.hpp"

#include <iostream>
#include <algorithm>

#include <array>
#include <bitset>
#include <map>
#include <numeric>
#include <queue>
#include <set>
#include <string>
#include <tuple>

#include <bit>
#include <chrono>
#include <functional>
#include <iomanip>
#include <utility>
#include <type_traits>
#include <cassert>
#include <cctype>
#include <cmath>
#include <cstring>
#include <ctime>
#include <limits>
#include <ranges>
#include <concepts>

// Template headers: TE introduces one type parameter T, TES additionally a
// parameter pack S.
#define TE template <typename T>
#define TES template <typename T, typename ...S>
// Z is shorthand for `auto` (variables, parameters and return types).
#define Z auto
// Short spellings of frequently used member names.
#define ep emplace_back
#define eb emplace
#define fi first
#define se second
// Expands to the two arguments x.begin(), x.end().
#define all(x) (x).begin(), (x).end()

// OV4 yields its fifth argument; FOR / FOR_R use it to pick the variant that
// matches the number of arguments supplied (1 to 4).
#define OV4(a, b, c, d, e, ...) e
// Ascending loops with an int counter:
//   FOR(a)          repeat a times (counter is named _)
//   FOR(i, a)       i = 0 .. a-1
//   FOR(i, a, b)    i = a .. b-1
//   FOR(i, a, b, c) i = a, a+c, ... while i < b
#define FOR1(a) for (int _ = 0; _ < (a); ++_)
#define FOR2(i, a) for (int i = 0; i < (a); ++i)
#define FOR3(i, a, b) for (int i = (a); i < (b); ++i)
#define FOR4(i, a, b, c) for (int i = (a); i < (b); i += (c))
#define FOR(...) OV4(__VA_ARGS__, FOR4, FOR3, FOR2, FOR1)(__VA_ARGS__)
// Descending counterparts: the counter starts at the upper bound minus one
// and runs down to the lower bound inclusive.
#define FOR1_R(a) for (int _ = (a) - 1; _ >= 0; --_)
#define FOR2_R(i, a) for (int i = (a) - 1; i >= 0; --i)
#define FOR3_R(i, a, b) for (int i = (b) - 1; i >= (a); --i)
#define FOR4_R(i, a, b, c) for (int i = (b) - 1; i >= (a); i -= (c))
#define FOR_R(...) OV4(__VA_ARGS__, FOR4_R, FOR3_R, FOR2_R, FOR1_R)(__VA_ARGS__)
// Visits t = s, then (t - 1) & s repeatedly down to and including 0; the
// value -1 is used only as the loop-exit sentinel. Note that s is not
// parenthesised in the update expression.
#define FOR_subset(t, s) for (int t = (s); t > -1; t = (t == 0 ? -1 : (t - 1) & s))

// From here on every `sort` token expands to ranges::sort, so code below must
// not spell `ranges::sort` or `std::sort` explicitly.
#define sort ranges::sort

using namespace std;

// Container shorthands: vc<T> is vector<T>, vvc<T> a vector of vectors.
TE using vc = vector<T>;
TE using vvc = vc<vc<T>>;
// Homogeneous tuples of arity 1 to 4.
TE using T1 = tuple<T>;
TE using T2 = tuple<T, T>;
TE using T3 = tuple<T, T, T>;
TE using T4 = tuple<T, T, T, T>;
// priority_queue with the largest (max_heap) or smallest (min_heap) on top.
TE using max_heap = priority_queue<T>;
TE using min_heap = priority_queue<T, vc<T>, greater<T>>;
// Fixed short names for the built-in arithmetic types, including the GCC
// 128-bit integer and floating-point extensions.
using u8 = unsigned char; using uint = unsigned int; using ll = long long;      using ull = unsigned long long;
using ld = long double;   using i128 = __int128;     using u128 = __uint128_t;  using f128 = __float128;
using u16 = uint16_t;
using PII = pair<int, int>;   using PLL = pair<ll, ll>;

// dbg is 1 exactly when the YRSD macro is defined at this point.
#ifdef YRSD
constexpr bool dbg = 1;
#else
constexpr bool dbg = 0;
#endif
#line 2 "YRS/IO/IO.hpp"

// Reads one whitespace-delimited token and parses it as a base-10 __int128.
// A leading '-' is honoured; the remaining characters are not validated.
// If the extraction itself fails (for example at end of input) x is set to 0
// and the stream is returned with its failure state intact. Without that
// check the reused static buffer would still hold the previous token and the
// stale value would be parsed again.
istream &operator>>(istream &I, i128 &x) {
  static string s;  // reused across calls to avoid reallocation
  x = 0;
  if (!(I >> s)) return I;
  const int N = (int)s.size();
  const int first = s[0] == '-';  // index of the first digit
  for (int i = first; i < N; ++i) x = x * 10 + (s[i] - '0');
  if (first) x = -x;
  return I;
}
// Writes a __int128 in base 10.
// The magnitude is computed in the unsigned 128-bit domain so that the most
// negative value, whose negation does not fit in i128, is printed correctly
// instead of triggering signed overflow.
ostream &operator<<(ostream &O, i128 x) {
  static string s;  // reused across calls to avoid reallocation
  s.clear();
  const bool neg = x < 0;
  u128 mag = neg ? u128(0) - u128(x) : u128(x);
  // Digits are produced least-significant first and reversed afterwards.
  do {
    s += char('0' + int(mag % 10));
    mag /= 10;
  } while (mag != 0);
  if (neg) s += '-';
  std::reverse(s.begin(), s.end());
  return O << s;
}
// Reads a token and converts it with stold, then stores it as __float128.
istream &operator>>(istream &I, f128 &x) {
  static string token;
  I >> token;
  x = stold(token);
  return I;
}
// Writes a __float128 after converting it to long double.
ostream &operator<<(ostream &O, const f128 x) {
  const ld narrowed = static_cast<ld>(x);
  return O << narrowed;
}
// Reads every element of a tuple, in order, from the stream.
template <typename... S>
istream &operator>>(istream &I, tuple<S...> &t) {
  apply([&I](auto &...field) { ((I >> field), ...); }, t);
  return I;
}
// Reads first, then second.
template <typename T, typename U>
istream &operator>>(istream &I, pair<T, U> &x) {
  I >> x.first;
  I >> x.second;
  return I;
}
// Writes "first second" separated by a single space.
template <typename T, typename U>
ostream &operator<<(ostream &O, const pair<T, U> &x) {
  O << x.first;
  O << ' ';
  O << x.second;
  return O;
}
// Reads every element of a range (anything with begin/end except string)
// from the stream, in iteration order.
TE requires requires(T &c) { begin(c); end(c); } and
                          (not is_same_v<decay_t<T>, string>)
istream &operator>>(istream &I, T &c) {
  for (Z it = begin(c); it != end(c); ++it) {
    I >> *it;
  }
  return I;
}
// Writes the elements of a range separated by single spaces; an empty range
// writes nothing. Strings, C strings and char arrays are excluded so that the
// standard text output is used for them.
// The body goes through begin()/end(), the same operations the constraint
// checks, so built-in arrays of non-char elements (which satisfy the
// constraint but have no .empty()/.begin() members) are handled as well.
TE requires requires(const T &c) { begin(c); end(c); } and 
  (not is_same_v<decay_t<T>, const char*>) and 
  (not is_same_v<decay_t<T>, string>) and 
  (not is_array_v<remove_reference_t<T>> or 
   not is_same_v<remove_extent_t<remove_reference_t<T>>, char>)
ostream &operator<<(ostream &O, const T &a) {
  Z i = begin(a);
  const Z last = end(a);
  if (i == last) return O;
  O << *i;
  for (++i; i != last; ++i) O << ' ' << *i;
  return O;
}
// Stream-based helpers built on cin / cout.

// IN(a, b, ...): read each argument from cin, left to right.
void IN() {}
TE void IN(T &x, Z &...s) {
  cin >> x;
  IN(s...);
}

// print(a, b, ...): write the arguments separated by single spaces, followed
// by a newline.
void print() { cout << '\n'; }
TES void print(T &&x, S &&...y) {
  cout << x;
  if constexpr (sizeof...(S) > 0) {
    cout << ' ';
  }
  print(forward<S>(y)...);
}

// put(a, b, ...): write the arguments back to back, with no separator and no
// trailing newline.
void put() {}
TES void put(T &&x, S &&...y) {
  cout << x;
  put(forward<S>(y)...);
}

// Declare-and-read helpers: each macro declares the listed variables with the
// given type and immediately reads them through IN(...).
#define INT(...)  int    __VA_ARGS__; IN(__VA_ARGS__)
#define UINT(...) uint   __VA_ARGS__; IN(__VA_ARGS__)
#define LL(...)   ll     __VA_ARGS__; IN(__VA_ARGS__)
#define ULL(...)  ull    __VA_ARGS__; IN(__VA_ARGS__)
#define I128(...) i128   __VA_ARGS__; IN(__VA_ARGS__)
#define STR(...)  string __VA_ARGS__; IN(__VA_ARGS__)
#define CH(...)   char   __VA_ARGS__; IN(__VA_ARGS__)
// NOTE(review): `re` is not defined anywhere in the visible source, so REAL
// only compiles if another header provides that alias - confirm.
#define REAL(...) re     __VA_ARGS__; IN(__VA_ARGS__)
// Declares a vc<T> named a with n elements and reads all of them.
#define VEC(T, a, n) vc<T> a(n); IN(a)

// Verdict printers. A positive helper prints its own word when o is true and
// the opposite word otherwise; the matching negative helper forwards the
// inverted flag to the positive one.
void YES(bool o = 1) {
  if (o) print("YES");
  else print("NO");
}
void Yes(bool o = 1) {
  if (o) print("Yes");
  else print("No");
}
void yes(bool o = 1) {
  if (o) print("yes");
  else print("no");
}
void NO(bool o = 1) { YES(!o); }
void No(bool o = 1) { Yes(!o); }
void no(bool o = 1) { yes(!o); }
void ALICE(bool o = 1) {
  if (o) print("ALICE");
  else print("BOB");
}
void Alice(bool o = 1) {
  if (o) print("Alice");
  else print("Bob");
}
void alice(bool o = 1) {
  if (o) print("alice");
  else print("bob");
}
void BOB(bool o = 1) { ALICE(!o); }
void Bob(bool o = 1) { Alice(!o); }
void bob(bool o = 1) { alice(!o); }
void POSSIBLE(bool o = 1) {
  if (o) print("POSSIBLE");
  else print("IMPOSSIBLE");
}
void Possible(bool o = 1) {
  if (o) print("Possible");
  else print("Impossible");
}
void possible(bool o = 1) {
  if (o) print("possible");
  else print("impossible");
}
void IMPOSSIBLE(bool o = 1) { POSSIBLE(!o); }
void Impossible(bool o = 1) { Possible(!o); }
void impossible(bool o = 1) { possible(!o); }
void TAK(bool o = 1) {
  if (o) print("TAK");
  else print("NIE");
}
void NIE(bool o = 1) { TAK(!o); }
#line 5 "YRS/all.hpp"

// pi as a long double; only available when compiling as C++20 or later.
#if (__cplusplus >= 202002L)
#include <numbers>
constexpr ld pi = numbers::pi_v<ld>;
#endif
// inf<T>: by default the largest value numeric_limits reports for T.
TE constexpr T inf = numeric_limits<T>::max();
// For i128 the value is the long long maximum times 2 * 10^18 rather than a
// numeric_limits value.
template <> constexpr i128 inf<i128> = i128(inf<ll>) * 2'000'000'000'000'000'000;
// For pairs, both components are the respective inf.
template <typename T, typename U>
constexpr pair<T, U> inf<pair<T, U>> = {inf<T>, inf<U>};

// Number of set bits of x, counted on its unsigned representation.
TE constexpr static inline int pc(T x) {
  using U = make_unsigned_t<T>;
  return popcount(static_cast<U>(x));
}
// Size of a container as a signed 64-bit value.
constexpr static inline ll len(const Z &a) {
  return static_cast<ll>(a.size());
}

// Reverses a container in place.
void reverse(Z &a) {
  reverse(a.begin(), a.end());
}

// Sorts a container and removes duplicate elements.
void unique(Z &a) {
  sort(a);
  Z tail = unique(a.begin(), a.end());
  a.erase(tail, a.end());
}
// Builds the inverse mapping of a: result[a[i]] = i for every i with
// a[i] != -1. Positions that are never hit keep the value -1. The result has
// the same length as a.
TE vc<int> inverse(const vc<T> &a) {
  const int n = len(a);
  vc<int> where(n, -1);
  for (int i = 0; i < n; ++i) {
    if (a[i] == -1) continue;
    where[a[i]] = i;
  }
  return where;
}

// Largest / smallest element of a container (must be non-empty) ...
Z QMAX(const Z &a) {
  Z it = max_element(a.begin(), a.end());
  return *it;
}
Z QMIN(const Z &a) {
  Z it = min_element(a.begin(), a.end());
  return *it;
}
// ... or of an iterator range [l, r).
TE Z QMAX(T l, T r) {
  Z it = max_element(l, r);
  return *it;
}
TE Z QMIN(T l, T r) {
  Z it = min_element(l, r);
  return *it;
}
// a = max(a, b); returns whether a was changed.
constexpr bool chmax(Z &a, const Z &b) {
  if (a < b) {
    a = b;
    return true;
  }
  return false;
}
// a = min(a, b); returns whether a was changed.
constexpr bool chmin(Z &a, const Z &b) {
  if (a > b) {
    a = b;
    return true;
  }
  return false;
}

// Returns the indices 0..len(a)-1 ordered by ascending a[index]; equal values
// are ordered by ascending index.
vc<int> argsort(const Z &a) {
  vc<int> order(len(a));
  iota(order.begin(), order.end(), 0);
  Z by_value_then_index = [&](int i, int k) {
    if (a[i] < a[k]) return true;
    return a[i] == a[k] and i < k;
  };
  sort(order, by_value_then_index);
  return order;
}
// Gathers a through the index list I: result[i] = a[I[i]].
TE vc<T> rearrange(const vc<T> &a, const vc<int> &I) {
  const int n = len(I);
  vc<T> picked(n);
  for (int i = 0; i < n; ++i) {
    picked[i] = a[I[i]];
  }
  return picked;
}
// Prefix sums of a. With of != 0 (default) the result has len(a) + 1 entries
// and starts with a value-initialised T; with of == 0 that leading entry is
// dropped.
template <int of = 1, typename T>
vc<T> pre_sum(const vc<T> &a) {
  const int n = len(a);
  vc<T> acc(n + 1);
  for (int i = 0; i < n; ++i) {
    acc[i + 1] = acc[i] + a[i];
  }
  if constexpr (of == 0) {
    acc.erase(acc.begin());
  }
  return acc;
}

// Index of the highest set bit of x, or -1 when x is 0. Types of at most four
// bytes use the 32-bit builtin, everything else the 64-bit one.
TE constexpr static int topbit(T x) {
  if (x == 0) {
    return -1;
  }
  if constexpr (sizeof(T) <= 4) {
    return 31 - __builtin_clz(x);
  } else {
    return 63 - __builtin_clzll(x);
  }
}
// Index of the lowest set bit of x, or -1 when x is 0.
TE constexpr static int lowbit(T x) {
  if (x == 0) {
    return -1;
  }
  if constexpr (sizeof(T) <= 4) {
    return __builtin_ctz(x);
  } else {
    return __builtin_ctzll(x);
  }
}

// Integer division helpers that round towards -infinity / +infinity instead
// of towards zero. y must be non-zero.

// floor(x / y): truncating division, minus one when there is a remainder and
// the operands have opposite signs.
template <typename T>
constexpr T floor(T x, T y) { return x / y - (x % y and (x ^ y) < 0); }
// ceil(x / y): truncating division, plus one when there is a remainder and
// the operands have the same sign. Computed directly rather than as
// floor(x + y - 1, y), which is only valid for positive y and overflows when
// x is close to the maximum of T.
template <typename T>
constexpr T ceil(T x, T y) { return x / y + (x % y and (x ^ y) >= 0); }
// Remainder consistent with floor(): x - floor(x / y) * y.
template <typename T>
constexpr T bmod(T x, T y) { return x - floor(x, y) * y; }
// Returns {floor(x / y), x - floor(x / y) * y}.
template <typename T>
constexpr std::pair<T, T> divmod(T x, T y) {
  T q = floor(x, y);
  return std::pair<T, T>{q, x - q * y};
}
template <typename T = ll>
T SUM(const Z &v) {
  return accumulate(all(v), T(0));
}
int lb(const Z &a, Z x) { return lower_bound(all(a), x) - a.begin(); }
TE int lb(T l, T r, Z x) { return lower_bound(l, r, x) - l; }
int ub(const Z &a, Z x) { return upper_bound(all(a), x) - a.begin(); }
TE int ub(T l, T r, Z x) { return upper_bound(l, r, x) - l; }

// Integer binary search. l is a point where f holds and r one where it is
// taken not to hold; the interval is halved until l and r are adjacent and the
// final l is returned. With ck set, f(l) is asserted up front. l may be
// greater than r.
template <bool ck = 1>
ll bina(Z f, ll l, ll r) {
  if constexpr (ck) {
    assert(f(l));
  }
  while (abs(l - r) > 1) {
    const ll mid = (r + l) >> 1;
    if (f(mid)) {
      l = mid;
    } else {
      r = mid;
    }
  }
  return l;
}
// Bisection on a real interval: performs c halving steps (100 by default),
// moving l to the midpoint when f holds there and r otherwise, and returns the
// midpoint of the final interval.
TE T bina_real(Z f, T l, T r, int c = 100) {
  for (; c != 0; --c) {
    const T mid = (l + r) / 2;
    if (f(mid)) {
      l = mid;
    } else {
      r = mid;
    }
  }
  return (l + r) / 2;
}

// Removes one element from a container and returns a copy of it:
//  - containers with pop_back(): the last element,
//  - otherwise containers with top(): the top element,
//  - otherwise: the front element (removed with pop()).
Z pop(Z &s) {
  if constexpr (requires { s.pop_back(); }) {
    Z taken = s.back();
    s.pop_back();
    return taken;
  } else if constexpr (requires { s.top(); }) {
    Z taken = s.top();
    s.pop();
    return taken;
  } else {
    Z taken = s.front();
    s.pop();
    return taken;
  }
}
// Switches cout to fixed notation with x digits after the decimal point.
void setp(int x) {
  cout << fixed;
  cout << setprecision(x);
}

// Resizes a to N elements; new elements are copies of b.
TE inline void sh(vc<T> &a, int N, T b = {}) {
  a.resize(N, b);
}
#line 1 "YRS/debug.hpp"
// Debug output helpers, active only when YRSD is defined.
#ifdef YRSD
// DBG(a, b, ...): writes the values to cerr separated by ", ", then "]" and a
// flushing newline. Used as the tail of debug(...).
void DBG() { cerr << "]" << endl; }
TES void DBG(T &&x, S &&...y) {
  cerr << x;
  if constexpr (sizeof...(S)) cerr << ", ";
  DBG(forward<S>(y)...);
}
// debug(x, y): prints "[line]: [x, y] = [<x>, <y>]" to cerr.
#define debug(...) cerr << "[" << __LINE__ << "]: [" #__VA_ARGS__ "] = [", DBG(__VA_ARGS__)
// ERR(a, b, ...): like DBG but without the closing bracket.
void ERR() { cerr << endl; }
TES void ERR(T &&x, S &&...y) {
  cerr << x;
  if constexpr (sizeof...(S)) cerr << ", ";
  ERR(forward<S>(y)...);
}
// err(x, y): prints "[line]: <x>, <y>" to cerr.
#define err(...) cerr << "[" << __LINE__ << "]: ", ERR(__VA_ARGS__)
#define asser assert
#else
// Without YRSD, debug/err expand to a no-op expression.
// NOTE(review): `asser` is only defined in the YRSD branch above, so code that
// uses it will not compile without YRSD unless it is defined elsewhere.
#define debug(...) void(0721)
#define err(...)   void(0721)
#endif
#line 2 "YRS/IO/fast_io.hpp"

// Marks that the buffered fast-IO layer is present; later code tests this
// macro with #ifdef.
#define FIO

// Size in bytes of the input and the output buffer (128 KiB each).
static constexpr uint SZ = 1 << 17;
char ibuf[SZ];
char obuf[SZ];
// Scratch area used by wt_integer to assemble digit groups.
char out[100];
// Cursors into the buffers: [pil, pir) is the unread part of ibuf,
// [0, por) the pending part of obuf.
uint pil = 0, pir = 0, por = 0;

// Compile-time lookup table: num[v] holds the four ASCII digits of v, padded
// with leading zeros, for every v in [0, 10000).
struct Pre {
  char num[10000][4];
  constexpr Pre() : num() {
    for (int value = 0; value < 10000; ++value) {
      int rest = value;
      int pos = 4;
      // Fill from the least significant digit backwards.
      while (pos-- > 0) {
        num[value][pos] = static_cast<char>('0' + rest % 10);
        rest /= 10;
      }
    }
  }
} constexpr pre;

inline void load() {
  memcpy(ibuf, ibuf + pil, pir - pil);
  pir = pir - pil + fread(ibuf + pir - pil, 1, SZ - pir + pil, stdin);
  pil = 0;
  if (pir < SZ) ibuf[pir++] = '\n';
}

inline void flush() {
  fwrite(obuf, 1, por, stdout);
  por = 0;
}
// Reads the next non-whitespace character, refilling the buffer on demand.
inline void rd(char &c) {
  for (;;) {
    if (pil + 1 > pir) load();
    c = ibuf[pil++];
    if (!isspace(c)) break;
  }
}

// Reads the next whitespace-delimited token into x (previous contents are
// discarded). The whitespace character ending the token is consumed.
inline void rd(string &x) {
  x.clear();
  char cur;
  rd(cur);  // first character of the token
  for (;;) {
    x += cur;
    if (pil == pir) load();
    cur = ibuf[pil++];
    if (isspace(cur)) break;
  }
}

// Reads a token and parses it as a floating-point value.
// double is parsed with stod; every wider type (long double, __float128) is
// parsed with stold so that the extra precision of the target is not thrown
// away by going through double first. This matches the stream operator>> for
// f128, which also uses stold.
TE inline void rd_real(T &x) {
  string s;
  rd(s);
  if constexpr (is_same_v<T, double>) {
    x = stod(s);
  } else {
    x = stold(s);
  }
}

// Parses the next integer directly out of the input buffer into x.
// No validation is performed: every character below '-' is skipped as a
// separator, and the number ends at the first character below '0'.
TE inline void rd_integer(T &x) {
  // Refill when fewer than 100 unread bytes remain, so the scan below can run
  // without further bounds checks.
  if (pil + 100 > pir) load();
  char c;
  // Skip everything that sorts before '-' (this includes blanks and newlines).
  do c = ibuf[pil++];
  while (c < '-');
  bool minus = 0;
  // A sign is only recognised for signed targets (i128 is listed explicitly).
  if constexpr (is_signed<T>::value || is_same_v<T, i128>) {
    if (c == '-') {
      minus = 1, c = ibuf[pil++];
    }
  }
  x = 0;
  // c & 15 is the numeric value of an ASCII digit.
  while ('0' <= c) {
    x = x * 10 + (c & 15), c = ibuf[pil++];
  }
  if constexpr (is_signed<T>::value || is_same_v<T, i128>) {
    if (minus) x = -x;
  }
}

// rd overloads for the built-in arithmetic types: integers go through
// rd_integer, floating-point types through rd_real.
inline void rd(int16_t &x) { rd_integer(x); }
inline void rd(uint16_t &x) { rd_integer(x); }
inline void rd(int &x) { rd_integer(x); }
inline void rd(long &x) { rd_integer(x); }
inline void rd(ll &x) { rd_integer(x); }
inline void rd(i128 &x) { rd_integer(x); }
inline void rd(uint &x) { rd_integer(x); }
inline void rd(ull &x) { rd_integer(x); }
inline void rd(u128 &x) { rd_integer(x); }
inline void rd(double &x) { rd_real(x); }
inline void rd(long double &x) { rd_real(x); }
inline void rd(f128 &x) { rd_real(x); }

// Reads a pair: first, then second.
template <typename T, typename U>
inline void rd(pair<T, U> &p) {
  rd(p.first);
  rd(p.second);
}
// Reads the tuple elements with index N and above, in order.
template <size_t N = 0, typename T>
inline void rd_tuple(T &t) {
  if constexpr (N >= tuple_size<T>::value) {
    return;
  } else {
    rd(get<N>(t));
    rd_tuple<N + 1>(t);
  }
}
// Reads every element of a tuple, in order.
template <typename... T>
inline void rd(tuple<T...> &tpl) {
  rd_tuple<0>(tpl);
}

// Reads all N elements of a std::array, front to back.
template <size_t N = 0, typename T>
inline void rd(array<T, N> &x) {
  for (size_t i = 0; i < N; ++i) {
    rd(x[i]);
  }
}
// Reads every element of an already sized vector, front to back.
TE inline void rd(vc<T> &x) {
  const size_t count = x.size();
  for (size_t i = 0; i < count; ++i) {
    rd(x[i]);
  }
}

// read(a, b, ...): reads each argument with the matching rd overload, left to
// right. The zero-argument overload ends the recursion.
inline void read() {}
template <typename H, typename... T>
inline void read(H &h, T &...t) {
  rd(h);
  read(t...);
}

// Appends one character to the output buffer, flushing first when it is full.
inline void wt(const char c) {
  if (por == SZ) flush();
  obuf[por++] = c;
}
// Appends every character of a string. The string is taken by const
// reference so that writing it does not copy it first.
inline void wt(const string &s) {
  for (char c : s) wt(c);
}
// Appends a NUL-terminated C string (without the terminator).
inline void wt(const char *s) {
  size_t len = strlen(s);
  for (size_t i = 0; i < len; i++) wt(s[i]);
}

// Appends the decimal representation of x to the output buffer.
// The number is split into groups of four digits that are looked up in the
// precomputed table `pre`; the low groups are staged in `out`, the leading
// group (fewer than five digits, no zero padding) is written directly.
TE inline void wt_integer(T x) {
  // Keep at least 100 bytes free so that no bounds checks are needed below.
  if (por > SZ - 100) flush();
  if (x < 0) {
    obuf[por++] = '-', x = -x;
  }
  int outi;
  // Peel off four digits at a time from the low end; group k is stored at
  // out[96 - 4k .. 99 - 4k], so the staged digits end up in reading order.
  for (outi = 96; x >= 10000; outi -= 4) {
    memcpy(out + outi, pre.num[x % 10000], 4);
    x /= 10000;
  }
  // Leading group: x < 10000 here, printed without leading zeros.
  if (x >= 1000) {
    memcpy(obuf + por, pre.num[x], 4);
    por += 4;
  } else if (x >= 100) {
    memcpy(obuf + por, pre.num[x] + 1, 3);
    por += 3;
  } else if (x >= 10) {
    // q is x / 10 for two-digit x, computed with a multiply and a shift.
    int q = (x * 103) >> 10;
    obuf[por] = q | '0';
    obuf[por + 1] = (x - q * 10) | '0';
    por += 2;
  } else
    obuf[por++] = x | '0';
  // Append the staged four-digit groups (96 - outi bytes in total).
  memcpy(obuf + por, out + outi + 4, 96 - outi);
  por += 96 - outi;
}

// Appends x, converted to double, in fixed notation with ten digits after the
// decimal point.
TE inline void wt_real(T x) {
  ostringstream formatted;
  formatted << fixed;
  formatted << setprecision(10);
  formatted << static_cast<double>(x);
  const string text = formatted.str();
  wt(text);
}

// wt overloads for the built-in arithmetic types: integers go through
// wt_integer, floating-point types through wt_real.
inline void wt(int x) { wt_integer(x); }
inline void wt(long x) { wt_integer(x); }
inline void wt(ll x) { wt_integer(x); }
inline void wt(i128 x) { wt_integer(x); }
inline void wt(uint x) { wt_integer(x); }
inline void wt(ull x) { wt_integer(x); }
inline void wt(u128 x) { wt_integer(x); }
inline void wt(double x) { wt_real(x); }
inline void wt(long double x) { wt_real(x); }
inline void wt(f128 x) { wt_real(x); }

template <typename T, typename U>
inline void wt(const pair<T, U> &val) {
  wt(val.fi);
  wt(' ');
  wt(val.se);
}
template <size_t N = 0, typename T>
inline void wt_tuple(const T &t) {
  if constexpr (N < tuple_size<T>::value) {
    if constexpr (N > 0) {
      wt(' ');
    }
    const Z x = get<N>(t);
    wt(x);
    wt_tuple<N + 1>(t);
  }
}
template <typename... T>
inline void wt(tuple<T...> &tpl) {
  wt_tuple(tpl);
}
// Writes the elements of a std::array separated by single spaces.
template <typename T, size_t S>
inline void wt(const array<T, S> &val) {
  const size_t count = val.size();
  size_t i = 0;
  while (i < count) {
    if (i != 0) wt(' ');
    wt(val[i]);
    ++i;
  }
}
// Writes the elements of a vector separated by single spaces.
TE inline void wt(const vc<T> &a) {
  const int count = len(a);
  for (int i = 0; i < count; ++i) {
    if (i != 0) wt(' ');
    wt(a[i]);
  }
}
// Writes a vector of vectors, one inner vector per line; no newline follows
// the last line.
TE inline void wt(const vc<vc<T>> &v) {
  const int rows = len(v);
  for (int i = 0; i < rows; ++i) {
    wt(v[i]);
    if (i + 1 < rows) wt('\n');
  }
}
// Writes a vector of arrays, one array per line; no newline follows the last
// line.
template <typename T, const size_t s>
inline void wt(const vc<array<T, s>> &v) {
  const int rows = len(v);
  for (int i = 0; i < rows; ++i) {
    wt(v[i]);
    if (i + 1 < rows) wt('\n');
  }
}

// GCC extension: the destructor attribute makes this run automatically after
// main returns, so buffered output is always written out.
inline void __attribute__((destructor)) _d() {
  flush();
}

// println(a, b, ...): writes the arguments through wt, separated by single
// spaces and followed by a newline. With no arguments only the newline is
// written.
inline void println() {
  wt('\n');
}
template <typename Head, typename... Tail>
inline void println(Head &&head, Tail &&...tail) {
  wt(head);
  if constexpr (sizeof...(Tail) > 0) {
    wt(' ');
  }
  println(forward<Tail>(tail)...);
}

// From here on IN / print are routed to the buffered read / println
// implementations instead of the cin / cout based functions of the same name;
// code that appeared before this point still uses the stream versions.
#define IN(...) read(__VA_ARGS__)
#define print(...) println(__VA_ARGS__)
#define FLUSH() flush()
#line 6 "No_1145_Sums_of_Powers.cpp"
// #include "YRS/random/rng.hpp"
// #include "YRS/ds/basic/retsu.hpp"
// #include "YRS/mod/mint.hpp"
// #include "YRS/aa/def.hpp"
#line 2 "YRS/poly/fps_t.hpp"

#line 2 "YRS/mod/mint_t.hpp"

// `c` abbreviates constexpr for the duration of this struct only.
#define c constexpr
// Integer modulo the compile-time constant `mod`. The stored representative x
// is kept in [0, mod) by every constructor and operator below.
template <int mod>
struct mint_t {
  using T = mint_t;
  static c uint m = mod;
  uint x;

  // The representative in [0, mod).
  c inline uint val() const { return x; }

  // Construction reduces the argument modulo mod; negative signed values are
  // mapped to their non-negative representative.
  c mint_t() : x(0) {}
  c mint_t(uint x) : x(x % m) {}
  c mint_t(ull x) : x(x % m) {}
  c mint_t(u128 x) : x(x % m) {}
  c mint_t(int x) : x((x %= mod) < 0 ? x + mod : x) {}
  c mint_t(ll x) : x((x %= mod) < 0 ? x + mod : x) {}
  c mint_t(i128 x) : x((x %= mod) < 0 ? x + mod : x) {}

  // Addition / subtraction with a single conditional correction.
  c T &operator+=(T p) {
    if ((x += p.x) >= m) x -= m;
    return *this;
  }
  c T &operator-=(T p) {
    if ((x += m - p.x) >= m) x -= m;
    return *this;
  }
  c T operator+(T p) const { return T(*this) += p; }
  c T operator-(T p) const { return T(*this) -= p; }

  // Multiplication through a 64-bit intermediate product.
  c T &operator*=(T p) {
    x = ull(x) * p.x % m;
    return *this;
  }
  c T operator*(T p) const { return T(*this) *= p; }

  // Division multiplies by p.inv().
  c T &operator/=(T p) { return *this *= p.inv(); }
  c T operator/(T p) const { return T(*this) /= p; }

  c T operator-() const { return T::gen(x ? mod - x : 0); }

  // Inverse via the extended Euclidean algorithm on (x, mod).
  // In the declaration below `a = x` still reads the member x, because the
  // local variable x only comes into scope at its own declarator later in the
  // same statement; from then on `x` names the local coefficient.
  c T inv() const {
    int a = x, b = mod, x = 1, y = 0;
    while (b > 0) {
      int t = a / b;
      swap(a -= t * b, b);
      swap(x -= t * y, y);
    }
    return T(x);
  }

  // Binary exponentiation; a negative exponent raises the inverse instead.
  c T pow(ll k) const {
    if (k < 0) return inv().pow(-k);
    T s(1), a(x);
    for (; k; k >>= 1, a *= a)
      if (k & 1) s *= a;
    return s;
  }

  // Comparisons act on the stored representatives.
  c bool operator<(T p) const { return x < p.x; }
  c bool operator==(T p) const { return x == p.x; }
  c bool operator!=(T p) const { return x != p.x; }

  // Builds a value from x without reducing it; the caller must pass a value
  // that is already in [0, mod).
  static c T gen(uint x) {
    T s;
    s.x = x;
    return s;
  }

  // Stream input reads a long long and reduces it.
  friend istream &operator>>(istream &cin, T &p) {
    ll t;
    cin >> t;
    p = t;
    return cin;
  }
  
  friend ostream &operator<<(ostream &cout, T p) { return cout << p.x; }

  static c int get_mod() { return mod; }

  // Lookup table of NTT parameters per supported modulus. The NTT code uses
  // the first component as the largest supported log2 transform length and
  // the second as the root stored for that level; {-1, -1} means the modulus
  // is not in the table.
  static c PII ntt_info() {
    if (mod == 167772161) return {25, 17};
    if (mod == 469762049) return {26, 30};
    if (mod == 754974721) return {24, 362};
    if (mod == 998244353) return {23, 31};
    if (mod == 120586241) return {20, 74066978};
    if (mod == 880803841) return {23, 211};
    if (mod == 943718401) return {22, 663003469};
    if (mod == 1004535809) return {21, 582313106};
    if (mod == 1012924417) return {21, 368093570};
    return {-1, -1};
  }
  
  // True when ntt_info() has an entry for this modulus.
  static c bool can_ntt() { return ntt_info().fi != -1; }
};
#undef c

// The two moduli most commonly used in contest problems.
using M99 = mint_t<998244353>;
using M17 = mint_t<1000000007>;

#ifdef FIO
// Fast-IO hooks: a mint is read as a long long (and reduced on assignment)
// and written as its representative.
template <int mod>
void rd(mint_t<mod> &x) {
  ll raw;
  rd(raw);
  x = raw;
}
template <int mod>
void wt(mint_t<mod> x) {
  wt(x.val());
}
#endif
#line 2 "YRS/poly/binom.hpp"

// Lazily grown tables of factorials, inverse factorials and modular inverses
// for the modular type T, plus binomial coefficients built on them.
TE struct binom {
  // Modulus, captured when the object is constructed.
  const int p = T::get_mod();
  // fa[i] = i!, ifa[i] = 1 / i!, in[i] = 1 / i (in[0] is a placeholder 0).
  vc<T> fa{1, 1}, ifa{1, 1}, in{0, 1};

  // 1 / n, extending the table as needed. For each new index k it uses
  // q = ceil(p / k) and r = k * q - p, which is smaller than k, together with
  // the already known in[r].
  T inv(int n) {
    assert(0 <= n);
    for (int k = len(in); k <= n; ++k) {
      const int q = (p + k - 1) / k;
      const int r = k * q - p;
      in.ep(in[r] * T(q));
    }
    return in[n];
  }

  // n!, which is 0 once n reaches the modulus.
  T fac(int n) {
    if (n >= p) return 0;
    for (int k = len(fa); k <= n; ++k) {
      fa.ep(fa[k - 1] * T(k));
    }
    return fa[n];
  }

  // 1 / n!; returns 0 for negative n.
  T ifac(int n) {
    if (n < 0) return T(0);
    for (int k = len(ifa); k <= n; ++k) {
      ifa.ep(ifa.back() * inv(k));
    }
    return ifa[n];
  }

  // Binomial coefficient from the factorial tables; 0 when K is out of range.
  T C(int N, int K) {
    assert(N >= 0);
    const bool outside = K < 0 or N < K;
    if (outside) return 0;
    return fac(N) * ifac(K) * ifac(N - K);
  }

  // Binomial coefficient computed digit by digit in base p.
  T lucas(ll N, ll K) {
    if (K > N) return 0;
    if (K == 0) return 1;
    const T low = C(N % p, K % p);
    return low * lucas(N / p, K / p);
  }

  // Binomial coefficient as a falling product of min(K, N - K) factors,
  // divided by that many factorial.
  T C_naive(ll N, ll K) {
    assert(N >= 0);
    if (K < 0 or N < K) return 0;
    if (N - K < K) K = N - K;
    T prod = 1;
    for (int i = 0; i < K; ++i) {
      prod *= T(N - i);
    }
    return prod * ifac(K);
  }
};
#line 5 "YRS/poly/fps_t.hpp"

// With a dynamic modulus, construct this object only after the modulus has been set.
TE struct fps_t : binom<T> {
  using fps = vc<T>;
  using cf = const fps;
  using U = binom<T>;
  using U::inv, U::fac, U::ifac, U::C;
  
  // Resizes a to N coefficients (new coefficients are value-initialised).
  static void sh(fps &a, int N) {
    a.resize(N);
  }

  // Number of non-zero coefficients of f.
  static int count_terms(cf &f) {
    int nonzero = 0;
    for (const T &coef : f) {
      if (coef.val() != 0) ++nonzero;
    }
    return nonzero;
  }

  // The three NTT-friendly primes used for arbitrary-modulus convolution.
  static constexpr int p0 = 167'772'161, p1 = 469'762'049, p2 = 754'974'721;

  // Combines residues a (mod p0), b (mod p1), c (mod p2) of one non-negative
  // integer below p0 * p1 * p2 and returns that integer reduced into T.
  T crt(ull a, ull b, ull c) {
    // x is the inverse of p0 modulo p1; xx is the inverse of p0 * p1 modulo p2.
    constexpr ull x = 104'391'568, xx = 190'329'765;
    // s is the value modulo p0 * p1: s = a + p0 * ((b - a) / p0 mod p1).
    ull t = (b - a + p1) * x % p1, s = a + t * p0;
    // t becomes the remaining coefficient of p0 * p1.
    t = (c - s % p2 + p2) * xx % p2;
    return T(s) + T(t) * T(ull(p0) * p1);
  }

  // In-place number-theoretic transform of a, whose length must be a power of
  // two not exceeding 2^(first component of T::ntt_info()).
  // in == false runs the forward transform, in == true the inverse one; the
  // inverse path multiplies every entry by 1 / N before its butterfly stages.
  // Both directions process two levels at a time (radix 4) and fall back to a
  // single radix-2 level when only one level is left.
  // NOTE(review): no explicit bit-reversal permutation is performed, so the
  // forward output is presumably in a permuted order that the inverse path
  // undoes - confirm before consuming forward output directly.
  static void ntt(fps &a, bool in) {
    assert(T::can_ntt());
    const int p = T::ntt_info().fi;
    const uint m = T::get_mod();
    // Root tables, filled once per instantiation on the first call.
    static array<T, 30> r, ir, ra, ira, rat, irat;
    assert(p != -1 and len(a) <= (1 << max(0, p)));
    static bool ok = 0;
    if (not ok) {
      ok = 1;
      // r[p] is the root from ntt_info(); each lower entry is the square of
      // the one above it. ir holds the corresponding inverses.
      r[p] = T::ntt_info().se;
      ir[p] = T(1) / r[p];
      FOR_R(i, p) {
        r[i] = r[i + 1] * r[i + 1];
        ir[i] = ir[i + 1] * ir[i + 1];
      }
      // Incremental twiddle multipliers for the radix-2 stages (ra / ira).
      // Note: the local `in` below shadows the bool parameter inside this
      // block.
      T s = 1, in = 1;
      FOR(i, p - 1) {
        ra[i] = r[i + 2] * s;
        ira[i] = ir[i + 2] * in;
        s *= ir[i + 2];
        in *= r[i + 2];
      }
      // Incremental twiddle multipliers for the radix-4 stages (rat / irat).
      s = 1, in = 1;
      FOR(i, p - 2) {
        rat[i] = r[i + 3] * s;
        irat[i] = ir[i + 3] * in;
        s *= ir[i + 3];
        in *= r[i + 3];
      }
    }

    // N = 2^n is the transform length.
    int N = len(a), n = topbit(N);
    if (not in) {
      // Forward transform: sz counts the levels already processed.
      int sz = 0;
      while (sz < n) {
        if (n - sz == 1) {
          // Single remaining level: radix-2 butterflies.
          int p = 1 << (n - sz - 1);
          T c = 1;
          FOR(s, 1 << sz) {
            int of = s << (n - sz);
            FOR(i, p) {
              T l = a[i + of], r = a[i + of + p] * c;
              a[i + of] = l + r, a[i + of + p] = l - r;
            }
            // topbit(~s & -~s) is the index of the lowest zero bit of s.
            c *= ra[topbit(~s & -~s)];
          }
          ++sz;
        } else {
          // Two levels at once: radix-4 butterflies. `in` (shadowing the
          // parameter) is r[2].
          int p = 1 << (n - sz - 2);
          T c = 1, in = r[2];
          FOR(s, 1 << sz) {
            T r2 = c * c, r3 = r2 * c;
            int of = s << (n - sz);
            FOR(i, p) {
              // Work on raw 64-bit values; multiples of mm = m * m are added
              // to keep the intermediate differences non-negative.
              const ull mm = ull(m) * m;
              ull a0 = a[i + of].val(), a1 = ull(a[i + of + p].val()) * c.val();
              ull aa = ull(a[i + of + 2 * p].val()) * r2.val();
              ull bb = ull(a[i + of + 3 * p].val()) * r3.val();
              ull t = (a1 + mm - bb) % m * in.val();
              ull na = mm - aa;
              a[i + of] = a0 + a1 + aa + bb;
              a[i + of + p] = a0 + aa + mm * 2 - a1 - bb;
              a[i + of + 2 * p] = a0 + na + t;
              a[i + of + 3 * p] = a0 + na + mm - t;
            }
            c *= rat[topbit(~s & -~s)];
          }
          sz += 2;
        }
      }
    } else {
      // Inverse transform: scale by 1 / N first, then run the stages from
      // level n down to 0.
      T c = T(1) / T(N);
      FOR(i, N) a[i] *= c;
      int sz = n;
      while (sz) {
        if (sz == 1) {
          // Single remaining level: radix-2 butterflies.
          int p = 1 << (n - sz);
          T c = 1;
          FOR(s, 1 << (sz - 1)) {
            int of = s << (n - sz + 1);
            FOR(i, p) {
              ull l = a[i + of].val(), r = a[i + of + p].val();
              a[i + of] = l + r;
              a[i + of + p] = (m + l - r) * c.val();
            }
            c *= ira[topbit(~s & -~s)];
          }
          --sz;
        } else {
          // Two levels at once: radix-4 butterflies with `in` = ir[2].
          int p = 1 << (n - sz);
          T c = 1, in = ir[2];
          FOR(s, 1 << (sz - 2)) {
            T r2 = c * c, r3 = r2 * c;
            int of = s << (n - sz + 2);
            FOR(i, p) {
              ull a0 = a[i + of].val(), a1 = a[i + of + p].val();
              ull aa = a[i + of + 2 * p].val();
              ull bb = a[i + of + 3 * p].val();
              ull x = (m + aa - bb) * in.val() % m;
              a[i + of] = a0 + a1 + aa + bb;
              a[i + of + p] = (a0 + m - a1 + x) * c.val();
              a[i + of + 2 * p] = (a0 + a1 + 2 * m - aa - bb) * r2.val();
              a[i + of + 3 * p] = (a0 + 2 * m - a1 - x) * r3.val();
            }
            c *= irat[topbit(~s & -~s)];
          }
          sz -= 2;
        }
      }
    }
  }

  // Schoolbook product of a and b in O(len(a) * len(b)). Returns an empty
  // polynomial when either operand is empty. The shorter operand always drives
  // the outer loop.
  fps conv_naive(cf &a, cf &b) {
    const int n = len(a), m = len(b);
    const int out_len = n + m - 1;
    if (n == 0 or m == 0) return {};
    if (n > m) return conv_naive(b, a);
    fps prod(out_len);
    for (int i = 0; i < n; ++i) {
      for (int k = 0; k < m; ++k) {
        prod[i + k] += a[i] * b[k];
      }
    }
    return prod;
  }

  // Karatsuba multiplication of f and g. Falls back to conv_naive as soon as
  // the shorter operand has at most lm coefficients.
  fps conv_kara(cf &f, cf &g) {
    constexpr int lm = 30;
    if (min(len(f), len(g)) <= lm) return conv_naive(f, g);
    // Split both operands at M = ceil(max length / 2) into a low part (f1, g1)
    // and a high part (f2, g2); an operand shorter than M has an empty high
    // part.
    int N = max(len(f), len(g)), M = ceil(N, 2);
    fps f1, f2, g1, g2;
    if (len(f) < M) f1 = f;
    if (len(f) >= M) f1 = {f.begin(), f.begin() + M};
    if (len(f) >= M) f2 = {f.begin() + M, f.end()};
    if (len(g) < M) g1 = g;
    if (len(g) >= M) g1 = {g.begin(), g.begin() + M};
    if (len(g) >= M) g2 = {g.begin() + M, g.end()};
    // a = low * low, b = high * high, c = (low + high) * (low + high).
    fps a = conv_kara(f1, g1);
    fps b = conv_kara(f2, g2);
    FOR(i, len(f2)) f1[i] += f2[i];
    FOR(i, len(g2)) g1[i] += g2[i];
    fps c = conv_kara(f1, g1);
    fps F(len(f) + len(g) - 1);
    // Place a at offset 0 and b at offset 2M, and reduce c to the cross term
    // c - a - b, which belongs at offset M.
    FOR(i, len(a)) F[i] += a[i], c[i] -= a[i];
    FOR(i, len(b)) F[2 * M + i] += b[i], c[i] -= b[i];
    if (c.back() == T(0)) c.pop_back();
    // Zero coefficients are skipped, so positions of c that would fall outside
    // F are not written as long as they are zero.
    FOR(i, len(c)) if (c[i] != T(0)) F[M + i] += c[i];
    return F;
  }

  // Product of a and b through the NTT; requires an NTT-capable modulus.
  // Returns an empty polynomial when either operand is empty.
  static fps conv_ntt(fps a, fps b) {
    assert(T::can_ntt());
    const int n = len(a), m = len(b);
    if (n == 0 or m == 0) return {};
    const int out_len = n + m - 1;
    // Transform length: the smallest power of two holding the full product.
    int sz = 1;
    while (sz < out_len) sz *= 2;
    a.resize(sz);
    b.resize(sz);
    ntt(a, false);
    ntt(b, false);
    for (int i = 0; i < sz; ++i) a[i] *= b[i];
    ntt(a, true);
    a.resize(out_len);
    return a;
  }

  // Product of a and b for a modulus without NTT support: the operands are
  // multiplied modulo each of p0, p1 and p2 with the NTT and the three
  // results are recombined coefficient by coefficient with crt().
  fps conv_mtt(cf &a, cf &b) {
    const int n = len(a), m = len(b);
    if (n == 0 or m == 0) return {};
    using f0 = fps_t<mint_t<p0>>;
    using f1 = fps_t<mint_t<p1>>;
    using f2 = fps_t<mint_t<p2>>;
    vc<mint_t<p0>> a0(n), b0(m);
    vc<mint_t<p1>> a1(n), b1(m);
    vc<mint_t<p2>> a2(n), b2(m);
    for (int i = 0; i < n; ++i) {
      const uint v = a[i].val();
      a0[i] = v;
      a1[i] = v;
      a2[i] = v;
    }
    for (int i = 0; i < m; ++i) {
      const uint v = b[i].val();
      b0[i] = v;
      b1[i] = v;
      b2[i] = v;
    }
    Z c0 = f0::conv_ntt(a0, b0);
    Z c1 = f1::conv_ntt(a1, b1);
    Z c2 = f2::conv_ntt(a2, b2);
    fps merged(len(c0));
    for (int i = 0; i < n + m - 1; ++i) {
      merged[i] = crt(c0[i].val(), c1[i].val(), c2[i].val());
    }
    return merged;
  }

  // Product of a and b, choosing the algorithm by size: Karatsuba while the
  // shorter operand is small (at most 50 coefficients with NTT support, 200
  // without), otherwise NTT or the three-prime method.
  fps conv(cf &a, cf &b) {
    const int shorter = min(len(a), len(b));
    if (shorter == 0) return {};
    const bool fast = T::can_ntt();
    const int cutoff = fast ? 50 : 200;
    if (shorter <= cutoff) return conv_kara(a, b);
    if (fast) return conv_ntt(a, b);
    return conv_mtt(a, b);
  }

  // Square of a through a single forward NTT; requires an NTT-capable
  // modulus. Returns an empty polynomial for empty input.
  static fps sq_ntt(fps a) {
    assert(T::can_ntt());
    const int n = len(a);
    if (n == 0) return {};
    const int out_len = n + n - 1;
    int sz = 1;
    while (sz < out_len) sz *= 2;
    a.resize(sz);
    ntt(a, false);
    for (int i = 0; i < sz; ++i) a[i] *= a[i];
    ntt(a, true);
    a.resize(out_len);
    return a;
  }

  // Square of a for a modulus without NTT support: squares modulo p0, p1 and
  // p2 with the NTT and recombines the results with crt().
  fps sq_mtt(cf &a) {
    const int n = len(a);
    if (n == 0) return {};
    using f0 = fps_t<mint_t<p0>>;
    using f1 = fps_t<mint_t<p1>>;
    using f2 = fps_t<mint_t<p2>>;
    vc<mint_t<p0>> a0(n);
    vc<mint_t<p1>> a1(n);
    vc<mint_t<p2>> a2(n);
    for (int i = 0; i < n; ++i) {
      const uint v = a[i].val();
      a0[i] = v;
      a1[i] = v;
      a2[i] = v;
    }
    Z c0 = f0::sq_ntt(a0);
    Z c1 = f1::sq_ntt(a1);
    Z c2 = f2::sq_ntt(a2);
    fps merged(len(c0));
    for (int i = 0; i < n + n - 1; ++i) {
      merged[i] = crt(c0[i].val(), c1[i].val(), c2[i].val());
    }
    return merged;
  }

  // Square of a, choosing the algorithm by size and modulus: with NTT support
  // the schoolbook product up to 50 coefficients and sq_ntt beyond; without it
  // Karatsuba up to 150 coefficients and sq_mtt beyond.
  fps sq(cf &a) {
    const int n = len(a);
    const bool fast = T::can_ntt();
    const bool small = n <= (fast ? 50 : 150);
    if (fast) {
      if (small) return conv_naive(a, a);
      return sq_ntt(a);
    }
    if (small) return conv_kara(a, a);
    return sq_mtt(a);
  }

  // Derivative of f. Polynomials with at most one coefficient yield an empty
  // result.
  fps diff(cf &f) {
    const int n = len(f);
    if (n <= 1) return {};
    fps d(n - 1);
    for (int k = 1; k < n; ++k) {
      d[k - 1] = f[k] * T(k);
    }
    return d;
  }
  // Antiderivative of f with constant term 0; the result has one coefficient
  // more than f.
  fps inte(cf &f) {
    const int n = len(f);
    fps F(n + 1);
    for (int k = 0; k < n; ++k) {
      F[k + 1] = f[k] * inv(k + 1);
    }
    return F;
  }
  // Definite integral of f from l to r, that is F(r) - F(l) where F is the
  // antiderivative of f:
  //   sum over i of f[i] / (i + 1) * (r^(i+1) - l^(i+1)).
  // The difference is taken as (R - L) so that the sign matches the
  // integration direction l -> r; the reverse order would return the negated
  // integral.
  T inte(cf &f, T l, T r) {
    T s = 0, L = 1, R = 1;
    int N = len(f);
    FOR(i, N) {
      // After this step L = l^(i+1) and R = r^(i+1).
      L *= l, R *= r;
      s += inv(i + 1) * f[i] * (R - L);
    }
    return s;
  }

  // Power-series inverse of f (same length as f) for inputs with few non-zero
  // coefficients. f[0] must be invertible. Each output coefficient is obtained
  // from the earlier ones by looping over the non-zero terms of f only.
  fps inv_sp(cf &f) {
    const int n = len(f);
    // (index, value) of the non-zero coefficients of f beyond the constant.
    vc<pair<int, T>> terms;
    for (int i = 1; i < n; ++i) {
      if (f[i] != T(0)) terms.ep(i, f[i]);
    }
    fps g(n);
    const T lead_inv = T(1) / f[0];
    g[0] = lead_inv;
    for (int i = 1; i < n; ++i) {
      T acc = 0;
      for (Z &&[deg, coef] : terms) {
        if (deg > i) break;
        acc -= coef * g[i - deg];
      }
      g[i] = acc * lead_inv;
    }
    return g;
  }

  // Power-series inverse of a (same length as a) by precision doubling with
  // the NTT. a[0] must be invertible.
  fps inv_ntt(cf &a) {
    // s holds the inverse computed so far; it starts with 1 / a[0].
    fps s{T(1) / a[0]};
    int N = len(a), n = 1;
    s.reserve(N);
    // Each round extends s from n to min(N, 2n) coefficients.
    for (; n < N; n <<= 1) {
      fps f(n << 1), g(n << 1);
      int sz = min(N, n << 1);
      FOR(i, sz) f[i] = a[i];
      FOR(i, n) g[i] = s[i];
      // f <- a * s as a cyclic product of length 2n.
      ntt(f, 0);
      ntt(g, 0);
      FOR(i, n << 1) f[i] *= g[i];
      ntt(f, 1);

      // Drop the low half, multiply by s once more, and append the negated
      // upper half as the new coefficients of the inverse.
      FOR(i, n) f[i] = 0;
      ntt(f, 0);
      FOR(i, n << 1) f[i] *= g[i];
      ntt(f, 1);
      FOR(i, n, sz) s.ep(-f[i]);
    }
    return s;
  }

  // Power-series inverse of a (same length as a) by precision doubling, using
  // the generic sq() / conv() so that it works for any modulus. a[0] must be
  // invertible.
  fps inv_mtt(cf &a) {
    int N = len(a), n = 1;
    // c holds the inverse computed so far; p is scratch space.
    fps c{a[0].inv()}, p;
    for (; n < N; n <<= 1) {
      // Newton step: c <- 2c - c^2 * a, kept to 2n coefficients.
      p = sq(c);
      sh(p, n << 1);
      fps f(begin(a), begin(a) + min(n << 1, N));
      p = conv(p, f);
      sh(c, n << 1);
      FOR(i, n << 1) c[i] = c[i] + c[i] - p[i];
    }
    sh(c, N);
    return c;
  }

  // Power-series inverse of f. Inputs with few non-zero coefficients (fewer
  // than 160 with NTT support, 820 without) use the sparse routine; others use
  // the NTT or the generic doubling routine.
  fps inv(cf &f) {
    const bool fast = T::can_ntt();
    const int limit = fast ? 160 : 820;
    if (count_terms(f) < limit) return inv_sp(f);
    if (fast) return inv_ntt(f);
    return inv_mtt(f);
  }

  // Power-series quotient f / g (same length as f) for a divisor g with few
  // non-zero coefficients. g[0] must be invertible.
  fps div_sp(fps f, fps g) {
    // Normalise so that the constant term of g is 1.
    if (g[0].val() != 1) {
      T c = g[0].inv();
      for (T &x : f) x *= c;
      for (T &x : g) x *= c;
    }
    // (index, negated value) of the non-zero coefficients of g beyond the
    // constant term, in increasing index order.
    vc<pair<int, T>> a;
    int N = len(g);
    FOR(i, 1, N) if (g[i].val() != 0) a.ep(i, -g[i]);
    N = len(f);
    // f[i] is turned into the i-th quotient coefficient in place, using the
    // already finished entries f[i - x].
    FOR(i, N) for (Z &&[x, y] : a) {
      // Terms with x > i would reach in front of the array; the list is
      // sorted by index, so everything after the first such term can be
      // skipped too.
      if (x > i) break;
      f[i] += y * f[i - x];
    }
    return f;
  }

  // Power-series quotient f / g (same length as f) using the NTT.
  // NOTE(review): this looks like the usual "half-size inverse plus one
  // correction step" scheme - the comments below describe what each phase
  // visibly computes; confirm the derivation before modifying.
  fps div_ntt(cf &f, cf &g) {
    int N = len(f), M = len(g);
    if (N == 1) return {f[0] / g[0]};
    // m is the smallest power of two with 2m >= N.
    int m = 1;
    while (m + m < N) m <<= 1;
    // c = transform of the inverse of g, computed to m coefficients.
    fps a(m << 1), b(m << 1), c(g);
    sh(c, m);
    c = inv(c);
    sh(c, m << 1);
    ntt(c, 0);

    // a <- (first m coefficients of f) * (inverse of g).
    FOR(i, m) a[i] = f[i];
    FOR(i, m, N) a[i] = 0;
    ntt(a, 0);
    FOR(i, m << 1) a[i] *= c[i];
    ntt(a, 1);

    // The first m coefficients of the quotient are final.
    fps s(N);
    FOR(i, m) s[i] = a[i];
    FOR(i, m, m << 1) a[i] = 0;
    ntt(a, 0);

    // a <- (low quotient) * g.
    FOR(i, min(m << 1, M)) b[i] = g[i];
    FOR(i, min(m << 1, M), m << 1) b[i] = 0;
    ntt(b, 0);
    FOR(i, m << 1) a[i] *= b[i];
    ntt(a, 1);

    // Keep only the upper half of (low quotient * g - f), multiply it by the
    // inverse of g, and subtract the result to obtain coefficients m .. N-1.
    FOR(i, m) a[i] = 0;
    FOR(i, m, min(m << 1, N)) a[i] -= f[i];
    ntt(a, 0);
    FOR(i, m << 1) a[i] *= c[i];
    ntt(a, 1);
    FOR(i, m, N) s[i] -= a[i];
    return s;
  }

  // Power-series quotient f / g (same length as f) for any modulus: g is
  // resized to the length of f, inverted, and multiplied with f.
  fps div_mtt(fps f, fps g) {
    const int n = len(f);
    g.resize(n);
    const fps g_inv = inv(g);
    fps quotient = conv(f, g_inv);
    quotient.resize(n);
    return quotient;
  }

  // Quotient f / g with the dense algorithm matching the modulus.
  fps div_dense(cf &f, cf &g) {
    if (T::can_ntt()) return div_ntt(f, g);
    return div_mtt(f, g);
  }

  // Quotient f / g; a divisor with fewer than 50 non-zero coefficients is
  // handled by the sparse routine, anything else by the dense one.
  fps div(cf &f, cf &g) {
    const bool sparse = count_terms(g) < 50;
    if (sparse) return div_sp(f, g);
    return div_dense(f, g);
  }

  // Logarithm of f (same length as f) for inputs with few non-zero
  // coefficients. The recurrence divides by nothing, so it relies on
  // f[0] == 1. d holds the coefficients of the derivative of the result.
  fps log_sp(cf &f) {
    const int n = len(f);
    // (index, value) of the non-zero coefficients of f beyond the constant.
    vc<pair<int, T>> terms;
    for (int i = 1; i < n; ++i) {
      if (f[i].val() != 0) terms.ep(i, f[i]);
    }
    fps res(n), d(n - 1);
    for (int i = 0; i + 1 < n; ++i) {
      T acc = f[i + 1] * T(i + 1);
      for (Z &&[deg, coef] : terms) {
        if (deg > i) break;
        acc -= coef * d[i - deg];
      }
      d[i] = acc;
      res[i + 1] = acc * inv(i + 1);
    }
    return res;
  }

  // Logarithm of f (same length as f) for dense input; requires f[0] == 1.
  // Computes (x * f') / f and then divides coefficient i by i.
  fps log_dense(cf &f) {
    assert(f[0] == T(1));
    const int n = len(f);
    fps work(f);
    for (int i = 0; i < n; ++i) {
      work[i] *= T(i);
    }
    work = div_dense(work, f);
    for (int i = 0; i < n; ++i) {
      work[i] *= inv(i);
    }
    return work;
  }

  // Logarithm of f; requires f[0] == 1. Inputs with at most 200 (NTT) or 1200
  // (no NTT) non-zero coefficients use the sparse routine.
  fps log(cf &f) {
    assert(f[0] == T(1));
    const int limit = T::can_ntt() ? 200 : 1200;
    if (count_terms(f) <= limit) return log_sp(f);
    return log_dense(f);
  }

  // Exponential of f (same length as f) for inputs with few non-zero
  // coefficients; requires f[0] == 0. An empty input yields the constant 1.
  fps exp_sp(cf &f) {
    const int n = len(f);
    if (n == 0) return {T(1)};
    assert(f[0].val() == 0);
    // (index, value) of the non-zero coefficients of the derivative of f.
    vc<pair<int, T>> terms;
    for (int i = 1; i < n; ++i) {
      if (f[i].val() == 0) continue;
      terms.ep(i - 1, f[i] * T(i));
    }
    fps g(n);
    g[0] = 1;
    for (int i = 1; i < n; ++i) {
      T acc = 0;
      for (Z &&[deg, coef] : terms) {
        if (deg > i - 1) break;
        acc += coef * g[i - 1 - deg];
      }
      g[i] = acc * inv(i);
    }
    return g;
  }

  // Exponential of f (same length as f) using the NTT; requires a non-empty f
  // with f[0] == 0.
  // The loop doubles the length of s every round and the result is cut to N
  // coefficients at the end.
  // NOTE(review): s appears to be the current prefix of exp(f), c the prefix
  // of its inverse, and a / b cached transforms of c - confirm against the
  // reference algorithm before changing any statement order.
  fps exp_ntt(cf &f) {
    int N = len(f);
    assert(N > 0 and f[0].val() == 0);
    vc<T> s{1, (N > 1 ? f[1] : 0)}, c{1}, a, b{1, 1};
    while (len(s) < N) {
      int m = len(s);
      // y = transform of s at length 2m.
      fps y = s;
      sh(y, m << 1);
      ntt(y, 0);
      // Extend c by m / 2 coefficients using the transform cached in b from
      // the previous round.
      a = b;
      vc<T> z(m);
      FOR(i, m) z[i] = y[i] * a[i];
      ntt(z, 1);
      FOR(i, m >> 1) z[i] = 0;
      ntt(z, 0);
      FOR(i, m) z[i] *= -a[i];
      ntt(z, 1);
      c.insert(c.end(), z.begin() + m / 2, z.end());
      // b = transform of the extended c at length 2m.
      b = c;
      sh(b, m << 1);
      ntt(b, 0);

      // x = derivative of the first m coefficients of f, multiplied by s
      // (cyclically, length m).
      vc<T> x(f.begin(), f.begin() + m);
      FOR(i, m - 1) x[i] = x[i + 1] * T(i + 1);
      x.back() = 0;
      ntt(x, 0);
      FOR(i, m) x[i] *= y[i];
      ntt(x, 1);

      // Subtract the derivative of s.
      FOR(i, m - 1) x[i] -= s[i + 1] * T(i + 1);

      // Shift the result up by m, multiply by c, then integrate term by term.
      sh(x, m << 1);
      FOR(i, m - 1) x[m + i] = x[i], x[i] = 0;
      ntt(x, 0);
      FOR(i, m << 1) x[i] *= b[i];
      ntt(x, 1);
      FOR_R(i, len(x) - 1) x[i + 1] = x[i] * inv(i + 1);
      x[0] = 0;

      // Add the next block of f, keep only the upper half, multiply by s and
      // append the upper half as the next m coefficients of s.
      FOR(i, m, min(N, m << 1)) x[i] += f[i];
      FOR(i, m) x[i] = 0;
      ntt(x, 0);
      FOR(i, m << 1) x[i] *= y[i];
      ntt(x, 1);
      s.insert(s.end(), x.begin() + m, x.end());
    }
    sh(s, N);
    return s;
  }

  // Exponential of e (same length as e) built on the generic conv(), so it
  // works for any modulus; requires a non-empty e with e[0] == 0.
  // The input is padded to the next power of two 2^n, n doubling rounds are
  // run (m doubles every round), and the result is cut back to N.
  // NOTE(review): f appears to be the current prefix of exp(h) and g the
  // prefix of its inverse - confirm against the reference algorithm before
  // changing any statement order.
  fps exp_mtt(cf &e) {
    fps h(e);
    int N = len(h), n = 0, m = 1;
    assert(N > 0 and h[0] == T(0));
    while (1 << n < N) ++n;
    sh(h, 1 << n);
    Z dh = diff(h);
    fps f{1}, g{1}, p;
    FOR(n) {
      // g <- 2g - f * g * g, kept to m coefficients.
      p = conv(f, g);
      sh(p, m);
      p = conv(p, g);
      sh(p, m);
      sh(g, m);
      FOR(i, m) g[i] += g[i] - p[i];

      // p <- (f' - f * dh) * g + dh on the current precision.
      p = {dh.begin(), dh.begin() + m - 1};
      p = conv(f, p);
      sh(p, m + m - 1);
      FOR(i, m + m - 1) p[i] = -p[i];
      FOR(i, m - 1) p[i] += T(i + 1) * f[i + 1];
      p = conv(p, g);
      sh(p, m + m - 1);
      FOR(i, m - 1) p[i] += dh[i];
      // Integrate, form 1 + h - p and multiply it into f; f then has 2m
      // coefficients.
      p = inte(p);
      FOR(i, m << 1) p[i] = h[i] - p[i];
      p[0] += T(1);
      f = conv(f, p);
      m <<= 1;
      sh(f, m);
    }
    sh(f, N);
    return f;
  }

  // Exponential of f. Inputs with at most 320 (NTT) or 3000 (no NTT) non-zero
  // coefficients use the sparse routine; others use the dense routine matching
  // the modulus.
  fps exp(cf &f) {
    const bool fast = T::can_ntt();
    const int limit = fast ? 320 : 3000;
    if (count_terms(f) <= limit) return exp_sp(f);
    if (fast) return exp_ntt(f);
    return exp_mtt(f);
  }

  // Product of all polynomials in f in O(N log^2 N), where N is the total
  // degree. The list is reduced in place by multiplying neighbouring pairs
  // until one polynomial is left; an empty list yields the constant 1.
  fps conv_all(vc<fps> &f) {
    if (f.empty()) return {T(1)};
    while (len(f) > 1) {
      const int n = len(f);
      const int half = (n + 1) / 2;
      for (int i = 0; i < half; ++i) {
        const int left = 2 * i, right = 2 * i + 1;
        if (right == n) {
          // Odd element out: carried over unchanged.
          f[i] = f[left];
        } else {
          f[i] = conv(f[left], f[right]);
        }
      }
      f.resize(half);
    }
    return f[0];
  }

  // Product of the linear factors (1 - x_i * X) over all entries x_i of f.
  fps conv_all_1(fps f) {
    vc<fps> factors;
    for (const T &root : f) {
      fps linear{T(1), -root};
      factors.ep(linear);
    }
    return conv_all(factors);
  }

  // Power sums of a: entry k of the result (k = 0 .. N) is the sum of a_i^k
  // over all entries of a. Obtained from the logarithm of the product of
  // (1 - a_i * X): coefficient k of that logarithm times -k.
  fps sum_of_pow(cf &a, int N) {
    fps res = conv_all_1(a);
    res.resize(N + 1);
    res = log(res);
    for (int k = 0; k <= N; ++k) {
      res[k] = -res[k] * T(k);
    }
    // The zeroth power sum is simply the number of elements.
    res[0] = T(len(a));
    return res;
  }
};
#line 11 "No_1145_Sums_of_Powers.cpp"

using mint = M99;
using fps = vc<mint>;
fps_t<mint> X;
void Yorisou() {
  INT(N, K);
  VEC(mint, a, N);
  fps s = X.sum_of_pow(a, K);
  s.erase(begin(s));
  print(s);
}
constexpr int tests = 0, fl = 0, DB = 10;
#line 1 "YRS/aa/main.hpp"
int main() {
  cin.tie(0)->sync_with_stdio(0);
  int T = 1;
  if (fl) cerr.tie(0);
  if (tests and not fl) IN(T);
  for (int i = 0; i < T or fl; ++i) {
    Yorisou();
    if (fl and i % DB == 0) cerr << "Case: " << i << '\n';
  }
  return 0;
}
#line 24 "No_1145_Sums_of_Powers.cpp"
yukicoder

結果

ソースコード