#1149018 (C++23) No.129 お年玉(2)

提出ソース
結果

問題	No.129 お年玉(2)
コンテスト
ユーザー	zeta
提出日時	2026-02-22 08:40:38
言語	C++23 (gcc 15.2.0 + boost 1.89.0) コンパイル: `g++-15 -O2 -lm -std=c++23 -Wuninitialized -DONLINE_JUDGE -o a.out _filename_` 実行: `./a.out`
結果	AC
実行時間	614 ms / 5,000 ms
コード長	35,742 bytes
記録記録タグの例: 初AC ショートコード純ショートコード純主流ショートコード最速実行時間
コンパイル時間	5,316 ms
コンパイル使用メモリ	338,132 KB
実行使用メモリ	7,848 KB
最終ジャッジ日時	2026-02-22 08:40:57
合計ジャッジ時間	17,831 ms
ジャッジサーバーID （参考情報）	judge4 / judge3
このコードへのチャレンジ
（要ログイン）
ファイルパターン	結果
sample	AC * 3
other	AC * 46
権限があれば一括ダウンロードができます
ソースコード

raw source code
#line 1 "No_129_\u304a\u5e74\u7389_2.cpp"
#define YRSD
#line 2 "YRS/all.hpp"

#line 2 "YRS/aa/head.hpp"

#include <iostream>
#include <algorithm>

#include <array>
#include <bitset>
#include <map>
#include <numeric>
#include <queue>
#include <set>
#include <string>
#include <tuple>

#include <bit>
#include <chrono>
#include <functional>
#include <iomanip>
#include <utility>
#include <type_traits>
#include <cassert>
#include <cctype>
#include <cmath>
#include <cstring>
#include <ctime>
#include <limits>
#include <ranges>
#include <concepts>

// Shorthand for a template header with a single type parameter T.
#define TE template <typename T>
// Shorthand for a template header with a type T plus a parameter pack S.
#define TES template <typename T, typename ...S>
// `Z` abbreviates `auto` (deduced locals and abbreviated function templates).
#define Z auto
// Member-name abbreviations.
#define ep emplace_back
#define eb emplace
#define fi first
#define se second
// Expands to the begin/end iterator pair of x.
#define all(x) (x).begin(), (x).end()

// OV4 yields its fifth argument; FOR / FOR_R use it to dispatch on argument count.
#define OV4(a, b, c, d, e, ...) e
// FOR(a): repeat a times (loop variable `_`).
#define FOR1(a) for (int _ = 0; _ < (a); ++_)
// FOR(i, a): i = 0 .. a-1.
#define FOR2(i, a) for (int i = 0; i < (a); ++i)
// FOR(i, a, b): i = a .. b-1.
#define FOR3(i, a, b) for (int i = (a); i < (b); ++i)
// FOR(i, a, b, c): i = a, a+c, ... while i < b.
#define FOR4(i, a, b, c) for (int i = (a); i < (b); i += (c))
#define FOR(...) OV4(__VA_ARGS__, FOR4, FOR3, FOR2, FOR1)(__VA_ARGS__)
// Reverse counterparts: each starts at the upper bound minus one and counts down.
#define FOR1_R(a) for (int _ = (a) - 1; _ >= 0; --_)
#define FOR2_R(i, a) for (int i = (a) - 1; i >= 0; --i)
#define FOR3_R(i, a, b) for (int i = (b) - 1; i >= (a); --i)
#define FOR4_R(i, a, b, c) for (int i = (b) - 1; i >= (a); i -= (c))
#define FOR_R(...) OV4(__VA_ARGS__, FOR4_R, FOR3_R, FOR2_R, FOR1_R)(__VA_ARGS__)
// Enumerates every submask t of s in decreasing order (s first, 0 last); after
// visiting 0 the loop variable becomes the sentinel -1 and the loop stops.
// The mask argument is parenthesized at every use so that expressions such as
// `a | b` keep their meaning inside the `&`.
#define FOR_subset(t, s) for (int t = (s); t > -1; t = (t == 0 ? -1 : (t - 1) & (s)))

// NOTE: this macro rewrites every later `sort` token, so a call written as
// `sort(x)` becomes `ranges::sort(x)`.
#define sort ranges::sort

using namespace std;

// Container / tuple / heap aliases.
TE using vc = vector<T>;
TE using vvc = vc<vc<T>>;
TE using T1 = tuple<T>;
TE using T2 = tuple<T, T>;
TE using T3 = tuple<T, T, T>;
TE using T4 = tuple<T, T, T, T>;
TE using max_heap = priority_queue<T>;
TE using min_heap = priority_queue<T, vc<T>, greater<T>>;
// Fixed-width and extended arithmetic aliases (the 128-bit ones are compiler extensions).
using u8 = unsigned char; using uint = unsigned int; using ll = long long;      using ull = unsigned long long;
using ld = long double;   using i128 = __int128;     using u128 = __uint128_t;  using f128 = __float128;
using u16 = uint16_t;
using PII = pair<int, int>;   using PLL = pair<ll, ll>;

// Compile-time flag: true when the translation unit defines YRSD.
#ifdef YRSD
constexpr bool dbg = 1;
#else
constexpr bool dbg = 0;
#endif
#line 2 "YRS/IO/IO.hpp"

// Reads one whitespace-delimited token and parses it as a decimal 128-bit
// signed integer with an optional leading '-'.
//
// If the token extraction fails (EOF or a bad stream), x is left untouched and
// the stream keeps its failure state; previously the stale contents of a
// function-static buffer were parsed again in that case.
// Characters after the sign are not validated: each one is folded in as
// (c - '0'), exactly as before.
std::istream &operator>>(std::istream &I, __int128 &x) {
  std::string s;
  if (!(I >> s)) return I;
  const bool neg = s[0] == '-';
  __int128 v = 0;
  for (std::size_t i = neg ? 1 : 0; i < s.size(); ++i) v = v * 10 + s[i] - '0';
  x = neg ? -v : v;
  return I;
}
// Writes a 128-bit signed integer in decimal.
//
// The magnitude is computed in unsigned 128-bit arithmetic, so the most
// negative value (-2^127) is printed correctly; negating it as a signed value
// would overflow.
std::ostream &operator<<(std::ostream &O, __int128 x) {
  const bool neg = x < 0;
  unsigned __int128 mag = static_cast<unsigned __int128>(x);
  if (neg) mag = -mag;  // well-defined modular negation
  std::string s;
  // Digits are produced least-significant first and reversed at the end.
  do {
    s += static_cast<char>('0' + static_cast<int>(mag % 10));
    mag /= 10;
  } while (mag != 0);
  if (neg) s += '-';
  std::reverse(s.begin(), s.end());
  return O << s;
}
// Reads one token and converts it with stold, so the parsed value has at most
// long double precision before being stored in the f128.
// The token buffer is function-static and reused across calls.
istream &operator>>(istream &I, f128 &x) {
  static string s;
  I >> s, x = stold(s);
  return I;
}
// Prints an f128 by first converting it to long double.
ostream &operator<<(ostream &O, const f128 x) { return O << ld(x); }
// Reads every element of a tuple, in order, from the stream.
template <typename... S>
istream &operator>>(istream &I, tuple<S...> &t) {
  auto read_each = [&I](auto &...elems) { ((I >> elems), ...); };
  apply(read_each, t);
  return I;
}
// Reads the first member of a pair, then the second.
template <typename T, typename U>
istream &operator>>(istream &I, pair<T, U> &x) {
  I >> x.first;
  I >> x.second;
  return I;
}
// Writes a pair as "first second" (single space separator).
template <typename T, typename U>
ostream &operator<<(ostream &O, const pair<T, U> &x) {
  O << x.first;
  O << ' ';
  O << x.second;
  return O;
}
// Element-wise input for any type that supports begin()/end(), except
// std::string (which keeps its standard extraction).
// The container must already have the desired size; it is not resized here.
TE requires requires(T &c) { begin(c); end(c); } and 
                          (not is_same_v<decay_t<T>, string>)
istream &operator>>(istream &I, T &c) {
  for (Z &e : c) I >> e;
  return I;
}
// Space-separated output for range-like types. The constraint excludes
// const char*, std::string and char arrays so those keep the standard overloads.
// An empty range prints nothing; there is no trailing separator.
TE requires requires(const T &c) { begin(c); end(c); } and 
  (not is_same_v<decay_t<T>, const char*>) and 
  (not is_same_v<decay_t<T>, string>) and 
  (not is_array_v<remove_reference_t<T>> or 
   not is_same_v<remove_extent_t<remove_reference_t<T>>, char>)
ostream &operator<<(ostream &O, const T &a) {
  if (a.empty()) return O;
  Z i = a.begin();
  O << *i++;
  for (; i != a.end(); ++i) O << ' ' << *i;
  return O;
}
void IN() {}
TE void IN(T &x, Z &...s) { cin >> x, IN(s...); }
void print() { cout << '\n'; }
TES void print(T &&x, S &&...y) {
  cout << x;
  if constexpr (sizeof...(S)) cout << ' ';
  print(forward<S>(y)...);
}
void put() { cout << ' '; }
TES void put(T &&x, S &&...y) {
  cout << x;
  if constexpr (sizeof...(S)) cout << ' ';
  put(forward<S>(y)...);
}

// Declare-and-read helpers: each declares the listed variables with the given
// type and immediately reads them through IN(...).
#define INT(...)  int    __VA_ARGS__; IN(__VA_ARGS__)
#define UINT(...) uint   __VA_ARGS__; IN(__VA_ARGS__)
#define LL(...)   ll     __VA_ARGS__; IN(__VA_ARGS__)
#define ULL(...)  ull    __VA_ARGS__; IN(__VA_ARGS__)
#define I128(...) i128   __VA_ARGS__; IN(__VA_ARGS__)
#define STR(...)  string __VA_ARGS__; IN(__VA_ARGS__)
#define CH(...)   char   __VA_ARGS__; IN(__VA_ARGS__)
// NOTE(review): `re` is not declared anywhere in the visible source; REAL
// only compiles if another header provides that alias — confirm.
#define REAL(...) re     __VA_ARGS__; IN(__VA_ARGS__)
// Declares vc<T> a of size n and reads all n elements.
#define VEC(T, a, n) vc<T> a(n); IN(a)

// Verdict printers. Each positive-named function prints the positive word when
// its flag is true and the negative word otherwise; each negative-named
// function forwards to its counterpart with the flag inverted.
void YES(bool o = 1) {
  if (o) print("YES");
  else print("NO");
}
void Yes(bool o = 1) {
  if (o) print("Yes");
  else print("No");
}
void yes(bool o = 1) {
  if (o) print("yes");
  else print("no");
}
void NO(bool o = 1) { YES(!o); }
void No(bool o = 1) { Yes(!o); }
void no(bool o = 1) { yes(!o); }
void ALICE(bool o = 1) {
  if (o) print("ALICE");
  else print("BOB");
}
void Alice(bool o = 1) {
  if (o) print("Alice");
  else print("Bob");
}
void alice(bool o = 1) {
  if (o) print("alice");
  else print("bob");
}
void BOB(bool o = 1) { ALICE(!o); }
void Bob(bool o = 1) { Alice(!o); }
void bob(bool o = 1) { alice(!o); }
void POSSIBLE(bool o = 1) {
  if (o) print("POSSIBLE");
  else print("IMPOSSIBLE");
}
void Possible(bool o = 1) {
  if (o) print("Possible");
  else print("Impossible");
}
void possible(bool o = 1) {
  if (o) print("possible");
  else print("impossible");
}
void IMPOSSIBLE(bool o = 1) { POSSIBLE(!o); }
void Impossible(bool o = 1) { Possible(!o); }
void impossible(bool o = 1) { possible(!o); }
void TAK(bool o = 1) {
  if (o) print("TAK");
  else print("NIE");
}
void NIE(bool o = 1) { TAK(!o); }
#line 5 "YRS/all.hpp"

#if (__cplusplus >= 202002L)
#include <numbers>
// pi at full long double precision. `numbers::pi` is the double constant, so
// initialising a long double from it only carried double precision.
constexpr ld pi = numbers::pi_v<ld>;
#endif
// inf<T>: by default the largest value representable in T.
TE constexpr T inf = numeric_limits<T>::max();
// For i128 the constant is max(ll) * 2e18 (roughly 1.8e37), not the true
// maximum of the type.
template <> constexpr i128 inf<i128> = i128(inf<ll>) * 2'000'000'000'000'000'000;
// For pairs, both members are set to their own inf.
template <typename T, typename U>
constexpr pair<T, U> inf<pair<T, U>> = {inf<T>, inf<U>};

TE constexpr static inline int pc(T x) { return popcount(make_unsigned_t<T>(x)); }
constexpr static inline ll len(const Z &a) { return a.size(); }

void reverse(Z &a) { reverse(all(a)); }

void unique(Z &a) {
  sort(a);
  a.erase(unique(all(a)), a.end());
}
TE vc<int> inverse(const vc<T> &a) {
  int N = len(a);
  vc<int> b(N, -1);
  FOR(i, N) if (a[i] != -1) b[a[i]] = i;
  return b;
}

// Largest / smallest element of a container (dereferences the iterator
// returned by max_element / min_element, so the container must be non-empty).
auto QMAX(const auto &a) {
  return *max_element(a.begin(), a.end());
}
auto QMIN(const auto &a) {
  return *min_element(a.begin(), a.end());
}
// Same, over an iterator range [l, r).
template <typename T>
auto QMAX(T l, T r) {
  return *max_element(l, r);
}
template <typename T>
auto QMIN(T l, T r) {
  return *min_element(l, r);
}
// Assigns b to a when b is strictly larger; reports whether a changed.
constexpr bool chmax(auto &a, const auto &b) {
  if (a < b) {
    a = b;
    return 1;
  }
  return 0;
}
// Assigns b to a when b is strictly smaller; reports whether a changed.
constexpr bool chmin(auto &a, const auto &b) {
  if (a > b) {
    a = b;
    return 1;
  }
  return 0;
}

// Returns the indices of a ordered by ascending value; equal values are
// ordered by ascending index.
vc<int> argsort(const auto &a) {
  vc<int> order(len(a));
  iota(order.begin(), order.end(), 0);
  auto by_value_then_index = [&](int i, int k) {
    if (a[i] < a[k]) return true;
    return a[i] == a[k] and i < k;
  };
  sort(order, by_value_then_index);
  return order;
}
// Gathers a through the index list I: result[i] = a[I[i]].
template <typename T>
vc<T> rearrange(const vc<T> &a, const vc<int> &I) {
  const int n = len(I);
  vc<T> picked(n);
  for (int i = 0; i < n; ++i) picked[i] = a[I[i]];
  return picked;
}
// Prefix sums of a. With of == 1 (default) the result has a leading zero and
// length len(a) + 1; with of == 0 that leading element is removed.
template <int of = 1, typename T>
vc<T> pre_sum(const vc<T> &a) {
  const int n = len(a);
  vc<T> acc(n + 1);
  for (int i = 1; i <= n; ++i) acc[i] = acc[i - 1] + a[i - 1];
  if (of == 0) acc.erase(acc.begin());
  return acc;
}

// Index of the highest set bit of x, or -1 when x is zero. Types of at most
// four bytes use the 32-bit builtin, wider types the 64-bit one.
template <typename T>
constexpr static int topbit(T x) {
  if (x == 0) return -1;
  if constexpr (sizeof(T) > 4) {
    return 63 - __builtin_clzll(x);
  } else {
    return 31 - __builtin_clz(x);
  }
}
// Index of the lowest set bit of x, or -1 when x is zero.
template <typename T>
constexpr static int lowbit(T x) {
  if (x == 0) return -1;
  if constexpr (sizeof(T) > 4) {
    return __builtin_ctzll(x);
  } else {
    return __builtin_ctz(x);
  }
}

// Integer division rounded toward negative infinity: the truncated quotient is
// decremented when there is a remainder and the operands have opposite signs.
TE constexpr T floor(T x, T y) { return x / y - (x % y and (x ^ y) < 0); }
// Ceiling division expressed through floor(x + y - 1, y).
// NOTE(review): this form presumably expects y > 0 — confirm against callers.
TE constexpr T ceil(T x, T y) { return floor(x + y - 1, y); }
// Remainder consistent with floor(): x - floor(x, y) * y.
TE constexpr T bmod(T x, T y) { return x - floor(x, y) * y; }
// Returns {floor(x, y), x - floor(x, y) * y}.
TE constexpr pair<T, T> divmod(T x, T y) {
  T q = floor(x, y);
  return pair{q, x - q * y};
}
// Sum of all elements of v, accumulated in type T (long long by default).
template <typename T = ll>
T SUM(const auto &v) {
  return accumulate(v.begin(), v.end(), T(0));
}
// Index of the first element not less than x in a sorted container.
int lb(const auto &a, auto x) {
  auto it = lower_bound(a.begin(), a.end(), x);
  return it - a.begin();
}
// Same over an iterator range; the result is an offset from l.
template <typename T>
int lb(T l, T r, auto x) {
  auto it = lower_bound(l, r, x);
  return it - l;
}
// Index of the first element greater than x in a sorted container.
int ub(const auto &a, auto x) {
  auto it = upper_bound(a.begin(), a.end(), x);
  return it - a.begin();
}
// Same over an iterator range; the result is an offset from l.
template <typename T>
int ub(T l, T r, auto x) {
  auto it = upper_bound(l, r, x);
  return it - l;
}

// Integer binary search. l is kept on the side where f is true and r on the
// other side; the loop stops when they are adjacent and l is returned.
// r may be smaller or larger than l (the loop tests |l - r|).
// With ck == true the precondition f(l) is asserted.
template <bool ck = 1>
ll bina(Z f, ll l, ll r) {
  if constexpr (ck) assert(f(l));
  while (abs(l - r) > 1) {
    ll x = (r + l) >> 1;
    (f(x) ? l : r) = x;
  }
  return l;
}
// Bisection on a real-valued interval: runs exactly c halving steps (100 by
// default), moving l when f(mid) is true and r otherwise, and returns the
// midpoint of the final interval.
TE T bina_real(Z f, T l, T r, int c = 100) {
  while (c--) {
    T x = (l + r) / 2;
    (f(x) ? l : r) = x;
  }
  return (l + r) / 2;
}

Z pop(Z &s) {
  if constexpr (requires { s.pop_back(); }) {
    Z x = s.back();
    return s.pop_back(), x;
  } else if constexpr (requires { s.top(); }) {
    Z x = s.top();
    return s.pop(), x;
  } else {
    Z x = s.front();
    return s.pop(), x;
  }
}
void setp(int x) { cout << fixed << setprecision(x); }

TE inline void sh(vc<T> &a, int N, T b = {}) {
  a.resize(N, b);
}
#line 1 "YRS/debug.hpp"
// Debug output helpers, active only when YRSD is defined.
#ifdef YRSD
// Terminates a debug(...) line: closes the bracket and flushes cerr.
void DBG() { cerr << "]" << endl; }
// Prints the arguments to cerr separated by ", ".
TES void DBG(T &&x, S &&...y) {
  cerr << x;
  if constexpr (sizeof...(S)) cerr << ", ";
  DBG(forward<S>(y)...);
}
// debug(a, b): prints "[line]: [a, b] = [values...]" to cerr.
#define debug(...) cerr << "[" << __LINE__ << "]: [" #__VA_ARGS__ "] = [", DBG(__VA_ARGS__)
// Terminates an err(...) line and flushes cerr.
void ERR() { cerr << endl; }
TES void ERR(T &&x, S &&...y) {
  cerr << x;
  if constexpr (sizeof...(S)) cerr << ", ";
  ERR(forward<S>(y)...);
}
// err(a, b): prints "[line]: values..." to cerr.
#define err(...) cerr << "[" << __LINE__ << "]: ", ERR(__VA_ARGS__)
#define asser assert
#else
// Without YRSD all three macros expand to a no-op expression and their
// arguments are not evaluated.
#define debug(...) void(0721)
#define err(...)   void(0721)
#define asser(...) void(0721)
#endif
#line 4 "No_129_\u304a\u5e74\u7389_2.cpp"
// #include "YRS/IO/fast_io.hpp"
// #include "YRS/random/rng.hpp"
// #include "YRS/ds/basic/retsu.hpp"
// #include "YRS/mod/mint.hpp"
#line 2 "YRS/nt/bigint/big.hpp"

#line 2 "YRS/mod/mint.hpp"

#line 2 "YRS/mod/modint_common.hpp"

// Satisfied by types that expose a static get_mod(), a static gen(...) that
// returns T, and a member named `val`.
// NOTE(review): the last requirement is written `x.val` without a call; for a
// type whose `val` is a member function (as in mint_t in this file) that
// expression is presumably ill-formed, which would make the concept false for
// it — confirm whether `x.val()` was intended.
TE concept is_mint = requires(T x) {
  { T::get_mod() };
  { T::gen(0ull) } -> same_as<T>;
  x.val;
};
// Satisfied when T::get_mod() is usable as a compile-time constant.
TE concept has_const_mod =
    requires { integral_constant<int, (int)T::get_mod()> {}; };

// Per-type lazily grown lookup tables, each held in a function-local static.
// Table of modular inverses, seeded with entries {0, 1} for indices 0 and 1.
TE static vc<T> &invs() {
  static vc<T> a{0, 1};
  return a;
}
// Table of factorials, seeded with 0! = 1 and 1! = 1.
TE static vc<T> &fac() {
  static vc<T> a{1, 1};
  return a;
}
// Table of inverse factorials, seeded with 1/0! = 1 and 1/1! = 1.
TE static vc<T> &ifac() {
  static vc<T> a{1, 1};
  return a;
}

// Bulk-fills the inverse table so that indices 1 .. N-1 hold 1/i, using a
// single modular inversion. Returns N. Does nothing if the table already has
// at least N entries.
TE static int Set_inv(int N) {
  static vc<T> &inv = invs<T>();
  if (len(inv) >= N) return N;
  inv.resize(N + 1);
  inv[0] = 1, inv[1] = 1;
  // Forward pass: inv[i + 1] = inv[i] * i, i.e. inv[k] temporarily holds (k-1)!.
  FOR(i, 1, N) inv[i + 1] = inv[i] * i;
  // Pop the last prefix product, (N-1)!, and invert it once.
  T t = pop(inv).inv();
  // Backward pass: inv[i] = (i-1)! * 1/i! = 1/i, then t becomes 1/(i-1)!.
  // Index 0 ends up holding 1 after this loop.
  FOR_R(i, N) inv[i] *= t, t *= i;
  return N;
}
// Bulk-fills the factorial and inverse-factorial tables for indices 0 .. N-1
// with one modular inversion. Returns N. Does nothing if the factorial table
// already has at least N entries.
TE static int Set_comb(int N) {
  static vc<T> &fa = fac<T>(), &ifa = ifac<T>();
  if (len(fa) >= N) return N;
  fa.resize(N);
  ifa.resize(N);
  FOR(i, 1, N) fa[i] = fa[i - 1] * i;
  ifa[N - 1] = fa[N - 1].inv();
  // 1/i! = 1/(i+1)! * (i+1), filled from the top down.
  FOR_R(i, N - 1) ifa[i] = ifa[i + 1] * (i + 1);
  return N;
}

// Returns the table entry for the modular inverse of n, extending the shared
// inverse table on demand.
// Each new entry k is derived from a smaller one: with q = ceil(mod / k) and
// r = k * q - mod, the code stores a[r] * q.
// NOTE(review): this recurrence presumably requires a prime modulus — confirm.
template <typename mint>
mint inv(int n) {
  static const int mod = mint::get_mod();
  static vc<mint> &a = invs<mint>();
  assert(0 <= n);
  while (len(a) <= n) {
    int k = len(a);
    int q = (mod + k - 1) / k;
    int r = k * q - mod;
    a.ep(a[r] * mint(q));
  }
  return a[n];
}
// Returns n!, extending the shared factorial table on demand.
// For n >= mod the result is 0 without touching the table.
template <typename mint>
mint fact(int n) {
  static const int mod = mint::get_mod();
  static vc<mint> &a = fac<mint>();
  assert(0 <= n);
  if (n >= mod) return 0;
  while (len(a) <= n) {
    int k = len(a);
    a.ep(a[k - 1] * mint(k));
  }
  return a[n];
}

// Returns 1/n!, extending the shared inverse-factorial table on demand via
// inv<mint>(). Negative n yields 0.
template <typename mint>
mint fact_inv(int n) {
  static vc<mint> &a = ifac<mint>();
  if (n < 0) return mint(0);
  while (len(a) <= n)
    a.ep(a[len(a) - 1] * inv<mint>(len(a)));
  return a[n];
}

// Product of the inverse factorials of all arguments (1 for an empty pack).
template <typename mint, typename... Ts>
mint fact_invs(Ts... xs) {
  return (mint(1) * ... * fact_inv<mint>(xs));
}

// a! divided by the product of the factorials of the remaining arguments.
// The function does not check that the remaining arguments sum to a.
template <typename mint, typename X, typename... S>
mint multinomial(X&& a, S&&... b) {
  return fact<mint>(a) * fact_invs<mint>(forward<S>(b)...);
}

// Binomial coefficient C(n, k) read from a Pascal-triangle table that is kept
// in function-local statics and grown on demand (first in width, then in
// height). Returns 0 when k is outside [0, n].
template <typename mint>
mint C_dense(int n, int k) {
  assert(n >= 0);
  if (k < 0 or n < k) return 0;
  static vc<vc<mint>> C;
  // H x W is the extent of the table filled so far.
  static int H = 0, W = 0;
  // Pascal's rule from row i - 1; row 0 is 1 in column 0 and 0 elsewhere.
  Z calc = [&](int i, int j) -> mint {
    if (i == 0) return(j == 0 ? mint(1) : mint(0));
    return C[i - 1][j] + (j ? C[i - 1][j - 1] : 0);
  };
  // Widen every existing row to k + 1 columns.
  if (W <= k) {
    for (int i = 0; i < H; ++i) {
      C[i].resize(k + 1);
      for (int j = W; j < k + 1; ++j) {
        C[i][j] = calc(i, j);
      }
    }
    W = k + 1;
  }
  // Append rows up to n, each W columns wide.
  if (H <= n) {
    C.resize(n + 1);
    for (int i = H; i < n + 1; ++i) {
      C[i].resize(W);
      for (int j = 0; j < W; ++j) {
        C[i][j] = calc(i, j);
      }
    }
    H = n + 1;
  }
  return C[n][k];
}

// Binomial coefficient C(N, K) from the factorial tables; 0 when K is outside
// [0, N].
template <typename mint>
mint C(int N, int K) {
  assert(N >= 0);
  const bool outside = K < 0 or N < K;
  if (outside) return 0;
  mint value = fact<mint>(N);
  value = value * fact_inv<mint>(K);
  return value * fact_inv<mint>(N - K);
}

// Binomial coefficient built digit by digit in base P = mint::get_mod():
// multiplies C(N % P, K % P) over successive base-P digits of N and K.
template <typename mint>
mint lucas(ll N, ll K) {
  static constexpr int P = mint::get_mod();
  mint product(1);
  while (true) {
    if (K > N) return 0;
    if (K == 0) return product;
    product = C<mint>(N % P, K % P) * product;
    N /= P;
    K /= P;
  }
}

// Binomial coefficient C(n, k); 0 when k is outside [0, n].
//   dense == true : table lookup through C_dense.
//   large == false: factorial-table formula n! / (k! (n-k)!).
//   large == true : product of min(k, n-k) factors n, n-1, ... multiplied by the
//                   inverse factorial, so only that many table entries are needed.
template <typename mint, bool large = false, bool dense = false>
mint binom(ll n, ll k) {
  assert(n >= 0);
  if (k < 0 or n < k) return 0;
  if constexpr (dense) return C_dense<mint>(n, k);
  if constexpr (not large) return multinomial<mint>(n, k, n - k);
  k = min(k, n - k);
  mint x(1);
  FOR(i, k) x *= mint(n - i);
  return x * fact_inv<mint>(k);
}

// Reciprocal of C(n, k) for 0 <= k <= n (asserted).
template <typename mint, bool large = false>
mint C_inv(ll n, ll k) {
  assert(n >= 0);
  assert(0 <= k and k <= n);
  if (not large) return fact_inv<mint>(n) * fact<mint>(k) * fact<mint>(n - k);
  return mint(1) / binom<mint, 1>(n, k);
}

// Coefficient of x^d in (1 - x)^{-n}, i.e. C(n + d - 1, d);
// 0 for d < 0, and for n == 0 it is 1 only when d == 0.
template <typename mint, bool large = false, bool dense = false>
mint C_negative(ll n, ll d) {
  assert(n >= 0);
  if (d < 0) return mint(0);
  if (n == 0) return (d == 0 ? mint(1) : mint(0));
  return binom<mint, large, dense>(n + d - 1, d);
}

// Shorthands that bind the combinatorics helpers to whatever type is named
// `mint` at the point of use.
// NOTE: `fac` and `ifac` reuse the names of the table accessors defined
// earlier; from here on those tokens expand to fact<mint> / fact_inv<mint>.
#define CC C<mint>
#define fac fact<mint>
#define ifac fact_inv<mint>
#define set_comb Set_comb<mint>
#define set_inv Set_inv<mint>
#line 4 "YRS/mod/mint.hpp"

// `C` is a temporary abbreviation for constexpr, undefined again after the struct.
#define C constexpr
// Integer modulo the compile-time constant `mod`. The value is stored in x.
template <int mod>
struct mint_t {
  using mint = mint_t;
  static C uint m = mod;
  uint x;

  C uint val() const { return x; }

  // Constructors reduce their argument modulo m; the signed overloads add mod
  // to a negative remainder so the stored value is non-negative.
  C mint_t() : x(0) {}
  C mint_t(uint x) : x(x % m) {}
  C mint_t(ull x) : x(x % m) {}
  C mint_t(u128 x) : x(x % m) {}
  C mint_t(int x) : x((x %= mod) < 0 ? x + mod : x) {}
  C mint_t(ll x) : x((x %= mod) < 0 ? x + mod : x) {}
  C mint_t(i128 x) : x((x %= mod) < 0 ? x + mod : x) {}

  // Addition / subtraction with a single conditional correction by m.
  C mint &operator+=(mint p) {
    if ((x += p.x) >= m) x -= m;
    return *this;
  }
  C mint &operator-=(mint p) {
    if ((x += m - p.x) >= m) x -= m;
    return *this;
  }
  C mint operator+(mint p) const { return mint(*this) += p; }
  C mint operator-(mint p) const { return mint(*this) -= p; }

  // Multiplication through a 64-bit intermediate product.
  C mint &operator*=(mint p) {
    x = ull(x) * p.x % m;
    return *this;
  }
  C mint operator*(mint p) const { return mint(*this) *= p; }

  // Division multiplies by inv() of the divisor.
  C mint &operator/=(mint p) { return *this *= p.inv(); }
  C mint operator/(mint p) const { return mint(*this) /= p; }

  C mint operator-() const { return mint::gen(x ? mod - x : 0); }

  // Extended-Euclid style inverse.
  // In the declaration below `a = x` still reads the member x: the local x is
  // only declared later in the same statement and shadows the member from
  // there on.
  // NOTE(review): the result is presumably only a true inverse when the value
  // is coprime to mod — confirm.
  C mint inv() const {
    int a = x, b = mod, x = 1, y = 0;
    while (b > 0) {
      int t = a / b;
      swap(a -= t * b, b);
      swap(x -= t * y, y);
    }
    return mint(x);
  }

  // Exponentiation by squaring; a negative exponent is applied to inv().
  C mint pow(ll k) const {
    if (k < 0) return inv().pow(-k);
    mint s(1), a(x);
    for (; k; k >>= 1, a *= a)
      if (k & 1) s *= a;
    return s;
  }

  // Comparisons act on the stored representative.
  C bool operator<(mint p) const { return x < p.x; }
  C bool operator==(mint p) const { return x == p.x; }
  C bool operator!=(mint p) const { return x != p.x; }

  // Builds a value from x without reducing it.
  static C mint gen(uint x) {
    mint s;
    s.x = x;
    return s;
  }

  // Stream input goes through a long long and the reducing constructor.
  friend istream &operator>>(istream &cin, mint &p) {
    ll t;
    cin >> t;
    p = t;
    return cin;
  }
  friend ostream &operator<<(ostream &cout, mint p) { return cout << p.x; }

  static C int get_mod() { return mod; }

  // Returns a pair of NTT parameters for the moduli listed below and {-1, -1}
  // for any other modulus. ntt() uses the first value as the largest supported
  // log2 transform size and the second as the root at that level.
  static C PII ntt_info() {
    if (mod == 167772161) return {25, 17};
    if (mod == 469762049) return {26, 30};
    if (mod == 754974721) return {24, 362};
    if (mod == 998244353) return {23, 31};
    if (mod == 120586241) return {20, 74066978};
    if (mod == 880803841) return {23, 211};
    if (mod == 943718401) return {22, 663003469};
    if (mod == 1004535809) return {21, 582313106};
    if (mod == 1012924417) return {21, 368093570};
    return {-1, -1};
  }
  
  static C bool can_ntt() { return ntt_info().fi != -1; }
};
#undef C

// Aliases for the two commonly used moduli.
using M99 = mint_t<998244353>;
using M17 = mint_t<1000000007>;

// Hooks compiled only when FIO is defined: read via a long long, write the
// stored value.
// NOTE(review): wt for the underlying integer is not defined in the visible
// source; presumably provided by the fast I/O header — confirm.
#ifdef FIO
template <int mod>
void rd(mint_t<mod> &x) {
  LL(y);
  x = y;
}
template <int mod>
void wt(mint_t<mod> x) {
  wt(x.x);
}
#endif
#line 2 "YRS/po/convolution.hpp"

#line 2 "YRS/po/c/ntt.hpp"

#line 4 "YRS/po/c/ntt.hpp"

// In-place number-theoretic transform of a.
//   in == false: forward transform.
//   in == true : inverse transform, including the multiplication by 1/N.
// The size handling (topbit of len(a)) presumably expects len(a) to be a power
// of two, as produced by conv_ntt — confirm for other callers.
// Requires mint::can_ntt() and len(a) <= 2^p, where p is the first field of
// mint::ntt_info().
template <typename mint>
void ntt(vc<mint> &a, bool in) {
  assert(mint::can_ntt());
  const int p = mint::ntt_info().fi;
  const uint m = mint::get_mod();
  // Root tables, computed once per mint type on the first call.
  static array<mint, 30> r, ir, ra, ira, rat, irat;
  assert(p != -1 and len(a) <= (1 << max(0, p)));
  static bool ok = 0;
  if (not ok) {
    ok = 1;
    // r[p] is the second field of ntt_info(); each lower level is the square
    // of the level above. ir holds the corresponding inverses.
    r[p] = mint::ntt_info().se;
    ir[p] = mint(1) / r[p];
    FOR_R(i, p) {
      r[i] = r[i + 1] * r[i + 1];
      ir[i] = ir[i + 1] * ir[i + 1];
    }
    // ra / ira: incremental twiddle multipliers for the single-level steps.
    mint s = 1, in = 1;
    FOR(i, p - 1) {
      ra[i] = r[i + 2] * s;
      ira[i] = ir[i + 2] * in;
      s *= ir[i + 2];
      in *= r[i + 2];
    }
    // rat / irat: the same for the steps that process two levels at once.
    s = 1, in = 1;
    FOR(i, p - 2) {
      rat[i] = r[i + 3] * s;
      irat[i] = ir[i + 3] * in;
      s *= ir[i + 3];
      in *= r[i + 3];
    }
  }

  int N = len(a), n = topbit(N);
  if (not in) {
    // Forward: sz counts the levels already processed.
    int sz = 0;
    while (sz < n) {
      if (n - sz == 1) {
        // Exactly one level left: two-point butterflies.
        int p = 1 << (n - sz - 1);
        mint c = 1;
        FOR(s, 1 << sz) {
          int of = s << (n - sz);
          FOR(i, p) {
            mint l = a[i + of], r = a[i + of + p] * c;
            a[i + of] = l + r, a[i + of + p] = l - r;
          }
          // Index of the lowest zero bit of s selects the next multiplier.
          c *= ra[topbit(~s & -~s)];
        }
        ++sz;
      } else {
        // Two levels at once: four-point butterflies on unreduced 64-bit
        // values; multiples of m*m keep the subtractions non-negative.
        int p = 1 << (n - sz - 2);
        mint c = 1, in = r[2];
        FOR(s, 1 << sz) {
          mint r2 = c * c, r3 = r2 * c;
          int of = s << (n - sz);
          FOR(i, p) {
            const ull mm = ull(m) * m;
            ull a0 = a[i + of].val(), a1 = ull(a[i + of + p].val()) * c.val();
            ull aa = ull(a[i + of + 2 * p].val()) * r2.val();
            ull bb = ull(a[i + of + 3 * p].val()) * r3.val();
            ull t = (a1 + mm - bb) % m * in.val();
            ull na = mm - aa;
            a[i + of] = a0 + a1 + aa + bb;
            a[i + of + p] = a0 + aa + mm * 2 - a1 - bb;
            a[i + of + 2 * p] = a0 + na + t;
            a[i + of + 3 * p] = a0 + na + mm - t;
          }
          c *= rat[topbit(~s & -~s)];
        }
        sz += 2;
      }
    }
  } else {
    // Inverse: scale by 1/N first, then undo the levels from the last to the first.
    mint c = mint(1) / mint(N);
    FOR(i, N) a[i] *= c;
    int sz = n;
    while (sz) {
      if (sz == 1) {
        // Last remaining level: two-point butterflies.
        int p = 1 << (n - sz);
        mint c = 1;
        FOR(s, 1 << (sz - 1)) {
          int of = s << (n - sz + 1);
          FOR(i, p) {
            ull l = a[i + of].val(), r = a[i + of + p].val();
            a[i + of] = l + r;
            a[i + of + p] = (m + l - r) * c.val();
          }
          c *= ira[topbit(~s & -~s)];
        }
        --sz;
      } else {
        // Two levels at once; adding m or 2*m keeps the differences non-negative.
        int p = 1 << (n - sz);
        mint c = 1, in = ir[2];
        FOR(s, 1 << (sz - 2)) {
          mint r2 = c * c, r3 = r2 * c;
          int of = s << (n - sz + 2);
          FOR(i, p) {
            ull a0 = a[i + of].val(), a1 = a[i + of + p].val();
            ull aa = a[i + of + 2 * p].val();
            ull bb = a[i + of + 3 * p].val();
            ull x = (m + aa - bb) * in.val() % m;
            a[i + of] = a0 + a1 + aa + bb;
            a[i + of + p] = (a0 + m - a1 + x) * c.val();
            a[i + of + 2 * p] = (a0 + a1 + 2 * m - aa - bb) * r2.val();
            a[i + of + 3 * p] = (a0 + 2 * m - a1 - x) * r3.val();
          }
          c *= irat[topbit(~s & -~s)];
        }
        sz -= 2;
      }
    }
  }
}
#line 2 "YRS/mod/crt3.hpp"

// Compile-time modular exponentiation: a^b mod `mod` by square-and-multiply.
// The loop runs exactly 32 rounds, so only the low 32 bits of b are used.
constexpr uint pw_c(ull a, ull b, uint mod) {
  ull base = a % mod;
  ull acc = 1;
  for (int round = 0; round < 32; ++round, b >>= 1) {
    if (b & 1) acc = acc * base % mod;
    base = base * base % mod;
  }
  return acc;
}

// Garner-style reconstruction from residues modulo increasing moduli
// p0 < p1 < ... (order enforced by static_assert). The inverse constants are
// computed as x^(p - 2) via pw_c.
// NOTE(review): that inverse formula presumably requires each modulus to be
// prime, and each a_i to be already reduced below p_i — confirm.
// The result is assembled in type T, so for a modular T the arithmetic of the
// final step happens in T.

// Two moduli: returns a0 + c * p0 with c chosen so the value matches a1 mod p1.
template <typename T, uint p0, uint p1>
T crt(ull a0, ull a1) {
  static_assert(p0 < p1);
  static constexpr ull x0_1 = pw_c(p0, p1 - 2, p1);
  ull c = (a1 - a0 + p1) * x0_1 % p1;
  return a0 + c * p0;
}

// Three moduli: the two-modulus value is extended by a multiple of p0 * p1.
template <typename T, uint p0, uint p1, uint p2>
T crt(ull a0, ull a1, ull a2) {
  static_assert(p0 < p1 and p1 < p2);
  static constexpr ull x1 = pw_c(p0, p1 - 2, p1);
  static constexpr ull x2 = pw_c(ull(p0) * p1 % p2, p2 - 2, p2);
  static constexpr ull p01 = ull(p0) * p1;
  ull c = (a1 - a0 + p1) * x1 % p1;
  ull ans_1 = a0 + c * p0;
  c = (a2 - ans_1 % p2 + p2) * x2 % p2;
  return T(ans_1) + T(c) * T(p01);
}

// Four moduli: the intermediate three-modulus value is kept in 128 bits.
template <typename T, uint p0, uint p1, uint p2, uint p3>
T crt(ull a0, ull a1, ull a2, ull a3) {
  static_assert(p0 < p1 and p1 < p2 and p2 < p3);
  static constexpr ull x1 = pw_c(p0, p1 - 2, p1);
  static constexpr ull x2 = pw_c(ull(p0) * p1 % p2, p2 - 2, p2);
  static constexpr ull x3 = pw_c(ull(p0) * p1 % p3 * p2 % p3, p3 - 2, p3);
  static constexpr ull p01 = ull(p0) * p1;
  ull c = (a1 - a0 + p1) * x1 % p1;
  ull ans_1 = a0 + c * p0;
  c = (a2 - ans_1 % p2 + p2) * x2 % p2;
  u128 ans_2 = ans_1 + c * u128(p01);
  c = (a3 - ans_2 % p3 + p3) * x3 % p3;
  return T(ans_2) + T(c) * T(p01) * T(p2);
}

// Five moduli: same scheme with one more extension step.
template <typename T, uint p0, uint p1, uint p2, uint p3, uint p4>
T crt(ull a0, ull a1, ull a2, ull a3, ull a4) {
  static_assert(p0 < p1 and p1 < p2 and p2 < p3 and p3 < p4);
  static constexpr ull x1 = pw_c(p0, p1 - 2, p1);
  static constexpr ull x2 = pw_c(ull(p0) * p1 % p2, p2 - 2, p2);
  static constexpr ull x3 = pw_c(ull(p0) * p1 % p3 * p2 % p3, p3 - 2, p3);
  static constexpr ull x4 = pw_c(ull(p0) * p1 % p4 * p2 % p4 * p3 % p4, p4 - 2, p4);
  static constexpr ull p01 = ull(p0) * p1;
  static constexpr ull p23 = ull(p2) * p3;
  ull c = (a1 - a0 + p1) * x1 % p1;
  ull ans_1 = a0 + c * p0;
  c = (a2 - ans_1 % p2 + p2) * x2 % p2;
  u128 ans_2 = ans_1 + c * u128(p01);
  c = ull(a3 - ans_2 % p3 + p3) * x3 % p3;
  u128 ans_3 = ans_2 + u128(c * p2) * p01;
  c = ull(a4 - ans_3 % p4 + p4) * x4 % p4;
  return T(ans_3) + T(c) * T(p01) * T(p23);
}
#line 5 "YRS/po/convolution.hpp"

TE vc<T> conv_naive(const vc<T> &a, const vc<T> &b) {
  int N = len(a), M = len(b), sz = N + M - 1;
  if (not N or not M) return {};
  if (N > M) return conv_naive(b, a);
  vc<T> c(sz);
  FOR(i, N) FOR(k, M) c[i + k] += a[i] * b[k];
  return c;
}

// Convolution through ntt() for a modulus with NTT support (asserted).
// Both inputs are taken by value, zero-padded to the smallest power of two
// that is at least len(a) + len(b) - 1, transformed, multiplied pointwise and
// transformed back; the result is truncated to len(a) + len(b) - 1.
TE vc<T> conv_ntt(vc<T> a, vc<T> b) {
  assert(T::can_ntt());
  if (a.empty() or b.empty()) return {};
  int N = len(a), M = len(b), sz = 1;
  while (sz < N + M - 1) sz <<= 1;
  sh(a, sz), sh(b, sz);
  // When both padded inputs are equal, one forward transform is reused.
  bool ok = a == b;
  ntt(a, 0);
  if (ok) b = a;
  else ntt(b, 0);
  FOR(i, sz) a[i] *= b[i];
  ntt(a, 1);
  sh(a, N + M - 1);
  return a;
}

// Convolution for a modulus without NTT support: the inputs' val() values are
// convolved separately modulo three fixed NTT moduli and each coefficient is
// recombined with the three-modulus crt, evaluated in T.
TE vc<T> conv_mtt(const vc<T> &a, const vc<T> &b) {
  int N = len(a), M = len(b);
  if (not N or not M) return {};
  static constexpr int p0 = 167772161;
  static constexpr int p1 = 469762049;
  static constexpr int p2 = 754974721;
  using M0 = mint_t<p0>;
  using M1 = mint_t<p1>;
  using M2 = mint_t<p2>;
  vc<M0> a0(N), b0(M);
  vc<M1> a1(N), b1(M);
  vc<M2> a2(N), b2(M);
  FOR(i, N) a0[i] = a[i].val(), a1[i] = a[i].val(), a2[i] = a[i].val();
  FOR(i, M) b0[i] = b[i].val(), b1[i] = b[i].val(), b2[i] = b[i].val();
  vc<M0> c0 = conv_ntt<M0>(a0, b0);
  vc<M1> c1 = conv_ntt<M1>(a1, b1);
  vc<M2> c2 = conv_ntt<M2>(a2, b2);
  vc<T> c(len(c0));
  FOR(i, N + M - 1) c[i] = crt<T, p0, p1, p2>(c0[i].val(), c1[i].val(), c2[i].val());
  return c;
}

TE vc<T> convolution(const vc<T> &a, const vc<T> &b) {
  int N = len(a), M = len(b);
  if (not N or not M) return {};
  if (min(N, M) <= 30) return conv_naive(a, b);
  if (T::can_ntt()) return conv_ntt(a, b);
  return conv_mtt(a, b);
}
#line 5 "YRS/nt/bigint/big.hpp"

// https://www.luogu.com.cn/problem/P2152 (arbitrary-precision GCD)

// Arbitrary-precision signed integer.
//
// Representation: `op` is the sign (-1, 0 or +1) and `a` holds the magnitude
// as little-endian limbs in base 10^9. The member functions rely on the
// invariant that `a` has no zero limb at the top and that op == 0 exactly when
// `a` is empty.
struct bigint {
  static constexpr int TEN[]
        {1,      10,      100,      1000,      10000,
         100000, 1000000, 10000000, 100000000, 1000000000};
  static constexpr int LOG = 9, mod = TEN[LOG];
  using T = bigint;

  int op;
  vc<int> a;

  bigint() : op(0), a() {}
  // Takes sign and limbs as given; the caller is responsible for the invariant.
  bigint(int op, const vc<int> &a) : op(op), a(a) {}
  // From a machine integer. The magnitude is taken in unsigned arithmetic so
  // that the most negative long long does not overflow on negation.
  bigint(ll x) {
    if (x == 0) { op = 0; return; }
    op = x < 0 ? -1 : 1;
    ull mag = x < 0 ? 0 - ull(x) : ull(x);
    while (mag) a.ep(int(mag % mod)), mag /= mod;
  }
  // From a decimal string with an optional leading '-' or '+'.
  // Leading zeros are accepted, and any spelling of zero ("0", "000", "-0",
  // the empty string) produces the canonical zero, so the invariant always
  // holds afterwards. Characters are not validated as digits.
  bigint(string s) {
    op = 1;
    int skip = 0;
    if (not s.empty() and (s[0] == '-' or s[0] == '+')) {
      if (s[0] == '-') op = -1;
      skip = 1;
    }
    s.erase(s.begin(), s.begin() + skip);
    reverse(s);
    int N = len(s), M = ceil(N, LOG);
    a.assign(M, 0);
    FOR(i, N) a[i / LOG] += TEN[i % LOG] * (s[i] - '0');
    sh(a);  // strip zero limbs produced by leading zeros
    if (a.empty()) op = 0;
  }

  // Ordering: by sign first, then by magnitude (reversed for negatives).
  bool operator<(const T &p) const {
    if (op != p.op) return op < p.op;
    if (op == 0) return 0;
    if (op == 1) return less(a, p.a);
    else return less(p.a, a);
  }
  bool operator>(const T &p) const { return p < *this; }
  bool operator<=(const T &p) const { return not(*this > p); }
  bool operator>=(const T &p) const { return not(*this < p); }
  bool operator==(const T &p) const { return op == p.op and a == p.a; }
  bool operator!=(const T &p) const { return op != p.op or a != p.a; }

  T operator-() const {
    T p = *this;
    return p.op = -op, p;
  }
  T operator+() const { return *this; }

  // Signed addition: equal signs add magnitudes; otherwise the smaller
  // magnitude is subtracted from the larger and the sign follows the larger.
  T &operator+=(const T &p) {
    if (op == 0) return *this = p;
    if (p.op == 0) return *this;
    if (op == p.op) return a = add(a, p.a), *this;
    if (less(a, p.a)) return a = sub(p.a, a), op = -op, *this;
    a = sub(a, p.a);
    if (is_zero(a)) op = 0;
    return *this;
  }
  // Signed subtraction, mirroring operator+=.
  T &operator-=(const T &p) {
    if (p.op == 0) return *this;
    if (op == 0) return *this = -p;
    if (op != p.op) return a = add(a, p.a), *this;
    if (less(a, p.a)) return a = sub(p.a, a), op = -op, *this;
    a = sub(a, p.a);
    if (is_zero(a)) op = 0;
    return *this;
  }
  T &operator*=(const T &p) {
    op *= p.op;
    if (not op) a.clear();
    else a = mul(a, p.a);
    return *this;
  }
  T &operator/=(const T &p) { return *this = divmod(p).fi; }
  T &operator%=(const T &p) { return *this = divmod(p).se; }

  T operator+(const T &p) const { return T(*this) += p; }
  T operator-(const T &p) const { return T(*this) -= p; }
  T operator*(const T &p) const { return T(*this) *= p; }
  T operator/(const T &p) const { return T(*this) /= p; }
  T operator%(const T &p) const { return T(*this) %= p; }

  // Quotient and remainder of the magnitudes; the quotient's sign is the
  // product of the signs and the remainder takes the dividend's sign.
  // The divisor must be non-zero (asserted).
  pair<T, T> divmod(const T &p) const {
    assert(p.op != 0);
    if (op == 0) return {T(), T()};
    Z res = divmod_newton(a, p.a);
    int op1 = op * p.op, op2 = op;
    if (is_zero(res.fi)) op1 = 0;
    if (is_zero(res.se)) op2 = 0;
    return {{op1, res.fi}, {op2, res.se}};
  }

  T pow(ll k) const { return pow(*this, k); }

  // a^k by recursive squaring. The exponent must be non-negative; a negative
  // k would otherwise recurse without end because k >> 1 never reaches 0.
  static T pow(T a, ll k) {
    assert(k >= 0);
    if (k == 0) return 1;
    T ls = pow(a, k >> 1), res = ls * ls;
    return (k & 1) ? res * a : res;
  }

  // Decimal representation with a leading '-' for negative values.
  string to_string() const {
    if (not op) return "0";
    string s = to_string(a);
    if (op == -1) s += '-';
    return reverse(s), s;
  }

  // Binary representation of a non-negative value (asserted), most significant
  // bit first, without leading zeros ("0" for zero).
  string to_binary_string() const {
    assert(op != -1);
    vc<uint> A(all(a));
    string s;
    while (1) {
      while (not A.empty() and A.back() == uint(0)) pop(A);
      if (A.empty()) break;
      // Long division of the base-10^9 number by 2^32; the remainder supplies
      // the next 32 bits, least significant first.
      ull r = 0;
      int N = len(A);
      FOR_R(i, N) {
        r = r * mod + A[i];
        A[i] = r >> 32;
        r &= uint(-1);
      }
      FOR(i, 32) s += '0' + (r >> i & 1);
    }
    while (not s.empty() and s.back() == '0') pop(s);
    if (s.empty()) s += '0';
    return reverse(s), s;
  }

  // Conversions to machine integers; no range check is performed.
  ll to_ll() const {
    if (op == 0) return 0;
    ll x = to_ll(a);
    return op == -1 ? -x : x;
  }
  i128 to_i128() const {
    if (op == 0) return 0;
    i128 x = to_i128(a);
    return op == -1 ? -x : x;
  }

  friend ostream &operator<<(ostream &cout, const T &b) {
    return cout << b.to_string();
  }
  friend istream &operator>>(istream &cin, T &b) {
    static string s;
    return cin >> s, b = s, cin;
  }

  bool is_zero() const { return op == 0; }
  bool is_one() const { return op == 1 and len(a) == 1 and a[0] == 1; }
  bool is_odd() const { return op != 0 and (a[0] & 1); }
  bool is_even() const { return not is_odd(); }

  // Halves the magnitude (rounding down); the sign is kept unless the result
  // is zero.
  T div2() const {
    T r = *this;
    int N = len(a);
    // From the top limb down: an odd limb passes its lost bit to the limb
    // below as +10^9 before being shifted.
    for (int i = N; i--; r.a[i] >>= 1)
      if ((r.a[i] & 1) and i) r.a[i - 1] += mod;
    sh(r.a);
    if (r.a.empty()) r.op = 0;
    return r;
  }

  // Binary GCD of the absolute values, using only halving and subtraction.
  T gcd(const T &x) const {
    T a = this->abs(), b = x.abs();
    if (a < b) swap(a, b);
    if (b.is_zero()) return a;
    // t counts the factors of two shared by both operands.
    int t = 0;
    while (a.is_even() and b.is_even()) a = a.div2(), b = b.div2(), ++t;
    while (b > 0) {
      if (a.is_even()) a = a.div2();
      else if (b.is_even()) b = b.div2();
      else a -= b;
      if (a < b) swap(a, b);
    }
    // Restore the shared power of two by repeated doubling.
    while (t--) a += a;
    return a;
  }

  T lcm(const T &x) const { return *this / gcd(x) * x; }

  T abs() const {
    if (op == 0) return 0;
    if (op < 0) return -(*this);
    return *this;
  }

 private:
  using vec = vc<int>;
  using PVV = pair<vec, vec>;

  // Magnitude multiplication. The schoolbook method is used when the shorter
  // operand has at most 500 limbs; otherwise three NTT convolutions are
  // combined with crt and carries are propagated in 128-bit arithmetic.
  static vc<int> mul(const vec &a, const vec &b) {
    int N = len(a), M = len(b);
    if (not N or not M) return {};
    if (min(N, M) <= 500) {
      vec c(N + M - 1);
      u128 x = 0;
      FOR(k, N + M - 1) {
        int s = max(0, k + 1 - M), t = min(k, N - 1);
        FOR(i, s, t + 1) x += ull(a[i]) * b[k - i];
        c[k] = x % mod;
        x /= mod;
      }
      while (x > 0) c.ep(x % mod), x /= mod;
      return c;
    }
    static constexpr int p0 = 167772161,
                         p1 = 469762049,
                         p2 = 754974721;
    vc<mint_t<p0>> a0(all(a)), b0(all(b));
    vc<mint_t<p1>> a1(all(a)), b1(all(b));
    vc<mint_t<p2>> a2(all(a)), b2(all(b));
    Z c0 = conv_ntt(a0, b0);
    Z c1 = conv_ntt(a1, b1);
    Z c2 = conv_ntt(a2, b2);
    vec c(len(c0));
    u128 x = 0;
    FOR(i, N + M - 1) {
      x += crt<u128, p0, p1, p2>(c0[i].val(), c1[i].val(), c2[i].val());
      c[i] = x % mod, x = x / mod;
    }
    while (x) c.ep(x % mod), x /= mod;
    return c;
  }

  // Magnitude predicates and comparisons (operands assumed normalized).
  static bool is_zero(const vec &a) { return a.empty(); }
  static bool is_one(const vec &a) { return len(a) == 1 and a[0] == 1; }
  static bool eq(const vec &a, const vec &b) { return a == b; }
  static bool less(const vec &a, const vec &b) {
    if (len(a) != len(b)) return len(a) < len(b);
    int N = len(a);
    FOR_R(i, N) if (a[i] != b[i]) return a[i] < b[i];
    return 0;
  }
  static bool greater(const vec &a, const vec &b) { return less(b, a); }
  static bool less_eq(const vec &a, const vec &b) { return not greater(a, b); }
  static bool greater_eq(const vec &a, const vec &b) { return not less(a, b); }

  // Strips zero limbs from the top.
  static void sh(vec &a) {
    while (not a.empty() and a.back() == 0) pop(a);
  }
  static vec to_vec(ll x) {
    vec s;
    while (x) s.ep(x % mod), x /= mod;
    return s;
  }
  static ll to_ll(const vec &a) {
    ll s = 0;
    int N = len(a);
    FOR_R(i, N) s = s * mod + a[i];
    return s;
  }
  static i128 to_i128(const vec &a) {
    i128 s = 0;
    int N = len(a);
    FOR_R(i, N) s = s * mod + a[i];
    return s;
  }
  // Decimal digits of the magnitude, least significant first, with the zeros
  // that pad the top limb removed.
  static string to_string(const vec &a) {
    string s;
    for (int x : a) FOR(LOG) s += '0' + x % 10, x /= 10;
    while (not s.empty() and s.back() == '0') pop(s);
    return s;
  }

  // Magnitude addition with carry propagation.
  static vec add(const vec &a, const vec &b) {
    vc<int> c(all(a));
    c.resize(max(len(a), len(b)) + 1);
    int N = len(b);
    FOR(i, N) c[i] += b[i];
    N = len(c) - 1;
    FOR(i, N) if (c[i] >= mod) c[i] -= mod, ++c[i + 1];
    return sh(c), c;
  }
  // Magnitude subtraction a - b with borrow propagation; requires a >= b.
  static vec sub(const vec &a, const vec &b) {
    vc<int> c(all(a));
    int N = len(b);
    FOR(i, N) c[i] -= b[i];
    N = len(a) - 1;
    FOR(i, N) if (c[i] < 0) c[i] += mod, --c[i + 1];
    return sh(c), c;
  }

  // 0 <= A < 1e18, 1 <= B < 1e9
  static PVV divmod_ll_int(const vec &a, const vec &b) {
    assert(0 <= len(a) and len(a) <= 2);
    assert(len(b) == 1);
    ll x = to_ll(a);
    int y = b[0];
    return {to_vec(x / y), to_vec(x % y)};
  }
  // 0 <= A < 1e18, 1 <= B < 1e18
  static PVV divmod_ll_ll(const vec &a, const vec &b) {
    assert(0 <= len(a) and len(a) <= 2);
    assert(1 <= len(b) and len(b) <= 2);
    ll x = to_ll(a), y = to_ll(b);
    return {to_vec(x / y), to_vec(x % y)};
  }
  // 1 <= B < 1e9: limb-by-limb short division.
  static PVV divmod_1e9(const vec &a, const vec &b) {
    assert(len(b) == 1);
    if (len(a) <= 2) return divmod_ll_int(a, b);
    int N = len(a);
    vc<int> s(N);
    ll d = 0;
    int bb = b[0];
    FOR_R(i, N) {
      d = d * mod + a[i];
      assert(d <= 1ll * mod * bb);
      int q = d / bb, r = d % bb;
      s[i] = q, d = r;
    }
    return sh(s), pair{s, d ? vc<int>{int(d)} : vc<int>{}};
  }
  // 0 <= A, 1 <= B: schoolbook long division.
  static PVV divmod_naive(const vec &a, const vec &b) {
    assert(not is_zero(b));
    if (len(b) == 1) return divmod_1e9(a, b);
    if (max(len(a), len(b)) <= 2) return divmod_ll_ll(a, b);
    if (less(a, b)) return {{}, a};
    // B >= 1e9, A >= B
    // Scale both operands by `norm` so the divisor's top limb is large, which
    // keeps the per-limb quotient estimate close.
    int norm = mod / (b.back() + 1);
    vec x = mul(a, {norm}), y = mul(b, {norm});
    int yb = y.back();
    vec s(len(x) - len(y) + 1);
    vec r(x.end() - len(y), x.end());
    int N = len(s);
    FOR_R(i, N) {
      if (len(r) < len(y)) {
        // running remainder too short: this quotient limb stays 0
      } else if (len(r) == len(y)) {
        if (less_eq(y, r)) s[i] = 1, r = sub(r, y);
      } else {
        assert(len(y) + 1 == len(r));
        // Estimate the quotient limb from the top two remainder limbs, then
        // correct it downwards / upwards.
        ll rb = 1ll * r.back() * mod + r.end()[-2];
        int q = rb / yb;
        vec yq = mul(y, {q});
        while (less(r, yq)) --q, yq = sub(yq, y);
        r = sub(r, yq);
        while (less_eq(y, r)) ++q, r = sub(r, y);
        s[i] = q;
      }
      // Bring down the next limb of the dividend.
      if (i) r.insert(r.begin(), x[i - 1]);
    }
    sh(s), sh(r);
    // Undo the scaling on the remainder; the division must be exact.
    Z [ss, rr] = divmod_1e9(r, {norm});
    assert(is_zero(rr));
    return {s, ss};
  }

  // Computes 1 / a with absolute error B^{-deg} (B being the limb base),
  // starting from a short seed obtained by naive division and then doubling
  // the precision k on every pass.
  static vec keis_inv(const vec &a, int deg) {
    assert(not a.empty() and mod / 2 <= a.back() and a.back() < mod);
    int k = deg, N = len(a);
    while (k > 64) k = (k + 1) >> 1;
    vec b(N + k + 1);
    b.back() = 1;
    b = divmod_naive(b, a).fi;
    while (k < deg) {
      vec s = mul(b, b);
      s.insert(s.begin(), 0);
      int d = min(N, k * 2 + 1);
      vec t{a.end() - d, a.end()}, v = mul(s, t);
      v.erase(v.begin(), v.begin() + d);
      vec w(k + 1), ww = add(b, b);
      copy(all(ww), back_inserter(w));
      b = sub(w, v);
      b.erase(b.begin());
      k <<= 1;
    }
    return b.erase(b.begin(), b.begin() + k - deg), b;
  }

  // Division through the approximate reciprocal from keis_inv, used only when
  // both the divisor and the length difference exceed 64 limbs; otherwise it
  // falls back to divmod_naive. The estimated quotient is corrected by
  // repeated +-1 steps.
  static PVV divmod_newton(const vec &a, const vec &b) {
    assert(not is_zero(b));
    if (len(b) <= 64) return divmod_naive(a, b);
    if (len(a) - len(b) <= 64) return divmod_naive(a, b);
    int norm = mod / (b.back() + 1);
    vec x = mul(a, {norm}), y = mul(b, {norm});
    int N = len(x), M = len(y);
    int deg = N - M + 2;
    vec z = keis_inv(y, deg), q = mul(x, z);
    q.erase(q.begin(), q.begin() + M + deg);
    vec yq = mul(y, q);
    while (less(x, yq)) q = sub(q, {1}), yq = sub(yq, y);
    vec r = sub(x, yq);
    while (less_eq(y, r)) q = add(q, {1}), r = sub(r, y);
    sh(q), sh(r);
    Z [qq, rr] = divmod_1e9(r, {norm});
    assert(is_zero(rr));
    return {q, qq};
  }
};

// Free-function forms of the bigint member operations.
bigint abs(const bigint &x) { return x.abs(); }
bigint gcd(const bigint &a, const bigint &b) { return a.gcd(b); }
bigint lcm(const bigint &a, const bigint &b) { return a.lcm(b); }
#line 9 "No_129_\u304a\u5e74\u7389_2.cpp"

#define tests 0
#define fl 0
#define DB 10
using mint = mint_t<1'000'000'000>;
using T = bigint;
T A(int a) {
  T s = 1;
  FOR(i, 2, a + 1) s *= i;
  return s;
}
void Yorisou() {
  LL(a, b);
  a = a / 1000 % b;
  T res = A(b) / A(a) / A(b - a);
  mint s;
  for (char x : res.to_binary_string()) {
    if (x == '0') s = s * 2;
    else s = s * 2 + 1;
  }
  print(s);
}
#line 1 "YRS/aa/main.hpp"
int main() {
  cin.tie(nullptr)->sync_with_stdio(0);
  int T = 1;
  if (fl) cerr.tie(0);
  if (tests and not fl) IN(T);
  for (int i = 0; i < T or fl; ++i) {
    Yorisou();
    if (fl and i % DB == 0) cerr << "Case: " << i << '\n';
  }
  return 0;
}
#line 32 "No_129_\u304a\u5e74\u7389_2.cpp"
yukicoder

結果

ソースコード