#975201 (C++23) No.2747 Permutation Adjacent Sum

提出ソース
結果

問題	No.2747 Permutation Adjacent Sum
ユーザー	rogi52
提出日時	2024-04-20 12:47:08
言語	C++23 (gcc 13.3.0 + boost 1.87.0)
結果	AC
実行時間	120 ms / 3,000 ms
コード長	23,579 bytes
コンパイル時間	5,167 ms
コンパイル使用メモリ	306,832 KB
実行使用メモリ	30,828 KB
最終ジャッジ日時	2024-10-12 08:10:30
合計ジャッジ時間	9,469 ms
ジャッジサーバーID （参考情報）	judge1 / judge4
このコードへのチャレンジ
（要ログイン）
ファイルパターン	結果
sample	AC * 2
other	AC * 40
権限があれば一括ダウンロードができます
ソースコード

raw source code
#line 2 "cp-library/src/cp-template.hpp"
#include <bits/stdc++.h>
using namespace std;
using ll = long long;
using ld = long double;
using uint = unsigned int;
using ull  = unsigned long long;
using i32 = int;
using u32 = unsigned int;
using i64 = long long;
using u64 = unsigned long long;
using i128 = __int128_t;
// Lowers `a` to `b` when `b` is strictly smaller; returns true iff `a` was changed.
template < class T > bool chmin(T& a, T b) {
    const bool improved = a > b;
    if(improved) a = b;
    return improved;
}
// Raises `a` to `b` when `b` is strictly larger; returns true iff `a` was changed.
template < class T > bool chmax(T& a, T b) {
    const bool improved = a < b;
    if(improved) a = b;
    return improved;
}
// Integer ceiling of x / y (y != 0), for any sign combination of x and y.
// The truncated quotient is bumped by one only when the division is inexact and
// the exact quotient is positive, i.e. the remainder and the divisor share a sign.
// This also avoids the overflow-prone `x + y - 1` intermediate.
template < class T, class U > T ceil (T x, U y) {
    const auto q = x / y;
    const auto r = x % y;
    const bool round_up = r != 0 && ((r > 0) == (y > 0));
    return q + (round_up ? 1 : 0);
}
// Integer floor of x / y (y != 0), for any sign combination of x and y.
// The truncated quotient is lowered by one only when the division is inexact and
// the exact quotient is negative, i.e. the remainder and the divisor differ in sign.
// For unsigned operands neither sign test fires, so the plain quotient is returned
// (the previous `x - y + 1` form wrapped around for x == 0).
template < class T, class U > T floor(T x, U y) {
    const auto q = x / y;
    const auto r = x % y;
    const bool round_down = r != 0 && ((r < 0) != (y < 0));
    return q - (round_down ? 1 : 0);
}
// Number of set bits in `x`; one overload per 32/64-bit integer alias of this file.
int popcnt(i32 x) {
    return __builtin_popcount(static_cast< u32 >(x));
}
int popcnt(u32 x) {
    return __builtin_popcount(x);
}
int popcnt(i64 x) {
    return __builtin_popcountll(static_cast< u64 >(x));
}
int popcnt(u64 x) {
    return __builtin_popcountll(x);
}

#line 2 "cp-library/src/utility/rep_itr.hpp"
// Minimal counting iterator behind `rep` / `revrep`.
//   i : current value, d : step added by each increment.
// `operator!=` is really a "not yet past the end" test: with a positive step it is
// `i < end.i`, otherwise `i > end.i`, so a range-for stops even when the step jumps
// over the end value.
template < class T > struct itr_rep {
    T i, d;
    constexpr itr_rep(const T i) noexcept : i(i), d(1) {}
    constexpr itr_rep(const T i, const T d) noexcept : i(i), d(d) {}
    void operator++() noexcept { i += d; }
    // Yields the counter as T (it used to be narrowed to int, truncating 64-bit ranges).
    constexpr T operator*() const noexcept { return i; }
    constexpr bool operator!=(const itr_rep x) const noexcept { return d > 0 ? i < x.i : i > x.i; }
};

// Ascending counter range for range-based for loops:
//   rep(n)              -> 0, 1, ..., n - 1
//   rep(first, last)    -> first, ..., last - 1
//   rep(first, last, k) -> first, first + k, ... while the value is still below `last`
template < class T > struct rep {
    const itr_rep< T > s, t;

    constexpr rep(const T last) noexcept
        : s(0), t(last) {}
    constexpr rep(const T first, const T last) noexcept
        : s(first), t(last) {}
    constexpr rep(const T first, const T last, const T step) noexcept
        : s(first, step), t(last, step) {}

    constexpr itr_rep< T > begin() const noexcept { return s; }
    constexpr itr_rep< T > end  () const noexcept { return t; }
};

// Descending counterpart of `rep`:
//   revrep(n)              -> n - 1, n - 2, ..., 0
//   revrep(first, last)    -> last - 1, ..., first
//   revrep(first, last, k) -> last - 1, last - 1 - k, ... while the value is still >= first
// The end sentinel is `first - 1` (or -1) and the step is stored negated.
template < class T > struct revrep {
    const itr_rep < T > s, t;

    constexpr revrep(const T last) noexcept
        : s(last - 1, -1), t(-1, -1) {}
    constexpr revrep(const T first, const T last) noexcept
        : s(last - 1, -1), t(first - 1, -1) {}
    constexpr revrep(const T first, const T last, const T step) noexcept
        : s(last - 1, -step), t(first - 1, -step) {}

    constexpr itr_rep< T > begin() const noexcept { return s; }
    constexpr itr_rep< T > end  () const noexcept { return t; }
};
#line 3 "cp-library/src/utility/io.hpp"

/* 128bit integer */
// Reads one whitespace-delimited token and parses it as a decimal 128-bit integer.
// A leading '-' marks a negative value; every other character is treated as a digit
// without validation.
istream& operator>>(istream& is, i128& x) {
    std::string token;
    is >> token;
    const bool negative = (token[0] == '-');
    i128 value = 0;
    for(std::size_t k = (negative ? 1 : 0); k < token.size(); ++k) {
        value = value * 10 + (token[k] - '0');
    }
    if(negative) value *= -1;
    x = value;
    return is;
}
// Writes a 128-bit signed integer in decimal.
// The digits are produced from the unsigned magnitude, so the most negative value
// (whose negation does not fit in i128) prints correctly, and the text is emitted as
// one string so stream formatting such as width applies to the whole number rather
// than to each digit.
ostream& operator<<(ostream& os, const i128& x) {
    if(x == 0) return os << '0';
    const bool negative = x < 0;
    // 0 - u(x) is the magnitude in two's-complement modular arithmetic.
    __uint128_t mag = negative ? __uint128_t(0) - __uint128_t(x) : __uint128_t(x);
    std::string text;
    while(mag > 0) {
        text.push_back(char('0' + int(mag % 10)));
        mag /= 10;
    }
    if(negative) text.push_back('-');
    std::reverse(text.begin(), text.end());
    return os << text;
}

// Reads a pair as two consecutive values: first, then second.
template < class S, class T >
istream& operator>>(istream& is, std::pair< S, T >& x) {
    (is >> x.first) >> x.second;
    return is;
}
// Writes a pair as "first second", separated by a single space.
template < class S, class T >
ostream& operator<<(ostream& os, const std::pair< S, T >& x) {
    os << x.first << " " << x.second;
    return os;
}

namespace scanner {
    // Proxy that reads one value from std::cin, typed by whatever it is converted to.
    struct sca {
        template < class T > operator T() {
            T value;
            std::cin >> value;
            return value;
        }
    };
    // Proxy that reads `n` values from std::cin into a std::vector.
    struct vec {
        int n;
        vec(int n) : n(n) {}
        template < class T > operator std::vector< T >() {
            std::vector< T > out(n);
            for(int i = 0; i < n; ++i) std::cin >> out[i];
            return out;
        }
    };
    // Proxy that reads an h-by-w table from std::cin, row by row.
    struct mat {
        int h, w;
        mat(int h, int w) : h(h), w(w) {}
        template < class T > operator std::vector< std::vector< T > >() {
            std::vector< std::vector< T > > table(h, std::vector< T >(w));
            for(int i = 0; i < h; ++i) {
                for(int j = 0; j < w; ++j) std::cin >> table[i][j];
            }
            return table;
        }
    };
    // Unties std::cin from std::cout and turns off stdio synchronisation when the
    // static instance below is constructed.
    struct speedup {
        speedup() {
            std::cin.tie(nullptr);
            std::ios::sync_with_stdio(false);
        }
    } speedup_instance;
}
// Input helpers: the returned proxy performs the actual read when it is converted
// to the destination type (scalar, vector of n values, or h-by-w table).
scanner::sca in() {
    return {};
}
scanner::vec in(int n) {
    return scanner::vec{n};
}
scanner::mat in(int h, int w) {
    return scanner::mat{h, w};
}

namespace printer {
    // Switches std::cout to fixed-point notation with `d` digits after the point.
    void precision(int d) {
        std::cout.setf(std::ios_base::fixed, std::ios_base::floatfield);
        std::cout.precision(d);
    }
    // Flushes std::cout.
    void flush() {
        std::cout << std::flush;
    }
}

// Writes the elements of `a` separated by single spaces (no trailing separator).
// The vector is taken by const reference; it used to be passed by value, which
// copied the whole container on every print.
template < class T >
std::ostream& operator<<(std::ostream& os, const std::vector< T >& a) {
    bool first = true;
    for(const auto& x : a) {
        if(!first) os << ' ';
        first = false;
        os << x;
    }
    return os;
}

// print(a, b, ...) writes its arguments to std::cout separated by single spaces and
// ends the line with '\n'. Always returns 0.
int print() {
    std::cout << '\n';
    return 0;
}
template < class head, class... tail > int print(head&& h, tail&&... t) {
    std::cout << h;
    // A separator is only needed when more arguments follow.
    if constexpr (sizeof...(tail) > 0) std::cout << ' ';
    return print(std::forward< tail >(t)...);
}
// Writes every element of `a` on its own line to std::cout. Always returns 0.
// The vector is taken by const reference instead of by value to avoid copying it.
template < class T > int print_n(const std::vector< T >& a) {
    for(const auto& x : a) std::cout << x << "\n";
    return 0;
}


#line 2 "cp-library/src/utility/key_val.hpp"

// A (key, value) record that supports structured bindings:
//   auto [k, v] = kv;   auto& [k, v] = kv;   const auto& [k, v] = kv;
// The std::tuple_size / std::tuple_element specialisations below, together with the
// member `get<I>()`, implement the tuple-like protocol that bindings rely on.
template < class K, class V >
struct key_val {
    K key; V val;
    // Value-initialises both members (they used to be left indeterminate).
    key_val() : key(), val() {}
    key_val(K key, V val) : key(key), val(val) {}

    // Mutable access: index 0 is the key, index 1 is the value.
    template < std::size_t Index >
    std::tuple_element_t< Index, key_val >& get() {
        static_assert(Index < 2, "key_val has exactly two elements");
        if constexpr (Index == 0) return key;
        else return val;
    }
    // Read-only access, required when binding through a const object or reference.
    template < std::size_t Index >
    const std::tuple_element_t< Index, key_val >& get() const {
        static_assert(Index < 2, "key_val has exactly two elements");
        if constexpr (Index == 0) return key;
        else return val;
    }
};

namespace std {

template < class K, class V > struct tuple_size < key_val< K, V > > : integral_constant< size_t, 2 > {};
template < class K, class V > struct tuple_element < 0, key_val< K, V > > { using type = K; };
template < class K, class V > struct tuple_element < 1, key_val< K, V > > { using type = V; };

}
#line 2 "cp-library/src/utility/vec_op.hpp"
// Returns (index, value) of the first maximum element of `a`; `a` must be non-empty.
template < class T > key_val< int, T > max_of(const vector< T >& a) {
    const auto best = std::max_element(a.begin(), a.end());
    const int pos = best - a.begin();
    return key_val< int, T >(pos, a[pos]);
}
// Returns (index, value) of the first minimum element of `a`; `a` must be non-empty.
template < class T > key_val< int, T > min_of(const vector< T >& a) {
    const auto best = std::min_element(a.begin(), a.end());
    const int pos = best - a.begin();
    return key_val< int, T >(pos, a[pos]);
}
// Sum of all elements of `a`, accumulated in the caller-chosen type S
// (for example sum_of< long long >(v) for a vector of int).
template < class S, class T > S sum_of(const vector< T >& a) {
    S total = 0;
    for(auto it = a.begin(); it != a.end(); ++it) total += *it;
    return total;
}
// Histogram of `a` over the half-open value range [L, R): entry k counts the
// elements equal to L + k. Every element of `a` must lie inside [L, R).
template < class S, class T > vector< S > freq_of(const vector< T >& a, T L, T R) {
    vector< S > counts(R - L, S(0));
    for(std::size_t k = 0; k < a.size(); ++k) counts[a[k] - L] += 1;
    return counts;
}
// Prefix sums of `a`, accumulated in type S: s[i] = a[0] + ... + a[i - 1], s[0] = 0.
// S may differ from T (e.g. prefix_sum< long long, int >). The table used to be
// copy-constructed from `a`, which only compiled when S and T were the same type.
template < class S, class T > struct prefix_sum {
    std::vector< S > s;
    prefix_sum(const std::vector< T >& a) : s(a.size() + 1, S(0)) {
        for(std::size_t i = 0; i < a.size(); ++i) s[i + 1] = s[i] + S(a[i]);
    }
    // Sum over the half-open index range [L, R); requires 0 <= L <= R <= a.size().
    S sum(int L, int R) const { return s[R] - s[L]; }
};
#line 3 "cp-library/src/utility/heap.hpp"

// Priority queues named by which element top() returns.
// heap_min: smallest element on top (std::greater ordering).
template < class T > using heap_min = std::priority_queue< T, std::vector< T >, std::greater< T > >;
// heap_max: largest element on top; this is std::priority_queue's default configuration.
template < class T > using heap_max = std::priority_queue< T >;

#line 27 "cp-library/src/cp-template.hpp"

#line 1 "cp-library/src/algorithm/bin_search.hpp"
// Integer binary search over a monotone predicate.
// `ok` must satisfy f and `ng` must not; the pair is narrowed until the two are
// adjacent, and the last value known to satisfy f is returned. `ok` may be on either
// side of `ng`.
template < class T, class F >
T bin_search(T ok, T ng, F f) {
    for(;;) {
        if(!(abs(ng - ok) > 1)) break;
        const T mid = (ok + ng) / 2;
        if(f(mid)) ok = mid;
        else       ng = mid;
    }
    return ok;
}

// Real-valued bisection: halves the interval between `ok` (f holds) and `ng`
// (f fails) `step` times and returns the final `ok`.
template < class T, class F >
T bin_search_real(T ok, T ng, F f, int step = 80) {
    for(; step != 0; --step) {
        const T mid = (ok + ng) / 2;
        if(f(mid)) ok = mid;
        else       ng = mid;
    }
    return ok;
}
#line 2 "cp-library/src/algorithm/argsort.hpp"

// Returns the indices of `a` ordered by ascending value; equal values keep
// ascending index order.
template < class T > std::vector< int > argsort(const std::vector< T > &a) {
    const int n = (int)a.size();
    std::vector< int > order(n);
    std::iota(order.begin(), order.end(), 0);
    const auto before = [&](int i, int j) {
        if(a[i] < a[j]) return true;
        return a[i] == a[j] && i < j;
    };
    std::sort(order.begin(), order.end(), before);
    return order;
}
#line 1 "macro.hpp"
// Container-level convenience wrappers around <algorithm>, plus a few small helpers.
namespace macro {

using size_type = int;

// In-place ascending sort.
template < class container > void  sort(container& a) { std::sort(std:: begin(a), std:: end(a)); }
// In-place descending sort (an ascending sort over the reversed view).
template < class container > void rsort(container& a) { std::sort(std::rbegin(a), std::rend(a)); }
// In-place reversal.
template < class container > void reverse(container& a) { std::reverse(std::begin(a), std::end(a)); }
// Sorts `a` and removes duplicate values.
template < class container > void unique(container& a) {
    std::sort(std::begin(a), std::end(a));
    a.erase(std::unique(std::begin(a), std::end(a)), std::end(a));
}
// Copying variants. Locals are returned by name: wrapping them in std::move would
// disable copy elision.
template < class container > container  sorted(const container& a) {
    container b = a;
    std::sort(std::begin(b), std::end(b));
    return b;
}
template < class container > container rsorted(const container& a) {
    container b = a;
    std::sort(std::rbegin(b), std::rend(b));
    return b;
}
template < class container, class compare > void sort(container& a, const compare& cmp) { std::sort(std::begin(a), std::end(a), cmp); }
template < class container, class compare > container sorted(const container& a, const compare& cmp) {
    container b = a;
    std::sort(std::begin(b), std::end(b), cmp);
    return b;
}
// Index of the first element not less than / greater than `x` in a sorted container.
template < class container, class value > size_type lower_bound(const container& a, const value& x) { return std::lower_bound(std::begin(a), std::end(a), x) - std::begin(a); }
template < class container, class value > size_type upper_bound(const container& a, const value& x) { return std::upper_bound(std::begin(a), std::end(a), x) - std::begin(a); }

// Grid neighbour offsets: 4-neighbourhood and 8-neighbourhood.
const std::vector<std::pair<size_type, size_type>> dir4 = { {+1,  0}, {-1,  0}, { 0, +1}, { 0, -1} };
const std::vector<std::pair<size_type, size_type>> dir8 = { {-1, -1}, {-1,  0}, {-1, +1}, { 0, -1}, { 0, +1}, {+1, -1}, {+1,  0}, {+1, +1} };

// debug(x) prints "[line] x: value" when _DEBUG is defined and expands to nothing otherwise.
#ifdef _DEBUG
#define debug(x) std::cout << "[" << __LINE__ << "] " << #x << ": " << x << std::endl
#else
#define debug(x)
#endif

// Appends `b` to `a`. Appending a container to itself goes through a copy, because
// inserting a range of a container into that same container is not allowed.
template < class container > void concat(container& a, const container& b) {
    if(std::addressof(a) == std::addressof(b)) {
        const container snapshot = b;
        a.insert(std::end(a), std::begin(snapshot), std::end(snapshot));
        return;
    }
    a.insert(std::end(a), std::begin(b), std::end(b));
}
// Returns {0, 1, ..., n - 1}.
std::vector<size_type> iota(const size_type n) {
    std::vector<size_type> I(n);
    std::iota(std::begin(I), std::end(I), 0);
    return I;
}
// Indices of `a` in ascending value order, ties in ascending index order.
// A stable sort of the identity permutation gives exactly that tie-break and does
// not need operator== on the elements.
template < class container > std::vector<size_type> sort_idx(const container& a) {
    const size_type n = a.size();
    std::vector<size_type> I = iota(n);
    std::stable_sort(std::begin(I), std::end(I), [&](size_type i, size_type j) { return a[i] < a[j]; });
    return I;
}
// Same as above, ordered by `cmp`; elements that `cmp` treats as equivalent keep
// ascending index order.
template < class container, class compare > std::vector<size_type> sort_idx(const container& a, const compare& cmp) {
    const size_type n = a.size();
    std::vector<size_type> I = iota(n);
    std::stable_sort(std::begin(I), std::end(I), [&](size_type i, size_type j) { return cmp(a[i], a[j]); });
    return I;
}

// Bounds helper for an H-by-W grid.
struct grid {
    using size_type = int;
    size_type H, W;
    grid(const size_type H, const size_type W) : H(H), W(W) {}
    // True iff (i, j) is a valid cell; callable on const grids.
    bool contains(const size_type i, const size_type j) const {
        return 0 <= i and i < H and 0 <= j and j < W;
    }
};

using f64 = long double;

// Element-wise increment / decrement of a whole vector (handy for switching between
// 0-based and 1-based indices). The postfix forms return the values from before the change.
template < class T > std::vector< T >& operator++(std::vector< T >& a) { for(T& x : a) x++; return a; }
template < class T > std::vector< T >& operator--(std::vector< T >& a) { for(T& x : a) x--; return a; }
template < class T > std::vector< T >  operator++(std::vector< T >& a, signed) { std::vector< T > res = a; for(T& x : a) x++; return res; }
template < class T > std::vector< T >  operator--(std::vector< T >& a, signed) { std::vector< T > res = a; for(T& x : a) x--; return res; }

} // namespace macro

using namespace macro;
#line 3 "C.cpp"

#include <atcoder/convolution>
// https://judge.yosupo.jp/submission/129637

// Modular integer for a compile-time odd modulus below 2^30 (both enforced by the
// static_asserts below). The stored word `a` is not the plain residue: values are
// converted on construction (multiplication by n2 followed by reduce) and converted
// back by get(). Comparison operators treat `a` and `a - mod` as the same value, so
// the stored word may exceed `mod` between operations.
// NOTE(review): this matches the usual "lazy" Montgomery scheme with R = 2^32 and
// stored values kept in [0, 2 * mod) -- the exact range invariant is not asserted
// anywhere in the code.
template <uint32_t mod>
struct LazyMontgomeryModInt {
  using mint = LazyMontgomeryModInt;
  using i32 = int32_t;
  using u32 = uint32_t;
  using u64 = uint64_t;

  // Inverse of `mod` modulo 2^32, obtained by repeating ret *= 2 - mod * ret;
  // the result is verified by the static_assert on r * mod below.
  static constexpr u32 get_r() {
    u32 ret = mod;
    for (i32 i = 0; i < 4; ++i) ret *= 2 - mod * ret;
    return ret;
  }

  static constexpr u32 r = get_r();
  // -u64(mod) is 2^64 - mod, so n2 equals 2^64 mod `mod`.
  static constexpr u32 n2 = -u64(mod) % mod;
  static_assert(r * mod == 1, "invalid, r * mod != 1");
  static_assert(mod < (1 << 30), "invalid, mod >= 2 ^ 30");
  static_assert((mod & 1) == 1, "invalid, mod % 2 == 0");

  // Stored (internal-form) value.
  u32 a;

  constexpr LazyMontgomeryModInt() : a(0) {}
  // Accepts any signed 64-bit value: b % mod + mod is strictly positive and below
  // 2 * mod, and is then brought into internal form via n2 and reduce.
  constexpr LazyMontgomeryModInt(const int64_t &b)
      : a(reduce(u64(b % mod + mod) * n2)){};

  // Adds the multiple of `mod` that clears the low 32 bits of b, then returns the
  // high 32 bits of the sum.
  static constexpr u32 reduce(const u64 &b) {
    return (b + u64(u32(b) * u32(-r)) * mod) >> 32;
  }

  // Adds b and subtracts 2 * mod; if the result is negative when viewed as a
  // signed 32-bit value, 2 * mod is added back.
  constexpr mint &operator+=(const mint &b) {
    if (i32(a += b.a - 2 * mod) < 0) a += 2 * mod;
    return *this;
  }

  // Subtracts b; if the result is negative when viewed as signed, adds 2 * mod.
  constexpr mint &operator-=(const mint &b) {
    if (i32(a -= b.a) < 0) a += 2 * mod;
    return *this;
  }

  // Product of the two stored words followed by one reduction.
  constexpr mint &operator*=(const mint &b) {
    a = reduce(u64(a) * b.a);
    return *this;
  }

  // Division is multiplication by inverse().
  constexpr mint &operator/=(const mint &b) {
    *this *= b.inverse();
    return *this;
  }

  constexpr mint operator+(const mint &b) const { return mint(*this) += b; }
  constexpr mint operator-(const mint &b) const { return mint(*this) -= b; }
  constexpr mint operator*(const mint &b) const { return mint(*this) *= b; }
  constexpr mint operator/(const mint &b) const { return mint(*this) /= b; }
  // Equality compares the stored words after subtracting `mod` from any word >= mod.
  constexpr bool operator==(const mint &b) const {
    return (a >= mod ? a - mod : a) == (b.a >= mod ? b.a - mod : b.a);
  }
  constexpr bool operator!=(const mint &b) const {
    return (a >= mod ? a - mod : a) != (b.a >= mod ? b.a - mod : b.a);
  }
  // Negation is computed as 0 - x.
  constexpr mint operator-() const { return mint() - mint(*this); }

  // Binary exponentiation: returns this^n (1 for n == 0).
  constexpr mint pow(u64 n) const {
    mint ret(1), mul(*this);
    while (n > 0) {
      if (n & 1) ret *= mul;
      mul *= mul;
      n >>= 1;
    }
    return ret;
  }
  
  // Computed as this^(mod - 2); presumably relies on `mod` being prime (Fermat's
  // little theorem) -- the code does not check primality.
  constexpr mint inverse() const { return pow(mod - 2); }

  // Streams the value returned by get().
  friend ostream &operator<<(ostream &os, const mint &b) {
    return os << b.get();
  }

  // Reads a signed 64-bit integer and converts it through the constructor.
  friend istream &operator>>(istream &is, mint &b) {
    int64_t t;
    is >> t;
    b = LazyMontgomeryModInt<mod>(t);
    return (is);
  }
  
  // Converts the stored word back with one reduction and a final conditional
  // subtraction of `mod`.
  constexpr u32 get() const {
    u32 ret = reduce(a);
    return ret >= mod ? ret - mod : ret;
  }

  static constexpr u32 get_mod() { return mod; }
};
using namespace std;

// Precomputed factorials, inverse factorials and inverses for a modular type T.
// T must be constructible from int and provide operator*, operator*= and inverse().
// Tables cover the indices 0 .. MAX + 9; the table-based queries must stay in range.
template <typename T>
struct Binomial {
  std::vector<T> fac_, finv_, inv_;
  Binomial(int MAX) : fac_(MAX + 10), finv_(MAX + 10), inv_(MAX + 10) {
    MAX += 9;  // last valid index of the tables
    fac_[0] = finv_[0] = inv_[0] = 1;
    for (int i = 1; i <= MAX; i++) fac_[i] = fac_[i - 1] * i;
    // One modular inverse for the largest factorial, then walk downwards.
    finv_[MAX] = fac_[MAX].inverse();
    for (int i = MAX - 1; i > 0; i--) finv_[i] = finv_[i + 1] * (i + 1);
    // 1 / i = (i - 1)! / i!
    for (int i = 1; i <= MAX; i++) inv_[i] = finv_[i] * fac_[i - 1];
  }

  inline T fac(int i) const { return fac_[i]; }
  inline T finv(int i) const { return finv_[i]; }
  inline T inv(int i) const { return inv_[i]; }

  // Binomial coefficient n choose r from the tables; 0 when r < 0 or r > n.
  T C(int n, int r) const {
    if (n < r || r < 0) return T(0);
    return fac_[n] * finv_[n - r] * finv_[r];
  }

  // Binomial coefficient without the tables (usable for n beyond the table size):
  // multiplies n (n - 1) ... (n - r + 1) and divides by 1 .. r one factor at a time.
  // The loop counter is a plain int; it used to be a T compared with `<=`, which
  // does not compile for modular types that define no ordering.
  T C_naive(int n, int r) const {
    if (n < r || r < 0) return T(0);
    T ret = 1;
    for (int i = 1; i <= r; i++) {
      ret *= T(n--);
      ret *= T(i).inverse();
    }
    return ret;
  }

  // Number of ordered selections n! / (n - r)!; 0 when r < 0 or r > n.
  T P(int n, int r) const {
    if (n < r || r < 0) return T(0);
    return fac_[n] * finv_[n - r];
  }

  // Combinations with repetition: C(n + r - 1, r), with H(n, 0) = 1.
  T H(int n, int r) const {
    if (n < 0 || r < 0) return T(0);
    if (r == 0) return T(1);
    return C(n + r - 1, r);
  }
};


// Given the samples y[0..N] of a polynomial of degree <= N taken at 0, 1, ..., N,
// returns its value at `x` (x >= 0). `C` supplies the inverse factorials.
template <typename mint>
mint lagrange_interpolation(const vector<mint>& y, long long x,
                            const Binomial<mint>& C) {
  const int N = (int)y.size() - 1;
  if (x <= N) return y[x];  // already one of the sample points

  // below[i] = prod_{j < i} (x - j),  above[i] = prod_{j > i} (x - j)
  vector<mint> below(N + 1, 1), above(N + 1, 1);
  mint cur = x;
  const mint one = 1;
  for (int i = 0; i < N; i++) {
    below[i + 1] = below[i] * cur;
    cur -= one;
  }
  // cur is now x - N; walk back up while filling the suffix products.
  for (int i = N; i > 0; i--) {
    above[i - 1] = above[i] * cur;
    cur += one;
  }

  mint ret = 0;
  for (int i = 0; i <= N; i++) {
    const mint term = y[i] * below[i] * above[i] * C.finv(i) * C.finv(N - i);
    // The sign of the i-th Lagrange denominator is (-1)^(N - i).
    if ((N - i) & 1) ret -= term;
    else             ret += term;
  }
  return ret;
}

// f holds the samples f(0), ..., f(K) of a polynomial of degree K.
// Returns sum_{i = 0}^{n - 1} a^i f(i).
template <typename mint>
mint sum_of_exp(const vector<mint>& f, mint a, long long n,
                const Binomial<mint>& C) {
  if (n == 0) return mint(0);
  if (a == mint(0)) return f[0];  // only the i = 0 term is non-zero

  const int len = (int)f.size();

  if (a == mint(1)) {
    // Plain prefix sums are a polynomial of degree K + 1: interpolate them at n.
    vector<mint> pre(f.size() + 1, mint(0));
    for (int i = 1; i < (int)pre.size(); i++) pre[i] = pre[i - 1] + f[i - 1];
    return lagrange_interpolation(pre, n, C);
  }

  const int K = f.size() - 1;

  // acc[i] = sum_{j <= i} a^j f(j)
  vector<mint> acc(f.size());
  mint apow = 1;
  for (int i = 0; i < len; i++) {
    acc[i] = f[i] * apow;
    apow *= a;
  }
  for (int i = 1; i < len; i++) acc[i] += acc[i - 1];

  // c: the constant such that (acc[i] - c) / a^i is polynomial in i.
  mint c = 0, sgn = 1;
  for (int i = 0; i <= K; i++) {
    c += C.C(K + 1, i) * sgn * acc[K - i];
    sgn *= -a;
  }
  c /= (-a + 1).pow(K + 1);

  // Strip the geometric factor, interpolate at n - 1, then put the factor back.
  mint scale = 1;
  const mint ia = a.inverse();
  for (int i = 0; i < len; i++) {
    acc[i] = (acc[i] - c) * scale;
    scale *= ia;
  }
  const mint tn = lagrange_interpolation(acc, n - 1, C);
  return tn * a.pow(n - 1) + c;
}

// f holds the samples f(0), ..., f(K) of a polynomial of degree K.
// Returns the value assigned to sum_{i = 0}^{infinity} a^i f(i).
template <typename mint>
mint sum_of_exp_limit(const vector<mint>& f, mint a, const Binomial<mint>& C) {
  if (a == mint(0)) return f[0];  // only the i = 0 term is non-zero

  const int K = f.size() - 1;
  const int len = (int)f.size();

  // acc[i] = sum_{j <= i} a^j f(j)
  vector<mint> acc(f.size());
  mint apow = 1;
  for (int i = 0; i < len; i++) {
    acc[i] = f[i] * apow;
    apow *= a;
  }
  for (int i = 1; i < len; i++) acc[i] += acc[i - 1];

  mint c = 0, sgn = 1;
  for (int i = 0; i <= K; i++) {
    c += C.C(K + 1, i) * sgn * acc[K - i];
    sgn *= -a;
  }
  c /= (-a + 1).pow(K + 1);
  return c;
}

// Returns the table { 0^p, 1^p, ..., n^p } (with 0^0 = 1).
// pow() is called for primes only; composite entries come from a linear sieve,
// using (i * q)^p = i^p * q^p.
// Edge cases handled explicitly:
//   * n < 0            -> empty table
//   * p == 0           -> every entry is 1 (the table used to be {1, 0, 0, ...})
//   * n == 0, p != 0   -> {0}; the unconditional write to index 1 used to run past the end
template <typename mint>
std::vector<mint> exp_enamurate(int p, int n) {
  if (n < 0) return {};
  if (p == 0) return std::vector<mint>(n + 1, mint(1));

  std::vector<mint> f(n + 1, mint(0));
  if (n >= 1) f[1] = 1;
  std::vector<char> composite(n + 1, 0);
  std::vector<int> primes;
  for (int i = 2; i <= n; i++) {
    if (!composite[i]) {
      f[i] = mint(i).pow(p);
      primes.push_back(i);
    }
    for (const int q : primes) {
      if (1LL * i * q > n) break;
      composite[i * q] = 1;
      f[i * q] = f[i] * f[q];
      if (i % q == 0) break;  // q is the smallest prime factor of i: stop here
    }
  }
  return f;  // returned by name so copy elision / implicit move applies
}

// Returns sum_{i = 0}^{n - 1} r^i * i^d, sampling i^d at the d + 1 points 0 .. d.
template <typename mint>
mint sum_of_exp2(int d, mint r, long long n, const Binomial<mint>& C) {
  const vector<mint> samples = exp_enamurate<mint>(d, d);
  return sum_of_exp(samples, r, n, C);
}

// Infinite-sum counterpart of sum_of_exp2: passes the samples of i^d at 0 .. d to
// sum_of_exp_limit.
template <typename mint>
mint sum_of_exp_limit2(int d, mint r, const Binomial<mint>& C) {
  const vector<mint> samples = exp_enamurate<mint>(d, d);
  return sum_of_exp_limit(samples, r, C);
}

using mint = LazyMontgomeryModInt<998244353>;
// Shared factorial tables; they bound the exponent `d` that solve() can handle.
Binomial<mint> C(1000100);

// Returns sum_{i = 0}^{n - 1} r^i * i^d modulo 998244353.
// Short ranges are summed directly, longer ones go through sum_of_exp2.
// The direct branch now applies the r^i weight as well; it used to ignore `r`, so
// the two branches only agreed for r == 1.
mint solve(i64 r, i64 d, i64 n) {
    if(n <= 1000) {
        const mint ratio = r;
        mint ans = 0, weight = 1;
        for(i64 i = 0; i < n; i++) {
            ans += weight * mint(i).pow(d);
            weight *= ratio;
        }
        return ans;
    }
    return sum_of_exp2<mint>(d, r, n, C);
}

namespace lib {
#define PROBLEM "https://judge.yosupo.jp/problem/factorial"

#line 254 "C.cpp"

#include <atcoder/modint>

using mint = atcoder::modint998244353;

#line 261 "C.cpp"

#line 264 "C.cpp"

namespace suisen {
    // Lazily growing factorial tables shared by all instances with the same <T, U>.
    //   T : type of i!,  U : type of 1 / i! (defaults to T).
    // Every query first makes sure the tables reach the requested index.
    template <typename T, typename U = T>
    struct factorial {
        factorial() = default;
        factorial(int n) { ensure(n); }

        // Extends both tables so that index n is valid. The size grows to at least
        // twice the previous one; a single division produces the last inverse and the
        // remaining new inverses are derived from it going downwards.
        static void ensure(const int n) {
            const int old_size = _fac.size();
            if (old_size >= n + 1) return;
            const int grown = std::max(n + 1, old_size * 2);
            _fac.resize(grown);
            _fac_inv.resize(grown);
            for (int i = old_size; i < grown; ++i) {
                _fac[i] = _fac[i - 1] * i;
            }
            _fac_inv[grown - 1] = U(1) / _fac[grown - 1];
            for (int i = grown - 2; i >= old_size; --i) {
                _fac_inv[i] = _fac_inv[i + 1] * (i + 1);
            }
        }

        // i!
        T fac(const int i) {
            ensure(i);
            return _fac[i];
        }
        // Shorthand for fac(i).
        T operator()(int i) {
            return fac(i);
        }
        // 1 / i!
        U fac_inv(const int i) {
            ensure(i);
            return _fac_inv[i];
        }
        // n choose r; 0 outside 0 <= r <= n.
        U binom(const int n, const int r) {
            const bool out_of_range = n < 0 or r < 0 or n < r;
            if (out_of_range) return 0;
            ensure(n);
            return _fac[n] * _fac_inv[r] * _fac_inv[n - r];
        }
        // n! / (n - r)!; 0 outside 0 <= r <= n.
        U perm(const int n, const int r) {
            const bool out_of_range = n < 0 or r < 0 or n < r;
            if (out_of_range) return 0;
            ensure(n);
            return _fac[n] * _fac_inv[n - r];
        }
    private:
        static std::vector<T> _fac;
        static std::vector<U> _fac_inv;
    };
    // Both tables start with the single entry for 0! = 1.
    template <typename T, typename U>
    std::vector<T> factorial<T, U>::_fac{ 1 };
    template <typename T, typename U>
    std::vector<U> factorial<T, U>::_fac_inv{ 1 };
} // namespace suisen

#line 313 "C.cpp"

namespace suisen {
    // Takes n = ys.size() sample values `ys`, an offset `t` and a count `m`, and
    // returns a vector of m values built from three convolutions.
    // NOTE(review): judging by the name and by how FactorialLarge uses the result
    // (g[k] stands in for f(d + k) after calling with t = d), this evaluates the
    // polynomial of degree < n with ys[i] = f(i) at t, t + 1, ..., t + m - 1 --
    // confirm against the upstream library.
    template <typename mint>
    std::vector<mint> shift_of_sampling_points(const std::vector<mint>& ys, mint t, int m) {
        const int n = ys.size();
        factorial<mint> fac(std::max(n, m));

        // Step 1: b = first n terms of (ys[i] / i!) convolved with ((-1)^i / i!).
        std::vector<mint> b = [&] {
            std::vector<mint> f(n), g(n);
            for (int i = 0; i < n; ++i) {
                f[i] = ys[i] * fac.fac_inv(i);
                g[i] = (i & 1 ? -1 : 1) * fac.fac_inv(i);
            }
            std::vector<mint> b = atcoder::convolution(f, g);
            b.resize(n);
            return b;
        }();
        // Step 2: reverse b in place (the outer b is modified here), scale entry i
        // by (n - 1 - i)!, and convolve with c[i] = t (t - 1) ... (t - i + 1) / i!.
        std::vector<mint> e = [&] {
            std::vector<mint> c(n);
            mint prd = 1;
            std::reverse(b.begin(), b.end());
            for (int i = 0; i < n; ++i) {
                b[i] *= fac.fac(n - i - 1);
                c[i] = prd * fac.fac_inv(i);
                prd *= t - i;
            }
            std::vector<mint> e = atcoder::convolution(b, c);
            e.resize(n);
            return e;
        }();
        // Undo the reversal and divide entry i by i!.
        std::reverse(e.begin(), e.end());
        for (int i = 0; i < n; ++i) {
            e[i] *= fac.fac_inv(i);
        }

        // Step 3: convolve with 1 / i!, keep the first m terms and multiply entry i by i!.
        std::vector<mint> f(m);
        for (int i = 0; i < m; ++i) f[i] = fac.fac_inv(i);
        std::vector<mint> res = atcoder::convolution(e, f);
        res.resize(m);
        for (int i = 0; i < m; ++i) res[i] *= fac.fac(i);
        return res;
    }
} // namespace suisen

namespace suisen {
    // Computes n! modulo p = mint::mod() for large n.
    // The constructor precomputes _acc[q] = (q * B)! for q = 0 .. _q, where
    // B = _b = 2^15 and _q = p >> 15; a query multiplies at most B - 1 extra factors.
    template <typename mint>
    struct FactorialLarge {
        static constexpr int _p = mint::mod();
        static constexpr int _log_b = 15;
        static constexpr int _b = 1 << _log_b;   // block length B
        static constexpr int _q = _p >> _log_b;  // number of precomputed blocks

        FactorialLarge() {
            // f_d(x) := (dx+1)*...*(dx+d-1)

            // Suppose that we have f_d(0),...,f_d(d-1). (Note that (deg f_d)+1=d)
            // f_{2d}(x) = ((2dx+1)*...*(2dx+d-1)) * (2dx+d) * (((2dx+d)+1)* ...*((2dx+d)+d-1))
            //           = f_d(2x) * f_d(2x+1) * (2dx+d)
            // We can calculate f_{2d}(0), ..., f_{2d}(2d-1) from f_d(0), f_d(1), ..., f_d(4d-2), f_d(4d-1)

            // Start from d = 1 (f_1 is the empty product, i.e. 1) and double d
            // _log_b times.
            std::vector<mint> f{ 1 };
            f.reserve(_b);
            for (int i = 0; i < _log_b; ++i) {
                // With d = 2^i: f holds d values and g supplies the next 3d, so that
                // get(j) covers the indices 0 .. 4d - 1.
                std::vector<mint> g = shift_of_sampling_points<mint>(f, 1 << i, 3 << i);
                const auto get = [&](int j) { return j < (1 << i) ? f[j] : g[j - (1 << i)]; };
                f.resize(2 << i);
                for (int j = 0; j < 2 << i; ++j) {
                    // (2*j+1)*2^i <= 2^(2*_log_b) + 2^(_log_b-1) < 2^31 holds if _log_b <= 15
                    f[j] = get(2 * j) * get(2 * j + 1) * ((2 * j + 1) << i);
                }
            }
            // f_B(x) = (x+1) * ... * (x+B-1)
            // Make f hold exactly _q values: extend it with further sample points
            // when _q > B, otherwise truncate it.
            if (_q > _b) {
                std::vector<mint> g = shift_of_sampling_points<mint>(f, _b, _q - _b);
                std::move(g.begin(), g.end(), std::back_inserter(f));
            } else {
                f.resize(_q);
            }
            // Multiply in the missing last factor (i + 1) * B of each block.
            for (int i = 0; i < _q; ++i) {
                f[i] *= mint(i + 1) * _b;
            }
            // f[i] = (i*B + 1) * ... * (i*B + B)
            _acc = std::move(f);

            // Prefix products of the blocks, with a leading 1 standing for 0!.
            _acc.insert(_acc.begin(), 1);
            for (int i = 1; i <= _q; ++i) {
                _acc[i] *= _acc[i - 1];
            }
        }

        // Returns n! mod p; the result is 0 as soon as n >= p.
        // NOTE(review): n is assumed to be non-negative -- a negative n would index
        // _acc out of range.
        mint operator()(long long n) {
            if (_p <= n) return 0;
            const int q = n >> _log_b, r = n & (_b - 1);
            // n! = (qb)! * (n-r+1)(n-r+2)...(n)
            mint ans = _acc[q];
            for (int j = 0; j < r; ++j) {
                ans *= mint::raw(n - j);
            }
            return ans;
        }
    private:
        // Per-block products after the first constructor phase, prefix products at the end.
        std::vector<mint> _acc;
    };
} // namespace suisen

}

int main() {
    i64 N = in(), K = in();
    mint ans = 0;
    ans += solve(1, K, N) * N - solve(1, K + 1, N);
    lib::suisen::FactorialLarge<atcoder::modint998244353> fact;
    print(ans * fact(N - 1).val() * 2);
}
yukicoder

結果

ソースコード