
問題 No.1720 Division Permutation
ユーザー ei1333333
提出日時 2021-10-22 23:34:34
言語 C++17
(gcc 12.3.0 + boost 1.83.0)
実行時間 912 ms / 4,000 ms
コード長 8,497 bytes
コンパイル時間 8,223 ms
コンパイル使用メモリ 220,268 KB
実行使用メモリ 327,000 KB
最終ジャッジ日時 2024-09-24 06:19:52
合計ジャッジ時間 34,434 ms
judge5 / judge2


入力 結果 実行時間
testcase_00 AC 2 ms
6,816 KB
testcase_01 AC 2 ms
6,816 KB
testcase_02 AC 2 ms
6,940 KB
testcase_03 AC 2 ms
6,944 KB
testcase_04 AC 2 ms
6,944 KB
testcase_05 AC 2 ms
6,940 KB
testcase_06 AC 2 ms
6,940 KB
testcase_07 AC 2 ms
6,944 KB
testcase_08 AC 2 ms
6,944 KB
testcase_09 AC 2 ms
6,944 KB
testcase_10 AC 1 ms
6,940 KB
testcase_11 AC 1 ms
6,944 KB
testcase_12 AC 2 ms
6,940 KB
testcase_13 AC 912 ms
326,676 KB
testcase_14 AC 907 ms
326,628 KB
testcase_15 AC 528 ms
205,544 KB
testcase_16 AC 862 ms
315,132 KB
testcase_17 AC 571 ms
219,476 KB
testcase_18 AC 682 ms
327,000 KB
testcase_19 AC 690 ms
326,712 KB
testcase_20 AC 696 ms
326,740 KB
testcase_21 AC 694 ms
326,788 KB
testcase_22 AC 699 ms
326,708 KB
testcase_23 AC 694 ms
326,716 KB
testcase_24 AC 694 ms
326,832 KB
testcase_25 AC 695 ms
326,860 KB
testcase_26 AC 699 ms
326,836 KB
testcase_27 AC 700 ms
326,748 KB
testcase_28 AC 698 ms
326,708 KB
testcase_29 AC 677 ms
326,816 KB
testcase_30 AC 699 ms
326,684 KB
testcase_31 AC 710 ms
326,704 KB
testcase_32 AC 2 ms
6,940 KB
testcase_33 AC 3 ms
6,944 KB
testcase_34 AC 2 ms
6,940 KB
testcase_35 AC 2 ms
6,944 KB
testcase_36 AC 2 ms
6,940 KB
testcase_37 AC 1 ms
6,940 KB
testcase_38 AC 2 ms
6,944 KB
testcase_39 AC 2 ms
6,944 KB
testcase_40 AC 2 ms
6,940 KB
testcase_41 AC 2 ms
6,944 KB
testcase_42 AC 2 ms
6,940 KB
testcase_43 AC 681 ms
280,280 KB
testcase_44 AC 603 ms
259,720 KB
testcase_45 AC 565 ms
233,744 KB
testcase_46 AC 410 ms
181,144 KB
testcase_47 AC 580 ms
242,928 KB
testcase_48 AC 444 ms
193,896 KB
testcase_49 AC 698 ms
290,488 KB
testcase_50 AC 536 ms
231,252 KB
testcase_51 AC 441 ms
192,704 KB
testcase_52 AC 625 ms
263,220 KB
testcase_53 AC 815 ms
326,592 KB
testcase_54 AC 885 ms
326,588 KB
testcase_55 AC 773 ms
326,644 KB
testcase_56 AC 787 ms
326,628 KB
testcase_57 AC 888 ms
326,640 KB
testcase_58 AC 804 ms
326,640 KB
testcase_59 AC 798 ms
326,700 KB
testcase_60 AC 858 ms
326,504 KB
testcase_61 AC 779 ms
326,532 KB
testcase_62 AC 799 ms
326,524 KB


diff #

#include <bits/stdc++.h>

using namespace std;

// 64-bit signed integer alias used throughout the file.
using int64 = long long;

// Modulus for all modular arithmetic; the 1e9 + 7 alternative is kept for reference.
// const int mod = 1e9 + 7;
constexpr int mod = 998244353;

// Large sentinel values: 2^62 - 1 for 64-bit and 2^30 - 1 for 32-bit quantities.
constexpr int64 infll = (static_cast< int64 >(1) << 62) - 1;
constexpr int inf = 0x3FFFFFFF;

/**
 * Configures the standard output streams once, when the global object
 * `iosetup` is constructed: std::cout and std::cerr both print floating-point
 * values in fixed notation with 10 digits after the decimal point.
 */
struct IoSetup {
  IoSetup() {
    std::cout << std::fixed << std::setprecision(10);
    std::cerr << std::fixed << std::setprecision(10);
  }
} iosetup;

/**
 * Writes a pair as "first second" (single space, no trailing newline).
 *
 * @param os output stream to write to
 * @param p  pair whose members are streamed with their own operator<<
 * @return   os, to allow chaining
 */
template< typename T1, typename T2 >
std::ostream &operator<<(std::ostream &os, const std::pair< T1, T2 > &p) {
  os << p.first << " " << p.second;
  return os;
}

/**
 * Reads a pair by extracting `first` and then `second` from the stream.
 *
 * @param is input stream to read from
 * @param p  pair that receives the two extracted values
 * @return   is, to allow chaining
 */
template< typename T1, typename T2 >
std::istream &operator>>(std::istream &is, std::pair< T1, T2 > &p) {
  is >> p.first >> p.second;
  return is;
}

/**
 * Writes the elements of a vector separated by single spaces, with no
 * trailing separator; an empty vector writes nothing.
 *
 * @param os output stream to write to
 * @param v  vector whose elements are streamed with their own operator<<
 * @return   os, to allow chaining
 */
template< typename T >
std::ostream &operator<<(std::ostream &os, const std::vector< T > &v) {
  // Index with size_t so the comparison against v.size() is not signed/unsigned.
  for(std::size_t i = 0; i < v.size(); i++) {
    os << v[i] << (i + 1 != v.size() ? " " : "");
  }
  return os;
}

/**
 * Fills every existing element of a vector from the stream, in order.
 * The vector is not resized: exactly v.size() values are extracted.
 *
 * @param is input stream to read from
 * @param v  pre-sized vector whose elements are overwritten
 * @return   is, to allow chaining
 */
template< typename T >
std::istream &operator>>(std::istream &is, std::vector< T > &v) {
  for(T &in: v) is >> in;
  return is;
}

// Raises `a` to `b` when `a < b`; returns true exactly when `a` was changed.
template< typename T1, typename T2 >
inline bool chmax(T1 &a, T2 b) {
  if(a < b) {
    a = b;
    return true;
  }
  return false;
}

// Lowers `a` to `b` when `a > b`; returns true exactly when `a` was changed.
template< typename T1, typename T2 >
inline bool chmin(T1 &a, T2 b) {
  if(a > b) {
    a = b;
    return true;
  }
  return false;
}

/**
 * Builds a one-dimensional vector of `a` value-initialised elements.
 * This is the base case of the variadic overload below.
 *
 * @tparam T element type (defaults to int64)
 * @param a  number of elements
 */
template< typename T = int64 >
std::vector< T > make_v(std::size_t a) {
  return std::vector< T >(a);
}

/**
 * Builds a nested vector with one dimension per argument, e.g.
 * make_v< T >(a, b) yields `a` rows that are each a copy of make_v< T >(b).
 *
 * @tparam T  innermost element type
 * @param a   size of the outermost dimension
 * @param ts  sizes of the remaining dimensions, outermost first
 */
template< typename T, typename... Ts >
auto make_v(std::size_t a, Ts... ts) {
  return std::vector< decltype(make_v< T >(ts...)) >(a, make_v< T >(ts...));
}

/**
 * Base case: `t` is not a class type, so it is a single cell and is simply
 * assigned `v`.
 */
template< typename T, typename V >
typename std::enable_if< std::is_class< T >::value == 0 >::type fill_v(T &t, const V &v) {
  t = v;
}

/**
 * Recursive case: `t` is a class type and is iterated with a range-for, with
 * fill_v applied to every element. Every class type is treated as a
 * container here, so the innermost element type must be a non-class type.
 */
template< typename T, typename V >
typename std::enable_if< std::is_class< T >::value != 0 >::type fill_v(T &t, const V &v) {
  for(auto &e: t) fill_v(e, v);
}

/**
 * Wraps a callable so that it can call itself: every invocation passes the
 * wrapper itself as the first argument, followed by the caller's arguments.
 * This is what lets a lambda recurse without naming its own type.
 */
template< typename F >
struct FixPoint : F {
  FixPoint(F &&f) : F(std::forward< F >(f)) {}

  template< typename... Args >
  decltype(auto) operator()(Args &&... args) const {
    // Hand *this to the wrapped callable as its "self" parameter.
    return F::operator()(*this, std::forward< Args >(args)...);
  }
};

/**
 * Convenience factory for FixPoint: MFP([&](auto self, ...) { ... }).
 *
 * @param f callable whose first parameter receives the wrapper itself
 * @return  a FixPoint wrapping f
 */
template< typename F >
inline decltype(auto) MFP(F &&f) {
  return FixPoint< F >{std::forward< F >(f)};
}

/** @brief Montgomery ModInt */
// Modular integer that stores its value in Montgomery form (multiplied by
// 2^32 modulo `mod`) so that multiplication needs no division.
//
// Template parameters:
//   mod  - odd modulus below 2^30 (both enforced by static_assert).
//   fast - when true the constructor skips the `a % mod + mod` normalisation
//          of its argument.
//
// The stored word `x` is not kept fully normalised: equality and get() each
// apply one extra conditional subtraction of `mod` before using it.
template< uint32_t mod, bool fast = false >
struct MontgomeryModInt {
  using mint = MontgomeryModInt;
  using i32 = int32_t;
  using i64 = int64_t;
  using u32 = uint32_t;
  using u64 = uint64_t;

  // Computes r with r * mod == 1 (mod 2^32); checked by the static_assert below.
  static constexpr u32 get_r() {
    u32 ret = mod;
    for(i32 i = 0; i < 4; i++) ret *= 2 - mod * ret;
    return ret;
  }

  static constexpr u32 r = get_r();
  // 2^64 mod `mod`; multiplying by it and reducing converts into Montgomery form.
  static constexpr u32 n2 = -u64(mod) % mod;

  static_assert(r * mod == 1, "invalid, r * mod != 1");
  static_assert(mod < (1 << 30), "invalid, mod >= 2 ^ 30");
  static_assert((mod & 1) == 1, "invalid, mod % 2 == 0");

  u32 x;

  // Zero.
  MontgomeryModInt() : x{} {}

  // Converts an ordinary integer (negative values allowed unless `fast`).
  MontgomeryModInt(const i64 &a)
      : x(reduce(u64(fast ? a : (a % mod + mod)) * n2)) {}

  // Montgomery reduction step: maps b to a word congruent to b / 2^32 modulo `mod`.
  static constexpr u32 reduce(const u64 &b) {
    return u32(b >> 32) + mod - u32((u64(u32(b) * r) * mod) >> 32);
  }

  mint &operator+=(const mint &p) {
    if(i32(x += p.x - 2 * mod) < 0) x += 2 * mod;
    return *this;
  }

  mint &operator-=(const mint &p) {
    if(i32(x -= p.x) < 0) x += 2 * mod;
    return *this;
  }

  mint &operator*=(const mint &p) {
    x = reduce(u64(x) * p.x);
    return *this;
  }

  // Division multiplies by the modular inverse of p.
  mint &operator/=(const mint &p) {
    *this *= p.inverse();
    return *this;
  }

  mint operator-() const { return mint() - *this; }

  mint operator+(const mint &p) const { return mint(*this) += p; }

  mint operator-(const mint &p) const { return mint(*this) -= p; }

  mint operator*(const mint &p) const { return mint(*this) *= p; }

  mint operator/(const mint &p) const { return mint(*this) /= p; }

  bool operator==(const mint &p) const { return (x >= mod ? x - mod : x) == (p.x >= mod ? p.x - mod : p.x); }

  bool operator!=(const mint &p) const { return (x >= mod ? x - mod : x) != (p.x >= mod ? p.x - mod : p.x); }

  // Returns the ordinary representative in [0, mod).
  u32 get() const {
    u32 ret = reduce(x);
    return ret >= mod ? ret - mod : ret;
  }

  // Binary exponentiation: returns (*this)^n.
  mint pow(u64 n) const {
    mint ret(1), mul(*this);
    while(n > 0) {
      if(n & 1) ret *= mul;
      mul *= mul;
      n >>= 1;
    }
    return ret;
  }

  // Computed as (*this)^(mod - 2), i.e. via Fermat's little theorem;
  // NOTE(review): that is an inverse only when `mod` is prime - not checked here.
  mint inverse() const {
    return pow(mod - 2);
  }

  friend std::ostream &operator<<(std::ostream &os, const mint &p) {
    return os << p.get();
  }

  friend std::istream &operator>>(std::istream &is, mint &a) {
    i64 t;
    is >> t;
    a = mint(t);
    return is;
  }

  static u32 get_mod() { return mod; }
};

// Modular integer type used by main(), instantiated with the file-level modulus `mod`.
using modint = MontgomeryModInt< mod >;

// Entry point: reads N, K and a sequence P of N integers, fills the table dp
// with a divide-and-conquer over index ranges, and prints dp[N][1..K], one
// value per line.
//
// From the index arithmetic below, a transition dp[s][k] -> dp[e + 1][k + 1]
// is added for ranges [s, e] with max(P[s..e]) - min(P[s..e]) == e - s.
// Presumably dp[i][k] counts the ways to cut the first i elements into k such
// ranges - TODO confirm against the problem statement.
//
// NOTE(review): this block is not well-formed as shown. Closing braces (and
// apparently the opening braces of two inner scopes, since `fix` is declared
// twice) are missing, no visible statement pops the front of a qsR deque
// inside `fix`, nothing advances/decrements `t` in the `while(t ...)` loops,
// and `time_stamp` is never modified. These lines look lost in extraction;
// restore them from the original submission before building.
int main() {
  int N, K;
  cin >> N >> K;
  vector< int > P(N);
  cin >> P;
  // Shift the input values down by one.
  for(auto &p: P) --p;
  // latte[i] / malta[i]: running minimum / maximum of P between the current
  // midpoint and index i (recomputed in every rec call).
  vector< int > latte(N), malta(N);
  // (N + 1) x (K + 1) table; dp[0][0] is the only entry set before rec runs.
  auto dp = make_v< modint >(N + 1, K + 1);
  dp[0][0] = 1;
  // used[i] == time_stamp marks indices already handled by the sweep below.
  vector< int > used(N, -1);
  // qsR[key]: left-half indices bucketed by key; dp_buf[key][k] accumulates
  // dp[p][k] over the indices p pushed into qsR[key]. Keys are malta[p] + p in
  // the first sweep and N + latte[p] - p in the second, hence 2N buckets.
  vector< deque< int > > qsR(N + N);
  auto dp_buf = make_v< modint >(N + N, K + 1);
  int time_stamp = 0;
  // rec(l, r): handles the half-open index range [l, r).
  auto rec = MFP([&](auto rec, int l, int r) -> void {
    // Single element: a range of length one always extends dp[l] to dp[l + 1].
    if(l + 1 == r) {
      for(int k = 0; k < K; k++) {
        dp[l + 1][k + 1] += dp[l][k];
    int m = (l + r) / 2;
    // Left half first, so dp[l..m-1] is available as a source for the
    // transitions that cross the midpoint.
    rec(l, m);
    // Min / max of P[k..m-1] for every k in the left half.
    for(int k = m - 1, p = inf, q = -inf; k >= l; k--) {
      chmin(p, P[k]);
      chmax(q, P[k]);
      latte[k] = p;
      malta[k] = q;
    // Min / max of P[m..k] for every k in the right half.
    for(int k = m, p = inf, q = -inf; k < r; k++) {
      chmin(p, P[k]);
      chmax(q, P[k]);
      latte[k] = p;
      malta[k] = q;
      // First sweep. fix() subtracts from dp_buf[que_idx] the contribution of
      // queue-front indices whose left maximum does not exceed `low`.
      auto fix = [&](int que_idx, int low) {
        while(not qsR[que_idx].empty() and malta[qsR[que_idx].front()] <= low) {
          for(int k = 0; k < K; k++) {
            dp_buf[que_idx][k] -= dp[qsR[que_idx].front()][k];
      int t = m;
      // s walks the left half away from the midpoint; t walks the right half.
      for(int s = m - 1; s >= l; s--) {
        // Right endpoints whose right-side minimum is still above latte[s].
        while(t < r and latte[s] < latte[t]) {
          int p = t;
          int v = latte[p];
          // Min and max both on the right: the start is forced to
          // Y = p - (max - min).
          int Y = p + v - malta[p];
          if(l <= Y and Y < m and used[Y] == time_stamp and malta[p] > malta[Y]) {
            for(int k = 0; k < K; k++) {
              dp[p + 1][k + 1] += dp[Y][k];
          // Min on the right, max on the left: matching starts s' satisfy
          // malta[s'] + s' == p + v, i.e. they sit in bucket p + v.
          fix(p + v, malta[p]);
          for(int k = 0; k < K; k++) {
            dp[p + 1][k + 1] += dp_buf[p + v][k];
          used[p] = time_stamp;
        int p = s;
        int v = latte[p];
        // Min and max both on the left: the end is forced to Y = s + (max - min).
        int Y = malta[p] + p - v;
        if(m <= Y and Y < r and used[Y] == time_stamp and malta[p] > malta[Y]) {
          for(int k = 0; k < K; k++) {
            dp[Y + 1][k + 1] += dp[p][k];
        used[p] = time_stamp;
        // Register s under key malta[s] + s for later right endpoints.
        qsR[malta[p] + p].emplace_back(p);
        for(int k = 0; k < K; k++) {
          dp_buf[malta[p] + p][k] += dp[p][k];
      // Remaining right endpoints after every left index has been registered.
      while(t < r) {
        int p = t;
        int v = latte[p];
        int Y = p + v - malta[p];
        if(l <= Y and Y < m and used[Y] == time_stamp and malta[p] > malta[Y]) {
          for(int k = 0; k < K; k++) {
            dp[p + 1][k + 1] += dp[Y][k];
        fix(p + v, malta[p]);
        for(int k = 0; k < K; k++) {
          dp[p + 1][k + 1] += dp_buf[p + v][k];
        used[p] = time_stamp;
    // Reset the buckets touched by the first sweep.
    for(int k = l; k < m; k++) {
      qsR[malta[k] + k].clear();
      for(int i = 0; i < K; i++) dp_buf[malta[k] + k][i] = 0;
      // Second sweep (min on the left, max on the right). fix() subtracts the
      // contribution of queue-front indices whose left maximum is >= `low`.
      auto fix = [&](int que_idx, int low) {
        while(not qsR[que_idx].empty() and malta[qsR[que_idx].front()] >= low) {
          for(int k = 0; k < K; k++) {
            dp_buf[que_idx][k] -= dp[qsR[que_idx].front()][k];
      int t = r - 1;
      // s walks the left half towards the midpoint; t starts at the far right end.
      for(int s = l; s < m; s++) {
        while(t >= m and latte[s] > latte[t]) {
          int p = t;
          int v = latte[p];
          // Matching starts s' satisfy latte[s'] - s' == malta[p] - p; the
          // offset N keeps the bucket index inside [0, 2N).
          fix(N + malta[p] - p, malta[p]);
          for(int k = 0; k < K; k++) {
            dp[p + 1][k + 1] += dp_buf[N + malta[p] - p][k];
        int p = s;
        int v = latte[p];
        // Register s under key N + latte[s] - s.
        qsR[N + v - p].emplace_back(p);
        for(int k = 0; k < K; k++) {
          dp_buf[N + v - p][k] += dp[p][k];
      // Remaining right endpoints after every left index has been registered.
      while(t >= m) {
        int p = t;
        int v = latte[p];
        fix(N + malta[p] - p, malta[p]);
        for(int k = 0; k < K; k++) {
          dp[p + 1][k + 1] += dp_buf[N + malta[p] - p][k];
    // Reset the buckets touched by the second sweep.
    for(int k = l; k < m; k++) {
      qsR[N + latte[k] - k].clear();
      for(int i = 0; i < K; i++) dp_buf[N + latte[k] - k][i] = 0;
    // Right half last, once every transition into it from the left is in place.
    rec(m, r);
  rec(0, N);
  for(int i = 1; i <= K; i++) cout << dp[N][i] << "\n";