結果

問題 No.2243 Coaching Schedule
ユーザー satashunsatashun
提出日時 2023-05-16 09:37:40
言語 C++17
(gcc 12.3.0 + boost 1.83.0)
結果
AC  
実行時間 1,809 ms / 4,000 ms
コード長 22,371 bytes
コンパイル時間 4,339 ms
コンパイル使用メモリ 232,900 KB
実行使用メモリ 131,852 KB
最終ジャッジ日時 2023-08-21 03:16:05
合計ジャッジ時間 47,973 ms
ジャッジサーバーID
(参考情報)
judge11 / judge14
このコードへのチャレンジ
(要ログイン)

テストケース

テストケース表示
入力 結果 実行時間
実行使用メモリ
testcase_00 AC 100 ms
112,748 KB
testcase_01 AC 104 ms
112,508 KB
testcase_02 AC 101 ms
112,596 KB
testcase_03 AC 105 ms
112,652 KB
testcase_04 AC 103 ms
112,972 KB
testcase_05 AC 1,794 ms
131,728 KB
testcase_06 AC 1,806 ms
131,556 KB
testcase_07 AC 1,800 ms
131,564 KB
testcase_08 AC 1,803 ms
131,500 KB
testcase_09 AC 1,797 ms
131,692 KB
testcase_10 AC 1,803 ms
131,608 KB
testcase_11 AC 1,806 ms
131,644 KB
testcase_12 AC 1,809 ms
131,352 KB
testcase_13 AC 1,799 ms
131,656 KB
testcase_14 AC 1,795 ms
131,344 KB
testcase_15 AC 1,794 ms
131,428 KB
testcase_16 AC 1,133 ms
131,852 KB
testcase_17 AC 447 ms
118,356 KB
testcase_18 AC 1,520 ms
128,928 KB
testcase_19 AC 1,702 ms
130,900 KB
testcase_20 AC 876 ms
124,008 KB
testcase_21 AC 830 ms
121,860 KB
testcase_22 AC 769 ms
120,724 KB
testcase_23 AC 166 ms
114,112 KB
testcase_24 AC 862 ms
123,604 KB
testcase_25 AC 226 ms
114,636 KB
testcase_26 AC 440 ms
117,372 KB
testcase_27 AC 884 ms
124,352 KB
testcase_28 AC 247 ms
114,992 KB
testcase_29 AC 1,528 ms
128,516 KB
testcase_30 AC 1,652 ms
130,476 KB
testcase_31 AC 1,717 ms
131,356 KB
testcase_32 AC 732 ms
120,640 KB
testcase_33 AC 428 ms
118,044 KB
testcase_34 AC 1,499 ms
128,292 KB
testcase_35 AC 1,616 ms
129,696 KB
testcase_36 AC 1,641 ms
129,668 KB
権限があれば一括ダウンロードができます

ソースコード

diff #

#pragma region satashun
// #pragma GCC optimize("Ofast")
// #pragma GCC optimize("unroll-loops")
#include <bits/stdc++.h>
using namespace std;

// Short aliases for common integer types and for a pair of ints.
using uint = unsigned int;
using ll = long long;
using ull = unsigned long long;
using pii = pair<int, int>;
// V<T> is std::vector<T>.
template <class T>
using V = vector<T>;
// VV<T> is a vector of vectors (a 2-D table).
template <class T>
using VV = V<V<T>>;

template <class T>
V<T> make_vec(size_t a) {
    return V<T>(a);
}

template <class T, class... Ts>
auto make_vec(size_t a, Ts... ts) {
    return V<decltype(make_vec<T>(ts...))>(a, make_vec<T>(ts...));
}

// Base case: overwrite a single non-vector element with `val`.
template <typename T, typename V>
void fill_vec(T& v, const V& val) {
    v = val;
}

// Recursive case: descend into every element of the vector, so that nested
// vectors of any depth end up with all leaves equal to `val`.
template <typename T, typename V>
void fill_vec(vector<T>& vec, const V& val) {
    for (auto it = vec.begin(); it != vec.end(); ++it) {
        fill_vec(*it, val);
    }
}

// Shorthands for container insertion and pair construction / access.
#define pb push_back
#define eb emplace_back
#define mp make_pair
#define fi first
#define se second
// rep(i, n): i runs over 0 .. n-1; rep2(i, m, n): i runs over m .. n-1.
#define rep(i, n) rep2(i, 0, n)
#define rep2(i, m, n) for (int i = m; i < (n); i++)
// per(i, b): i runs over b-1 .. 0; per2(i, a, b): i runs over b-1 .. a.
#define per(i, b) per2(i, 0, b)
#define per2(i, a, b) for (int i = int(b) - 1; i >= int(a); i--)
// ALL(c) expands to the begin/end iterator pair; SZ(x) is the size as int.
#define ALL(c) (c).begin(), (c).end()
#define SZ(x) ((int)(x).size())

// Returns 10^n (usable in constant expressions); recursion bottoms out at n == 0.
constexpr ll TEN(int n) {
    if (n == 0) return 1;
    return 10 * TEN(n - 1);
}

// Lowers `t` to `u` when `u` is strictly smaller; otherwise leaves it alone.
template <class T, class U>
void chmin(T& t, const U& u) {
    if (!(t > u)) return;
    t = u;
}
// Raises `t` to `u` when `u` is strictly larger; otherwise leaves it alone.
template <class T, class U>
void chmax(T& t, const U& u) {
    if (!(t < u)) return;
    t = u;
}

// Index of the first element of `v` that is not less than `x`
// (v.size() when there is none). `v` is searched with lower_bound.
template <typename T>
int arglb(const V<T>& v, const T& x) {
    const auto pos = lower_bound(v.begin(), v.end(), x);
    return pos - v.begin();
}

// Index of the first element of `v` that is greater than `x`
// (v.size() when there is none). `v` is searched with upper_bound.
template <typename T>
int argub(const V<T>& v, const T& x) {
    const auto pos = upper_bound(v.begin(), v.end(), x);
    return pos - v.begin();
}

// Sorts `v` in place and removes duplicate values.
template <class T>
void mkuni(vector<T>& v) {
    sort(v.begin(), v.end());
    const auto tail = unique(v.begin(), v.end());
    v.erase(tail, v.end());
}

// Returns the permutation of indices 0..v.size()-1 that orders `v`
// ascending (default) or descending; ties keep their original index order
// because a stable sort is used.
template <class T>
vector<int> sort_by(const vector<T>& v, bool increasing = true) {
    const int count = static_cast<int>(v.size());
    vector<int> order(count);
    for (int k = 0; k < count; ++k) order[k] = k;

    const auto ascending = [&v](int lhs, int rhs) { return v[lhs] < v[rhs]; };
    const auto descending = [&v](int lhs, int rhs) { return v[lhs] > v[rhs]; };

    if (!increasing) {
        stable_sort(order.begin(), order.end(), descending);
        return order;
    }
    stable_sort(order.begin(), order.end(), ascending);
    return order;
}

// Reads the two members of a pair, first then second.
template <class T, class U>
istream& operator>>(istream& is, pair<T, U>& p) {
    is >> p.first;
    is >> p.second;
    return is;
}

// Writes a pair as "(first,second)".
template <class T, class U>
ostream& operator<<(ostream& os, const pair<T, U>& p) {
    return os << "(" << p.first << "," << p.second << ")";
}

// Reads v.size() elements into an already-sized vector.
template <class T>
istream& operator>>(istream& is, vector<T>& v) {
    for (auto it = v.begin(); it != v.end(); ++it) {
        is >> *it;
    }
    return is;
}

// Writes a vector as "{a,b,c}" ("{}" when empty).
template <class T>
ostream& operator<<(ostream& os, const vector<T>& v) {
    os << "{";
    bool need_comma = false;
    for (const auto& item : v) {
        if (need_comma) os << ",";
        os << item;
        need_comma = true;
    }
    os << "}";
    return os;
}

// Writes a set as "{a,b,c}" in iteration order.
template <class T>
ostream& operator<<(ostream& os, const set<T>& ST) {
    os << "{";
    bool need_comma = false;
    for (const auto& item : ST) {
        if (need_comma) os << ",";
        os << item;
        need_comma = true;
    }
    os << "}";
    return os;
}

// Writes a multiset as "{a,b,c}" in iteration order (duplicates included).
template <class T>
ostream& operator<<(ostream& os, const multiset<T>& ST) {
    os << "{";
    bool need_comma = false;
    for (const auto& item : ST) {
        if (need_comma) os << ",";
        os << item;
        need_comma = true;
    }
    os << "}";
    return os;
}

// Writes a map as a run of "(key: value)" groups with no separator.
template <class T, class U>
ostream& operator<<(ostream& os, const map<T, U>& MP) {
    for (const auto& entry : MP) {
        os << "(" << entry.first << ": " << entry.second << ")";
    }
    return os;
}

// Decimal representation of a signed 128-bit integer.
//
// The digits are extracted from the unsigned magnitude of `x`, so the most
// negative value (-2^127) is handled too; negating it as a signed value
// would overflow.
std::string to_string(__int128_t x) {
    if (x == 0) return "0";

    const bool negative = x < 0;
    // Unsigned negation is well defined modulo 2^128 and yields |x|.
    __uint128_t magnitude = negative ? -static_cast<__uint128_t>(x)
                                     : static_cast<__uint128_t>(x);

    // Digits are produced least-significant first, then reversed.
    std::string digits;
    while (magnitude != 0) {
        digits.push_back(static_cast<char>('0' + static_cast<int>(magnitude % 10)));
        magnitude /= 10;
    }
    if (negative) digits.push_back('-');
    std::reverse(digits.begin(), digits.end());
    return digits;
}

// Streams a signed 128-bit integer in decimal via to_string().
std::ostream& operator<<(std::ostream& o, __int128_t x) { return o << to_string(x); }

#ifdef LOCAL
// Terminates a debug line on stderr (end of the argument recursion).
void debug_out() { cerr << endl; }
// Writes each argument to stderr preceded by a space, then ends the line.
template <typename Head, typename... Tail>
void debug_out(Head H, Tail... T) {
    cerr << " " << H;
    debug_out(T...);
}
// debug(a, b) writes "<line> [a, b]:" followed by the values to stderr;
// dump(x) writes "<line> x = <value>" to stderr.
#define debug(...) \
    cerr << __LINE__ << " [" << #__VA_ARGS__ << "]:", debug_out(__VA_ARGS__)
#define dump(x) cerr << __LINE__ << " " << #x << " = " << (x) << endl
#else
// Without LOCAL defined both macros expand to a no-op expression.
#define debug(...) (void(0))
#define dump(x) (void(0))
#endif

// Adds the scalar `v` to every element of `vec` in place.
template <class T>
V<T>& operator+=(V<T>& vec, const T& v) {
    for (auto it = vec.begin(); it != vec.end(); ++it) {
        *it += v;
    }
    return vec;
}

// Subtracts the scalar `v` from every element of `vec` in place.
template <class T>
V<T>& operator-=(V<T>& vec, const T& v) {
    for (auto it = vec.begin(); it != vec.end(); ++it) {
        *it -= v;
    }
    return vec;
}

// Writes `x` to stdout followed by a terminator chosen by `suc`:
// 1 = newline, 2 = single space, anything else = nothing.
template <class T>
void print(T x, int suc = 1) {
    cout << x;
    switch (suc) {
        case 1:
            cout << "\n";
            break;
        case 2:
            cout << " ";
            break;
        default:
            break;
    }
}

// Writes the elements of `v` separated by spaces; only the last element is
// followed by the terminator selected by `suc`. An empty vector prints nothing.
template <class T>
void print(const vector<T>& v, int suc = 1) {
    const int count = int(v.size());
    for (int idx = 0; idx < count; ++idx) {
        const bool is_last = (idx == count - 1);
        print(v[idx], is_last ? suc : 2);
    }
}

// Prints a single value followed by a newline.
template <class T>
void show(T x) {
    print(x, 1);
}

// Prints all arguments on one line, space separated, ending with a newline.
template <typename Head, typename... Tail>
void show(Head H, Tail... T) {
    print(H, 2);
    show(T...);
}

// Index of the highest set bit; -1 when the argument is zero.
int topbit(int t) {
    if (t == 0) return -1;
    return 31 - __builtin_clz(t);
}
int topbit(ll t) {
    if (t == 0) return -1;
    return 63 - __builtin_clzll(t);
}
// Index of the lowest set bit; the bit width (32 / 64) when the argument is zero.
int botbit(int a) {
    if (a == 0) return 32;
    return __builtin_ctz(a);
}
int botbit(ll a) {
    if (a == 0) return 64;
    return __builtin_ctzll(a);
}
// Number of set bits.
int popcount(int t) {
    const int ones = __builtin_popcount(t);
    return ones;
}
int popcount(ll t) {
    const int ones = __builtin_popcountll(t);
    return ones;
}
// Parity of the number of set bits (1 when odd, 0 when even).
int bit_parity(int t) {
    const int odd = __builtin_parity(t);
    return odd;
}
int bit_parity(ll t) {
    const int odd = __builtin_parityll(t);
    return odd;
}

// Configures the standard streams once, during static initialisation:
// unties cin from cout, disables C stdio synchronisation, and makes cout
// print floating-point values in fixed notation with 10 decimals.
struct prepare_io {
    prepare_io() {
        cin.tie(nullptr);
        ios::sync_with_stdio(false);
        cout.setf(ios::fixed, ios::floatfield);
        cout.precision(10);
    }
} prep_io;
#pragma endregion satashun

// Integer modulo the compile-time constant MOD, stored normalised in
// [0, MOD). Sums are formed in `unsigned int` before reduction, so 2 * MOD
// must fit in that type.
template <unsigned int MOD>
struct ModInt {
    using uint = unsigned int;
    using ull = unsigned long long;
    using M = ModInt;

    uint v;  // canonical representative, always < MOD

    // Accepts any signed value; `_v % MOD + MOD` lies in (0, 2 * MOD) and
    // set_norm folds it into [0, MOD).
    ModInt(long long _v = 0) { set_norm(_v % MOD + MOD); }

    // Stores a value known to lie in [0, 2 * MOD), reducing it to [0, MOD).
    M& set_norm(uint _v) {
        v = (_v < MOD) ? _v : _v - MOD;
        return *this;
    }

    explicit operator bool() const { return v != 0; }
    explicit operator int() const { return v; }
    M operator+(const M& a) const { return M().set_norm(v + a.v); }
    M operator-(const M& a) const { return M().set_norm(v + MOD - a.v); }
    // The product is formed in 64 bits so it cannot overflow before reduction.
    M operator*(const M& a) const { return M().set_norm(ull(v) * a.v % MOD); }
    M operator/(const M& a) const { return *this * a.inv(); }
    M& operator+=(const M& a) { return *this = *this + a; }
    M& operator-=(const M& a) { return *this = *this - a; }
    M& operator*=(const M& a) { return *this = *this * a; }
    M& operator/=(const M& a) { return *this = *this / a; }
    M operator-() const { return M() - *this; }
    // Note: these postfix forms modify the object and return a reference to
    // the already-updated value (not the previous value).
    M& operator++(int) { return *this = *this + 1; }
    M& operator--(int) { return *this = *this - 1; }

    // Binary exponentiation; a negative exponent raises the inverse instead.
    M pow(long long n) const {
        if (n < 0) return inv().pow(-n);
        M x = *this, res = 1;
        while (n) {
            if (n & 1) res *= x;
            x *= x;
            n >>= 1;
        }
        return res;
    }

    // Multiplicative inverse via the extended Euclidean algorithm. The result
    // is only a true inverse when gcd(v, MOD) == 1.
    M inv() const {
        long long a = v, b = MOD, p = 1, q = 0, t;
        while (b != 0) {
            t = a / b;
            std::swap(a -= t * b, b);
            std::swap(p -= t * q, q);
        }
        return M(p);
    }

    friend std::ostream& operator<<(std::ostream& os, const M& a) { return os << a.v; }
    friend std::istream& operator>>(std::istream& in, M& x) {
        long long v_;
        in >> v_;
        x = M(v_);
        return in;
    }

    // Ordering compares the canonical representatives.
    bool operator<(const M& r) const { return v < r.v; }
    // Fixed: the right-hand operand `r` was previously ignored (`v < *this`),
    // which did not compile once the operator was instantiated.
    bool operator>(const M& r) const { return r < *this; }
    bool operator<=(const M& r) const { return !(r < *this); }
    bool operator>=(const M& r) const { return !(*this < r); }
    bool operator==(const M& a) const { return v == a.v; }
    bool operator!=(const M& a) const { return v != a.v; }

    static uint get_mod() { return MOD; }
};

// Modulus used throughout the file (the alternative 1e9+7 is left commented out).
// using Mint = ModInt<1000000007>;
using Mint = ModInt<998244353>;

// Lookup tables filled by mod_init(): fact[i] = i!, ifact[i] = 1 / i!,
// inv[i] = 1 / i (for i >= 1).
V<Mint> fact, ifact, inv;
// Pascal's triangle: small_comb[n][r] = C(n, r) for the small n covered by mod_init().
VV<Mint> small_comb;

// Fills the global tables fact / ifact / inv (indices below 1000010) and the
// Pascal triangle small_comb (5000 x 5000).
void mod_init() {
    constexpr int kTableSize = 1000010;
    constexpr int kPascalSize = 5000;

    fact.resize(kTableSize);
    ifact.resize(kTableSize);
    inv.resize(kTableSize);

    small_comb = make_vec<Mint>(kPascalSize, kPascalSize);

    // Factorials, built upwards.
    fact[0] = 1;
    for (int i = 1; i < kTableSize; ++i) {
        fact[i] = fact[i - 1] * i;
    }

    // Inverse factorials: one modular inversion at the top, then walk down
    // using 1/(i-1)! = (1/i!) * i.
    ifact[kTableSize - 1] = fact[kTableSize - 1].inv();
    for (int i = kTableSize - 1; i > 0; --i) {
        ifact[i - 1] = ifact[i] * i;
    }

    // 1/i = (1/i!) * (i-1)!.
    for (int i = 1; i < kTableSize; ++i) {
        inv[i] = ifact[i] * fact[i - 1];
    }

    // Pascal's rule row by row; both ends of every row are 1.
    for (int n = 0; n < kPascalSize; ++n) {
        auto& row = small_comb[n];
        row[0] = 1;
        row[n] = 1;
        for (int r = 1; r < n; ++r) {
            row[r] = small_comb[n - 1][r] + small_comb[n - 1][r - 1];
        }
    }
}

// Binomial coefficient C(n, r); 0 when the arguments are out of range.
// Small n is answered from the Pascal table, larger n from factorials.
Mint comb(int n, int r) {
    const bool out_of_range = (n < 0) || (r < 0) || (r > n);
    if (out_of_range) return Mint(0);
    if (static_cast<size_t>(n) < small_comb.size()) {
        return small_comb[n][r];
    }
    return fact[n] * ifact[r] * ifact[n - r];
}

// Modular inverse of C(n, r), i.e. r! * (n-r)! / n!; 0 when out of range.
Mint inv_comb(int n, int r) {
    const bool out_of_range = (n < 0) || (r < 0) || (r > n);
    if (out_of_range) return Mint(0);
    const Mint denominator = fact[r] * fact[n - r];
    return ifact[n] * denominator;
}

// C(n, k) computed in O(k) as n * (n-1) * ... * (n-k+1) / k!.
// Only k indexes the factorial table; n itself is never used as an index.
Mint comb_slow(ll n, ll k) {
    const bool out_of_range = (n < 0) || (k < 0) || (k > n);
    if (out_of_range) return Mint(0);
    Mint acc = ifact[k];
    for (int i = 0; i < k; ++i) {
        acc *= Mint(n - i);
    }
    return acc;
}

// Number of ways to line up a 'o' symbols and b 'x' symbols: C(a + b, a).
// Returns 0 when either count is negative.
Mint comb2(int a, int b) {
    const bool valid = (a >= 0) && (b >= 0);
    return valid ? comb(a + b, a) : Mint(0);
}

// Number of ways to divide a items into b groups: C(a + b - 1, a).
// With zero groups the answer is 1 only when there is nothing to divide.
Mint nhr(int a, int b) {
    if (b != 0) return comb(a + b - 1, a);
    return (a == 0) ? Mint(1) : Mint(0);
}

// O(p + log_p n)
// Multiplies comb(n_i, k_i) over the base-p digits of n and k.
// NOTE(review): comb() reduces modulo Mint's modulus, so this presumably
// expects p to equal that modulus - confirm with the caller.
Mint lucas(ll n, ll k, int p) {
    const bool out_of_range = (n < 0) || (k < 0) || (k > n);
    if (out_of_range) return Mint(0);
    Mint res = 1;
    for (; n > 0; n /= p, k /= p) {
        const int n_digit = n % p;
        const int k_digit = k % p;
        res *= comb(n_digit, k_digit);
    }
    return res;
}

// Runs mod_init() during static initialisation so the factorial and Pascal
// tables are ready before main() starts.
struct ModPrepare {
    ModPrepare() {
        mod_init();
    }
};
ModPrepare prep_mod;

/**
 * @docs docs/ntt.md
 *
 * Radix-2 number-theoretic transform over the modular-integer type D.
 * D must provide get_mod(), pow(), inv(), a public member `v`, and the usual
 * arithmetic operators. Tables are grown lazily by ensure_base().
 */
template <class D>
struct NumberTheoreticTransform {
    D root;               // element of multiplicative order 2^max_base (set by init)
    V<D> roots = {0, 1};  // twiddle table; roots[k + j] is used at offset j of half-width k
    V<int> rev = {0, 1};  // bit-reversal permutation for length 2^base
    int base = 1, max_base = -1;  // max_base == -1 means init() has not run yet

    // Determines max_base (the exponent of 2 in MOD - 1) and searches upwards
    // from 2 for an element whose order is exactly 2^max_base.
    void init() {
        int mod = D::get_mod();
        int tmp = mod - 1;
        max_base = 0;
        while (tmp % 2 == 0) {
            tmp /= 2;
            max_base++;
        }

        root = 2;

        while (true) {
            if (root.pow(1 << max_base).v == 1) {
                if (root.pow(1 << (max_base - 1)).v != 1) {
                    break;
                }
            }
            root++;
        }
    }

    // Makes sure the tables cover transforms of length 2^nbase.
    void ensure_base(int nbase) {
        if (max_base == -1) init();
        if (nbase <= base) return;
        assert(nbase <= max_base);

        rev.resize(1 << nbase);
        for (int i = 0; i < (1 << nbase); ++i) {
            rev[i] = (rev[i >> 1] >> 1) + ((i & 1) << (nbase - 1));
        }
        roots.resize(1 << nbase);

        // Each round doubles the table: even slots copy the previous level,
        // odd slots are additionally multiplied by the next finer root z.
        while (base < nbase) {
            D z = root.pow(1 << (max_base - 1 - base));
            for (int i = 1 << (base - 1); i < (1 << base); ++i) {
                roots[i << 1] = roots[i];
                roots[(i << 1) + 1] = roots[i] * z;
            }
            ++base;
        }
    }

    // In-place transform of `a`, whose length must be a power of two.
    // With inv == true the inverse transform (including the 1/n scaling) is
    // applied. An empty vector is left untouched.
    void ntt(V<D>& a, bool inv = false) {
        int n = a.size();
        if (n == 0) return;  // __builtin_ctz(0) is undefined
        assert((n & (n - 1)) == 0);
        int zeros = __builtin_ctz(n);
        ensure_base(zeros);
        // rev[] is stored for length 2^base; shifting adapts it to length n.
        int shift = base - zeros;

        for (int i = 0; i < n; i++) {
            if (i < (rev[i] >> shift)) {
                swap(a[i], a[rev[i] >> shift]);
            }
        }

        for (int k = 1; k < n; k <<= 1) {
            for (int i = 0; i < n; i += 2 * k) {
                for (int j = 0; j < k; j++) {
                    D x = a[i + j];
                    D y = a[i + j + k] * roots[j + k];
                    a[i + j] = x + y;
                    a[i + j + k] = x - y;
                }
            }
        }

        if (inv) {
            // The inverse transform is the forward one with indices 1..n-1
            // reversed, scaled by 1/n. The modular inverse is only needed here.
            const D n_inv = D(n).inv();
            reverse(a.begin() + 1, a.end());
            for (auto& x : a) {
                x *= n_inv;
            }
        }
    }

    // Convolution of `a` and `b` (length a.size() + b.size() - 1).
    // If either operand is empty the product is the zero polynomial and an
    // empty vector is returned. Small products use the schoolbook method.
    V<D> mul(V<D> a, V<D> b) {
        if (a.empty() || b.empty()) return {};
        int s = a.size() + b.size() - 1;
        int nbase = 1;
        while ((1 << nbase) < s) nbase++;
        int sz = 1 << nbase;

        if (sz <= 16) {
            const int na = a.size();
            const int nb = b.size();
            V<D> ret(s);
            for (int i = 0; i < na; i++) {
                for (int j = 0; j < nb; j++) ret[i + j] += a[i] * b[j];
            }
            return ret;
        }

        a.resize(sz);
        b.resize(sz);
        ntt(a);
        ntt(b);

        for (int i = 0; i < sz; i++) {
            a[i] *= b[i];
        }
        ntt(a, true);

        a.resize(s);
        return a;
    }
};

// T : modint
// Two-dimensional NTT of the table `a`: every row is transformed first, then
// every column. `rev` selects the inverse transform. Empty tables are ignored.
template <class T>
void ntt_2d(VV<T>& a, bool rev) {
    if (a.empty() || a[0].empty()) return;
    const int height = a.size();
    const int width = a[0].size();

    NumberTheoreticTransform<T> fft;
    fft.init();

    for (int i = 0; i < height; ++i) {
        fft.ntt(a[i], rev);
    }

    // Columns are copied into a scratch buffer, transformed, and written back.
    V<T> column(height);
    for (int j = 0; j < width; ++j) {
        for (int i = 0; i < height; ++i) column[i] = a[i][j];
        fft.ntt(column, rev);
        for (int i = 0; i < height; ++i) a[i][j] = column[i];
    }
}

// depends on FFT libs
// work only with NTT-friendly mod

// Shared transform instance used by Poly multiplication.
NumberTheoreticTransform<Mint> ntt;

// Initialises the shared transform during static initialisation.
struct prepare_FPS {
    prepare_FPS() {
        ntt.init();
    }
};
prepare_FPS prep_FPS;

// Polynomial / truncated formal power series with coefficients of type D,
// stored as a vector where index i holds the coefficient of x^i.
// Polynomial products go through the global `ntt` instance.
template <class D>
struct Poly : public V<D> {
    // Forwards any argument list to the underlying vector constructor.
    template <class... Args>
    Poly(Args... args) : V<D>(args...) {}
    // Builds the polynomial from an explicit coefficient list.
    Poly(initializer_list<D> init) : V<D>(init.begin(), init.end()) {}

    // Number of stored coefficients, as a signed int.
    int size() const { return V<D>::size(); }
    // Coefficient of x^p, or 0 when p is at or beyond the stored length.
    D at(int p) const { return (p < this->size() ? (*this)[p] : D(0)); }

    // Drops trailing zero coefficients.
    void shrink() {
        while (this->size() > 0 && this->back() == D(0)) this->pop_back();
    }

    // first len terms
    Poly pref(int len) const {
        return Poly(this->begin(), this->begin() + min(this->size(), len));
    }

    // for polynomial division
    // Returns the coefficients in reversed order.
    Poly rev() const {
        Poly res = *this;
        reverse(res.begin(), res.end());
        return res;
    }

    // Moves every coefficient up by d positions (multiplication by x^d);
    // for negative d the coefficients that would land below x^0 are dropped.
    Poly shiftr(int d) const {
        int n = max(size() + d, 0);
        Poly res(n);
        for (int i = 0; i < size(); ++i) {
            if (i + d >= 0) {
                res[i + d] = at(i);
            }
        }
        return res;
    }

    // Coefficient-wise sum; the result has the length of the longer operand.
    Poly operator+(const Poly& r) const {
        auto n = max(size(), r.size());
        V<D> tmp(n);
        for (int i = 0; i < n; ++i) {
            tmp[i] = at(i) + r.at(i);
        }
        return tmp;
    }
    // Coefficient-wise difference; the result has the length of the longer operand.
    Poly operator-(const Poly& r) const {
        auto n = max(size(), r.size());
        V<D> tmp(n);
        for (int i = 0; i < n; ++i) {
            tmp[i] = at(i) - r.at(i);
        }
        return tmp;
    }

    // scalar
    Poly operator*(const D& k) const {
        int n = size();
        V<D> tmp(n);
        for (int i = 0; i < n; ++i) {
            tmp[i] = at(i) * k;
        }
        return tmp;
    }

    // Polynomial product, delegated to ntt.mul.
    Poly operator*(const Poly& r) const {
        Poly a = *this;
        Poly b = r;
        auto v = ntt.mul(a, b);
        return v;
    }

    // scalar
    Poly operator/(const D& k) const { return *this * k.inv(); }

    // Quotient of polynomial division, computed on reversed coefficients with
    // a power-series inverse of the reversed divisor. When the dividend is
    // shorter than the divisor the brace-initialised `{{}}` (a zero quotient)
    // is returned.
    Poly operator/(const Poly& r) const {
        if (size() < r.size()) {
            return {{}};
        }
        int d = size() - r.size() + 1;
        return (rev().pref(d) * r.rev().inv(d)).pref(d).rev();
    }

    // Remainder of polynomial division, with trailing zeros removed.
    Poly operator%(const Poly& r) const {
        auto res = *this - *this / r * r;
        res.shrink();
        return res;
    }

    // Formal derivative.
    Poly diff() const {
        V<D> res(max(0, size() - 1));
        for (int i = 1; i < size(); ++i) {
            res[i - 1] = at(i) * i;
        }
        return res;
    }

    // Formal integral with constant term 0.
    Poly inte() const {
        V<D> res(size() + 1);
        for (int i = 0; i < size(); ++i) {
            res[i + 1] = at(i) / (D)(i + 1);
        }
        return res;
    }

    // f * f.inv(m) === 1 mod (x^m)
    // f_0 ^ -1 must exist
    // Newton iteration: the number of correct terms doubles every round.
    Poly inv(int m) const {
        Poly res = Poly({D(1) / at(0)});
        for (int i = 1; i < m; i *= 2) {
            res = (res * D(2) - res * res * pref(i * 2)).pref(i * 2);
        }
        return res.pref(m);
    }

    // f_0 = 1 must hold
    // Computed as the integral of f' / f, truncated to n terms.
    Poly log(int n) const {
        auto f = pref(n);
        return (f.diff() * f.inv(n - 1)).pref(n - 1).inte();
    }

    // f_0 = 0 must hold
    // Doubling iteration that maintains f (the exponential so far) together
    // with g (a truncated inverse of f).
    Poly exp(int n) const {
        auto h = diff();
        Poly f({1}), g({1});

        for (int m = 1; m < n; m *= 2) {
            g = (g * D(2) - f * g * g).pref(m);
            auto q = h.pref(m - 1);
            auto w = (q + g * (f.diff() - f * q)).pref(m * 2 - 1);
            f = (f + f * (*this - w.inte()).pref(m * 2)).pref(m * 2);
        }

        return f.pref(n);
    }

    // front n elements of f(x)^k
    // be careful when k = 0
    // Strips the leading zero coefficients, normalises the first non-zero
    // coefficient to 1, uses exp(k * log(.)), then restores shift and scale.
    Poly pow(ll k, int n) const {
        int zero = 0;
        while (zero < size() && at(zero) == 0) {
            zero++;
        }

        // All-zero input, or the result starts at or beyond x^n.
        if (zero == size() || zero * k >= n) {
            Poly res(n);
            if (n > 0 && k == 0) res[0] = 1;
            return res;
        }

        Poly h(this->begin() + zero, this->end());
        D a = h[0], ra = D(1) / a;
        h *= ra;
        h = h.log(n - zero * k) * D(k);
        h = h.exp(n - zero * k);
        h = h.shiftr(zero * k) * a.pow(k);
        return h;
    }

    // f_0 = 1 must hold (use it with modular sqrt)
    // CF250E
    // Newton iteration g <- (g + f / g) / 2 with doubling precision.
    Poly sqrt(int n) const {
        Poly f = pref(n);
        Poly g({1});
        for (int i = 1; i < n; i *= 2) {
            g = (g + f.pref(i * 2) * g.inv(i * 2)) * D(2).inv();
        }
        return g.pref(n);
    }

    // Evaluates the polynomial at x by accumulating successive powers of x.
    D eval(D x) const {
        D res = 0, c = 1;
        for (auto a : *this) {
            res += a * c;
            c *= x;
        }
        return res;
    }

    // (*this)^k modulo the polynomial md, by binary exponentiation.
    Poly powmod(ll k, const Poly& md) {
        auto v = *this % md;
        Poly res{1};
        while (k) {
            if (k & 1) {
                res = res * v % md;
            }
            v = v * v % md;
            k /= 2;
        }
        return res;
    }

    // Compound assignments, each defined through the matching binary operator.
    Poly& operator+=(const Poly& r) { return *this = *this + r; }
    Poly& operator-=(const Poly& r) { return *this = *this - r; }
    Poly& operator*=(const D& r) { return *this = *this * r; }
    Poly& operator*=(const Poly& r) { return *this = *this * r; }
    Poly& operator/=(const Poly& r) { return *this = *this / r; }
    Poly& operator/=(const D& r) { return *this = *this / r; }
    Poly& operator%=(const Poly& r) { return *this = *this % r; }

    // Prints the non-zero terms as "<coef>x^<i>"; a term is followed by ","
    // unless it is at the last index. An empty polynomial prints "0".
    friend ostream& operator<<(ostream& os, const Poly& pl) {
        if (pl.size() == 0) return os << "0";
        for (int i = 0; i < pl.size(); ++i) {
            if (pl[i]) {
                os << pl[i] << "x^" << i;
                if (i + 1 != pl.size()) os << ",";
            }
        }
        return os;
    }

    // True when at least one coefficient is non-zero.
    explicit operator bool() const {
        bool f = false;
        for (int i = 0; i < size(); ++i) {
            if (at(i)) {
                f = true;
            }
        }
        return f;
    }
};

// calculate characteristic polynomial
// c_0 * s_i + c_1 * s_{i+1} + ... + c_k * s_{i+k} = 0
// c_k = -1
//
// Berlekamp-Massey over the coefficient type T. Constants are built from T
// itself (not from the file-level Mint alias), so the template works for any
// T that offers the arithmetic used below.
template <class T>
Poly<T> berlekamp_massey(const V<T>& s) {
    int n = int(s.size());
    // c is the current recurrence; b is the recurrence saved at the last
    // length change, and y the discrepancy observed at that time.
    V<T> b = {T(-1)}, c = {T(-1)};
    T y = T(1);
    for (int ed = 1; ed <= n; ed++) {
        int l = int(c.size()), m = int(b.size());
        // Discrepancy of the current recurrence on the window ending at ed.
        T x = 0;
        for (int i = 0; i < l; i++) {
            x += c[i] * s[ed - l + i];
        }
        b.push_back(T(0));
        m++;
        if (!x) {
            continue;
        }
        T freq = x / y;
        if (l < m) {
            // The recurrence must grow: pad c at the front, apply the
            // correction, and remember the old c as the new b.
            auto tmp = c;
            c.insert(begin(c), m - l, T(0));
            for (int i = 0; i < m; i++) {
                c[m - 1 - i] -= freq * b[m - 1 - i];
            }
            b = tmp;
            y = x;
        } else {
            for (int i = 0; i < m; i++) {
                c[l - 1 - i] -= freq * b[m - 1 - i];
            }
        }
    }
    return c;
}

// HUPC 2020 day3 K, ABC225H
// calculate vec[0] * vec[1] * ...
// deg(result) must be bounded
//
// The two shortest polynomials are multiplied repeatedly (min-heap keyed by
// length) until one remains. The heap is seeded with the constant 1, so an
// empty input yields the polynomial 1.
template <class T>
Poly<T> prod(const V<Poly<T>>& vec) {
    auto longer_first = [](const auto& lhs, const auto& rhs) -> bool {
        return lhs.size() > rhs.size();
    };
    priority_queue<Poly<T>, V<Poly<T>>, decltype(longer_first)> heap(longer_first);
    heap.push(Poly<T>{1});

    for (const auto& factor : vec) {
        heap.push(factor);
    }

    while (heap.size() > 1) {
        Poly<T> first = heap.top();
        heap.pop();
        Poly<T> second = heap.top();
        heap.pop();
        heap.push(first * second);
    }

    return heap.top();
}

// ABC215 G
// expand f(x + c)
// require factorial
//
// Uses one convolution: coefficients scaled by i! are multiplied with the
// reversed sequence c^i / i!, and the upper half is rescaled by 1 / i!.
template <class T>
Poly<T> taylor_shift(const Poly<T>& f, ll c) {
    using P = Poly<T>;
    const int n = f.size();
    P scaled(n), kernel(n);

    T c_power = 1;
    for (int i = 0; i < n; ++i) {
        scaled[i] = f[i] * fact[i];
        kernel[n - 1 - i] = c_power * ifact[i];
        c_power *= c;
    }

    scaled = scaled * kernel;
    for (int i = 0; i < n; ++i) {
        kernel[i] = scaled[n - 1 + i] * ifact[i];
    }
    return kernel;
}

template <class D>
struct MultiEval {
    using P = MultiEval*;
    P lc, rc;
    V<D> xs;
    int sz;
    Poly<D> dpol;

    const int B = 100;

    MultiEval(const V<D>& _xs, int l, int r) : sz(r - l) {
        if (r - l <= B) {
            xs = {_xs.begin() + l, _xs.begin() + r};
            dpol = {{1}};
            for (auto x : xs) {
                dpol *= {-x, 1};
            }
            return;
        }
        lc = new MultiEval(_xs, l, (l + r) / 2);
        rc = new MultiEval(_xs, (l + r) / 2, r);
        dpol = lc->dpol * rc->dpol;
    }

    MultiEval(const V<D>& xs) : MultiEval(xs, 0, xs.size()) {}

    void eval(const Poly<D>& poly, V<D>& res) {
        auto p = poly % dpol;
        if (sz <= B) {
            for (auto x : xs) {
                res.pb(p.eval(x));
            }
            return;
        }
        lc->eval(p, res);
        rc->eval(p, res);
    }

    V<D> eval(const Poly<D>& poly) {
        V<D> res;
        eval(poly, res);
        return res;
    }
};

void slv() {
    int M, N;
    cin >> M >> N;
    V<int> A(N);
    cin >> A;
    A -= 1;
    V<int> C(M);
    for (int x : A) C[x]++;

    using P = Poly<Mint>;
    V<P> vp;

    rep(i, M) {
        rep(j, C[i]) { vp.eb(P{-j, 1}); }
    }

    auto f = prod(vp);
    V<Mint> xs(N + 1);
    iota(ALL(xs), 0);

    MultiEval me(xs);
    auto ff = me.eval(f);
    debug(ff);

    V<Mint> v1(N + 1), v2(N + 1);
    rep(i, N + 1) {
        v1[i] = ff[i] * ifact[i];
        v2[i] = ifact[i] * (i % 2 ? -1 : 1);
    }
    auto v3 = ntt.mul(v1, v2);

    Mint ans;
    rep(i, N + 1) ans += v3[i] * fact[i];
    show(ans);
}

int main() {
    int cases = 1;
    // cin >> cases;
    rep(i, cases) slv();

    return 0;
}
0