結果
問題 | No.2243 Coaching Schedule |
ユーザー | woodywoody |
提出日時 | 2024-06-02 14:02:06 |
言語 | C++17(gcc12) (gcc 12.3.0 + boost 1.87.0) |
結果 |
AC
|
実行時間 | 529 ms / 4,000 ms |
コード長 | 25,419 bytes |
コンパイル時間 | 5,802 ms |
コンパイル使用メモリ | 289,984 KB |
実行使用メモリ | 87,504 KB |
最終ジャッジ日時 | 2024-12-23 10:14:03 |
合計ジャッジ時間 | 18,705 ms |
ジャッジサーバーID (参考情報) |
judge3 / judge1 |
(要ログイン)
テストケース
テストケース表示入力 | 結果 | 実行時間 実行使用メモリ |
---|---|---|
testcase_00 | AC | 160 ms
81,536 KB |
testcase_01 | AC | 167 ms
81,536 KB |
testcase_02 | AC | 163 ms
81,408 KB |
testcase_03 | AC | 161 ms
81,536 KB |
testcase_04 | AC | 160 ms
81,792 KB |
testcase_05 | AC | 328 ms
87,472 KB |
testcase_06 | AC | 364 ms
87,480 KB |
testcase_07 | AC | 356 ms
87,476 KB |
testcase_08 | AC | 357 ms
87,476 KB |
testcase_09 | AC | 371 ms
87,476 KB |
testcase_10 | AC | 367 ms
87,480 KB |
testcase_11 | AC | 373 ms
87,480 KB |
testcase_12 | AC | 529 ms
87,504 KB |
testcase_13 | AC | 483 ms
87,492 KB |
testcase_14 | AC | 487 ms
87,500 KB |
testcase_15 | AC | 479 ms
87,492 KB |
testcase_16 | AC | 295 ms
87,480 KB |
testcase_17 | AC | 208 ms
83,072 KB |
testcase_18 | AC | 292 ms
85,792 KB |
testcase_19 | AC | 337 ms
86,032 KB |
testcase_20 | AC | 272 ms
84,480 KB |
testcase_21 | AC | 237 ms
83,968 KB |
testcase_22 | AC | 214 ms
83,712 KB |
testcase_23 | AC | 159 ms
82,048 KB |
testcase_24 | AC | 252 ms
84,480 KB |
testcase_25 | AC | 167 ms
82,176 KB |
testcase_26 | AC | 197 ms
82,944 KB |
testcase_27 | AC | 274 ms
84,608 KB |
testcase_28 | AC | 168 ms
82,176 KB |
testcase_29 | AC | 300 ms
85,648 KB |
testcase_30 | AC | 323 ms
86,244 KB |
testcase_31 | AC | 319 ms
86,508 KB |
testcase_32 | AC | 213 ms
83,584 KB |
testcase_33 | AC | 201 ms
83,200 KB |
testcase_34 | AC | 283 ms
85,860 KB |
testcase_35 | AC | 311 ms
85,860 KB |
testcase_36 | AC | 321 ms
86,128 KB |
ソースコード
#include<bits/stdc++.h>#include<atcoder/all>#define rep(i,b) for(int i=0;i<b;i++)#define rrep(i,b) for(int i=b-1;i>=0;i--)#define rep1(i,b) for(int i=1;i<b;i++)#define repx(i,x,b) for(int i=x;i<b;i++)#define rrepx(i,x,b) for(int i=b-1;i>=x;i--)#define fore(i,a) for(auto& i:a)#define rng(x) (x).begin(), (x).end()#define rrng(x) (x).rbegin(), (x).rend()#define sz(x) ((int)(x).size())#define pb push_back#define fi first#define se second#define pcnt __builtin_popcountllusing namespace std;using namespace atcoder;using ll = long long;using ull = long long;using ld = long double;template<typename T> using mpq = priority_queue<T, vector<T>, greater<T>>;template<typename T> bool chmax(T &a, const T &b) { if (a<b) { a=b; return 1; } return 0; }template<typename T> bool chmin(T &a, const T &b) { if (b<a) { a=b; return 1; } return 0; }template<typename T> ll sumv(const vector<T>&a){ll res(0);for(auto&&x:a)res+=x;return res;}bool yn(bool a) { if(a) {cout << "Yes" << endl; return true;} else {cout << "No" << endl; return false;}}#define retval(x) {cout << #x << endl; return;}#define cout2(x,y) cout << x << " " << y << endl;#define coutp(p) cout << p.fi << " " << p.se << endl;#define out cout << ans << endl;#define outd cout << fixed << setprecision(20) << ans << endl;#define outm cout << ans.val() << endl;#define outv fore(yans , ans) cout << yans << "\n";#define outdv fore(yans , ans) cout << yans.val() << "\n";#define assertmle(x) if (!(x)) {vi v(3e8);}#define asserttle(x) if (!(x)) {while(1){}}#define coutv(v) {fore(vy , v) {cout << vy << " ";} cout << endl;}#define coutv2(v) fore(vy , v) cout << vy << "\n";#define coutvm(v) {fore(vy , v) {cout << vy.val() << " ";} cout << endl;}#define coutvm2(v) fore(vy , v) cout << vy.val() << "\n";using pll = pair<ll,ll>;using pil = pair<int,ll>;using pli = pair<ll,int>;using pii = pair<int,int>;using pdd = pair<ld,ld>;using vi = vector<int>;using vd = vector<ld>;using vl = vector<ll>;using vs = vector<string>;using vb = vector<bool>;using vpii = vector<pii>;using vpli = vector<pli>;using vpll = vector<pll>;using vpil = vector<pil>;using vvi = vector<vector<int>>;using vvl = vector<vector<ll>>;using vvs = vector<vector<string>>;using vvb = vector<vector<bool>>;using vvpii = vector<vector<pii>>;using vvpli = vector<vector<pli>>;using vvpll = vector<vpll>;using vvpil = vector<vpil>;using mint = modint998244353;//using mint = modint1000000007;//using mint = dynamic_modint<0>;using vm = vector<mint>;using vvm = vector<vector<mint>>;vector<int> dx={1,0,-1,0,1,1,-1,-1},dy={0,1,0,-1,1,-1,1,-1};ll gcd(ll a, ll b) { return a?gcd(b%a,a):b;}ll lcm(ll a, ll b) { return a/gcd(a,b)*b;}#define yes {cout <<"Yes"<<endl;}#define no {cout <<"No"<<endl;}const double eps = 1e-10;const ll LINF = 2001002003004005006ll;const int INF = 1001001001;#ifdef MY_LOCAL_DEBUG#include "./debug/localDebug.cpp"#define showp(p) cerr<<#p<<" = "<<p.fi<<" : "<<p.se<<endl#define show1(a) cerr<<#a<<" = "<<a<<endl#define show2(a,b) cerr<<#a<<" = "<<a<<" : "<<#b<<" = "<<b<<endl#define show3(a,b,c) cerr<<#a<<" = "<<a<<" : "<<#b<<" = "<<b<<" : "<<#c<<" = "<<c<<endl#define show4(a,b,c,d) cerr<<#a<<" = "<<a<<" : "<<#b<<" = "<<b<<" : "<<#c<<" = "<<c<<" : "<<#d<<" = "<<d<<endl#define show5(a,b,c,d,e) cerr<<#a<<" = "<<a<<" : "<<#b<<" = "<<b<<" : "<<#c<<" = "<<c<<" : "<<#d<<" = "<<d<<" : "<<#e<<" = "<<e<<endl#define DEBUG_LINE cout << "DEBUG_LINE : " << __LINE__ << endl#define showv(v) {cout<<#v<<" : "; fore(vy , v) {cout << vy << " ";} cout << endl;}#define showv2(v) {cout<<#v<<endl; fore(vy , v) cout << vy << "\n";}#define showvm(v) {cout<<#v<<" : "; fore(vy , v) {cout << vy.val() << " ";} cout << endl;}#define showvm2(v) {cout<<#v<<endl; fore(vy , v) cout << vy.val() << "\n";}#define showmat(v) {cout<<#v<<endl; fore(row , v) { fore(seg , row) cout << seg << " "; cout << endl;}}#define showmatm(v) {cout<<#v<<endl; fore(row , v) { fore(seg , row) cout << seg.val() << " "; cout << endl;}}#else#define showp(p)#define show1(a)#define show2(a,b)#define show3(a,b,c)#define show4(a,b,c,d)#define show5(a,b,c,d,e)#define DEBUG_LINE#define showv(v)#define showv2(v)#define showvm(v)#define showvm2(v)#define showmat(v)#define showmatm(v)#endif#define overload5(a,b,c,d,e,f, ...) f#define show(...) overload5(__VA_ARGS__, show5, show4, show3, show2, show1)(__VA_ARGS__)struct NTT{uint32_t get_pr(){uint32_t _mod = mint::mod();using u64 = uint64_t;u64 ds[32] = {};int idx = 0;u64 m = _mod - 1;for (u64 i = 2; i * i <= m; ++i){if (m % i == 0){ds[idx++] = i;while (m % i == 0)m /= i;}}if (m != 1)ds[idx++] = m;uint32_t _pr = 2;while (1){int flg = 1;for (int i = 0; i < idx; ++i){u64 a = _pr, b = (_mod - 1) / ds[i], r = 1;while (b){if (b & 1)r = r * a % _mod;a = a * a % _mod;b >>= 1;}if (r == 1){flg = 0;break;}}if (flg == 1)break;++_pr;}return _pr;};static constexpr uint32_t mod = mint::mod();uint32_t pr = get_pr();static constexpr int level = __builtin_ctzll(mod - 1);mint dw[level], dy[level];void setwy(int k){mint w[level], y[level];w[k - 1] = mint(pr).pow((mod - 1) / (1 << k));y[k - 1] = w[k - 1].inv();for (int i = k - 2; i > 0; --i)w[i] = w[i + 1] * w[i + 1], y[i] = y[i + 1] * y[i + 1];dw[1] = w[1], dy[1] = y[1], dw[2] = w[2], dy[2] = y[2];for (int i = 3; i < k; ++i){dw[i] = dw[i - 1] * y[i - 2] * w[i];dy[i] = dy[i - 1] * w[i - 2] * y[i];}}NTT() { setwy(level); }void fft4(vector<mint> &a, int k){if ((int)a.size() <= 1)return;if (k == 1){mint a1 = a[1];a[1] = a[0] - a[1];a[0] = a[0] + a1;return;}if (k & 1){int v = 1 << (k - 1);for (int j = 0; j < v; ++j){mint ajv = a[j + v];a[j + v] = a[j] - ajv;a[j] += ajv;}}int u = 1 << (2 + (k & 1));int v = 1 << (k - 2 - (k & 1));mint one = mint(1);mint imag = dw[1];while (v){// jh = 0{int j0 = 0;int j1 = v;int j2 = j1 + v;int j3 = j2 + v;for (; j0 < v; ++j0, ++j1, ++j2, ++j3){mint t0 = a[j0], t1 = a[j1], t2 = a[j2], t3 = a[j3];mint t0p2 = t0 + t2, t1p3 = t1 + t3;mint t0m2 = t0 - t2, t1m3 = (t1 - t3) * imag;a[j0] = t0p2 + t1p3, a[j1] = t0p2 - t1p3;a[j2] = t0m2 + t1m3, a[j3] = t0m2 - t1m3;}}// jh >= 1mint ww = one, xx = one * dw[2], wx = one;for (int jh = 4; jh < u;){ww = xx * xx, wx = ww * xx;int j0 = jh * v;int je = j0 + v;int j2 = je + v;for (; j0 < je; ++j0, ++j2){mint t0 = a[j0], t1 = a[j0 + v] * xx, t2 = a[j2] * ww,t3 = a[j2 + v] * wx;mint t0p2 = t0 + t2, t1p3 = t1 + t3;mint t0m2 = t0 - t2, t1m3 = (t1 - t3) * imag;a[j0] = t0p2 + t1p3, a[j0 + v] = t0p2 - t1p3;a[j2] = t0m2 + t1m3, a[j2 + v] = t0m2 - t1m3;}xx *= dw[__builtin_ctzll((jh += 4))];}u <<= 2;v >>= 2;}}void ifft4(vector<mint> &a, int k){if ((int)a.size() <= 1)return;if (k == 1){mint a1 = a[1];a[1] = a[0] - a[1];a[0] = a[0] + a1;return;}int u = 1 << (k - 2);int v = 1;mint one = mint(1);mint imag = dy[1];while (u){// jh = 0{int j0 = 0;int j1 = v;int j2 = v + v;int j3 = j2 + v;for (; j0 < v; ++j0, ++j1, ++j2, ++j3){mint t0 = a[j0], t1 = a[j1], t2 = a[j2], t3 = a[j3];mint t0p1 = t0 + t1, t2p3 = t2 + t3;mint t0m1 = t0 - t1, t2m3 = (t2 - t3) * imag;a[j0] = t0p1 + t2p3, a[j2] = t0p1 - t2p3;a[j1] = t0m1 + t2m3, a[j3] = t0m1 - t2m3;}}// jh >= 1mint ww = one, xx = one * dy[2], yy = one;u <<= 2;for (int jh = 4; jh < u;){ww = xx * xx, yy = xx * imag;int j0 = jh * v;int je = j0 + v;int j2 = je + v;for (; j0 < je; ++j0, ++j2){mint t0 = a[j0], t1 = a[j0 + v], t2 = a[j2], t3 = a[j2 + v];mint t0p1 = t0 + t1, t2p3 = t2 + t3;mint t0m1 = (t0 - t1) * xx, t2m3 = (t2 - t3) * yy;a[j0] = t0p1 + t2p3, a[j2] = (t0p1 - t2p3) * ww;a[j0 + v] = t0m1 + t2m3, a[j2 + v] = (t0m1 - t2m3) * ww;}xx *= dy[__builtin_ctzll(jh += 4)];}u >>= 4;v <<= 2;}if (k & 1){u = 1 << (k - 1);for (int j = 0; j < u; ++j){mint ajv = a[j] - a[j + u];a[j] += a[j + u];a[j + u] = ajv;}}}void ntt(vector<mint> &a){if ((int)a.size() <= 1)return;fft4(a, __builtin_ctz(a.size()));}void intt(vector<mint> &a){if ((int)a.size() <= 1)return;ifft4(a, __builtin_ctz(a.size()));mint iv = mint(a.size()).inv();for (auto &x : a)x *= iv;}vector<mint> multiply(const vector<mint> &a, const vector<mint> &b){int l = a.size() + b.size() - 1;if (min<int>(a.size(), b.size()) <= 40){vector<mint> s(l);for (int i = 0; i < (int)a.size(); ++i)for (int j = 0; j < (int)b.size(); ++j)s[i + j] += a[i] * b[j];return s;}int k = 2, M = 4;while (M < l)M <<= 1, ++k;setwy(k);vector<mint> s(M), t(M);for (int i = 0; i < (int)a.size(); ++i)s[i] = a[i];for (int i = 0; i < (int)b.size(); ++i)t[i] = b[i];fft4(s, k);fft4(t, k);for (int i = 0; i < M; ++i)s[i] *= t[i];ifft4(s, k);s.resize(l);mint invm = mint(M).inv();for (int i = 0; i < l; ++i)s[i] *= invm;return s;}void ntt_doubling(vector<mint> &a){int M = (int)a.size();auto b = a;intt(b);mint r = 1, zeta = mint(pr).pow((mint::mod() - 1) / (M << 1));for (int i = 0; i < M; i++)b[i] *= r, r *= zeta;ntt(b);copy(begin(b), end(b), back_inserter(a));}};struct FPS : vector<mint>{using vector<mint>::vector;FPS &operator+=(const FPS &r){if (r.size() > this->size())this->resize(r.size());for (int i = 0; i < (int)r.size(); i++)(*this)[i] += r[i];return *this;}FPS &operator+=(const mint &r){if (this->empty())this->resize(1);(*this)[0] += r;return *this;}FPS &operator-=(const FPS &r){if (r.size() > this->size())this->resize(r.size());for (int i = 0; i < (int)r.size(); i++)(*this)[i] -= r[i];return *this;}FPS &operator-=(const mint &r){if (this->empty())this->resize(1);(*this)[0] -= r;return *this;}FPS &operator*=(const mint &v){for (int k = 0; k < (int)this->size(); k++)(*this)[k] *= v;return *this;}FPS &operator/=(const FPS &r){if (this->size() < r.size()){this->clear();return *this;}int n = this->size() - r.size() + 1;if ((int)r.size() <= 64){FPS f(*this), g(r);g.shrink();mint coeff = g.back().inv();for (auto &x : g)x *= coeff;int deg = (int)f.size() - (int)g.size() + 1;int gs = g.size();FPS quo(deg);for (int i = deg - 1; i >= 0; i--){quo[i] = f[i + gs - 1];for (int j = 0; j < gs; j++)f[i + j] -= quo[i] * g[j];}*this = quo * coeff;this->resize(n, mint(0));return *this;}return *this = ((*this).rev().pre(n) * r.rev().inv(n)).pre(n).rev();}FPS &operator%=(const FPS &r){*this -= *this / r * r;shrink();return *this;}FPS operator+(const FPS &r) const { return FPS(*this) += r; }FPS operator+(const mint &v) const { return FPS(*this) += v; }FPS operator-(const FPS &r) const { return FPS(*this) -= r; }FPS operator-(const mint &v) const { return FPS(*this) -= v; }FPS operator*(const FPS &r) const { return FPS(*this) *= r; }FPS operator*(const mint &v) const { return FPS(*this) *= v; }FPS operator/(const FPS &r) const { return FPS(*this) /= r; }FPS operator%(const FPS &r) const { return FPS(*this) %= r; }FPS operator-() const{FPS ret(this->size());for (int i = 0; i < (int)this->size(); i++)ret[i] = -(*this)[i];return ret;}void shrink(){while (this->size() && this->back() == mint(0))this->pop_back();}FPS rev() const{FPS ret(*this);reverse(rng(ret));return ret;}FPS dot(FPS r) const{FPS ret(min(this->size(), r.size()));for (int i = 0; i < (int)ret.size(); i++)ret[i] = (*this)[i] * r[i];return ret;}FPS pre(int sz) const{return FPS((*this).begin(), (*this).begin() + min((int)this->size(), sz));}FPS operator>>(int sz) const{if ((int)this->size() <= sz)return {};FPS ret(*this);ret.erase(ret.begin(), ret.begin() + sz);return ret;}FPS operator<<(int sz) const{FPS ret(*this);ret.insert(ret.begin(), sz, mint(0));return ret;}FPS diff() const{const int n = (int)this->size();FPS ret(max(0, n - 1));mint one(1), coeff(1);for (int i = 1; i < n; i++){ret[i - 1] = (*this)[i] * coeff;coeff += one;}return ret;}FPS integral() const{const int n = (int)this->size();FPS ret(n + 1);ret[0] = mint(0);if (n > 0)ret[1] = mint(1);auto mod = mint::mod();for (int i = 2; i <= n; i++)ret[i] = (-ret[mod % i]) * (mod / i);for (int i = 0; i < n; i++)ret[i + 1] *= (*this)[i];return ret;}mint eval(mint x) const{mint r = 0, w = 1;for (auto &v : *this)r += w * v, w *= x;return r;}FPS log(int deg = -1) const{assert((*this)[0] == mint(1));if (deg == -1)deg = (int)this->size();return (this->diff() * this->inv(deg)).pre(deg - 1).integral();}FPS pow(int64_t k, int deg = -1) const{const int n = (int)this->size();if (deg == -1)deg = n;if (k == 0){FPS ret(deg);if (deg)ret[0] = 1;return ret;}for (int i = 0; i < n; i++){if ((*this)[i] != mint(0)){mint rev = mint(1) / (*this)[i];FPS ret = (((*this * rev) >> i).log(deg) * k).exp(deg);ret *= (*this)[i].pow(k);ret = (ret << (i * k)).pre(deg);if ((int)ret.size() < deg)ret.resize(deg, mint(0));return ret;}if (__int128_t(i + 1) * k >= deg)return FPS(deg, mint(0));}return FPS(deg, mint(0));}FPS &operator*=(const FPS &r){if (this->empty() || r.empty()){this->clear();return *this;}set_fft();auto ret = static_cast<NTT *>(ntt_ptr)->multiply(*this, r);return *this = FPS(ret.begin(), ret.end());}FPS inv(int deg = -1) const{assert((*this)[0] != mint(0));if (deg == -1)deg = (int)this->size();FPS res(deg);res[0] = {mint(1) / (*this)[0]};for (int d = 1; d < deg; d <<= 1){FPS f(2 * d), g(2 * d);for (int j = 0; j < min((int)this->size(), 2 * d); j++)f[j] = (*this)[j];for (int j = 0; j < d; j++)g[j] = res[j];f.ntt();g.ntt();for (int j = 0; j < 2 * d; j++)f[j] *= g[j];f.intt();for (int j = 0; j < d; j++)f[j] = 0;f.ntt();for (int j = 0; j < 2 * d; j++)f[j] *= g[j];f.intt();for (int j = d; j < min(2 * d, deg); j++)res[j] = -f[j];}return res.pre(deg);}FPS exp(int deg) const{using fps = FPS;assert((*this).size() == 0 || (*this)[0] == mint(0));if (deg == -1)deg = this->size();fps inv;inv.reserve(deg + 1);inv.push_back(mint(0));inv.push_back(mint(1));auto inplace_integral = [&](fps &F) -> void{const int n = (int)F.size();auto mod = mint::mod();while ((int)inv.size() <= n){int i = inv.size();inv.push_back((-inv[mod % i]) * (mod / i));}F.insert(F.begin(), mint(0));for (int i = 1; i <= n; i++)F[i] *= inv[i];};auto inplace_diff = [](fps &F) -> void{if (F.empty())return;F.erase(F.begin());mint coeff = 1, one = 1;for (int i = 0; i < (int)F.size(); i++){F[i] *= coeff;coeff += one;}};fps b{1, 1 < (int)this->size() ? (*this)[1] : 0}, c{1}, z1, z2{1, 1};for (int m = 2; m < deg; m *= 2){auto y = b;y.resize(2 * m);y.ntt();z1 = z2;fps z(m);for (int i = 0; i < m; ++i)z[i] = y[i] * z1[i];z.intt();fill(z.begin(), z.begin() + m / 2, mint(0));z.ntt();for (int i = 0; i < m; ++i)z[i] *= -z1[i];z.intt();c.insert(c.end(), z.begin() + m / 2, z.end());z2 = c;z2.resize(2 * m);z2.ntt();fps x((*this).begin(), (*this).begin() + min<int>(this->size(), m));x.resize(m);inplace_diff(x);x.push_back(mint(0));x.ntt();for (int i = 0; i < m; ++i)x[i] *= y[i];x.intt();x -= b.diff();x.resize(2 * m);for (int i = 0; i < m - 1; ++i)x[m + i] = x[i], x[i] = mint(0);x.ntt();for (int i = 0; i < 2 * m; ++i)x[i] *= z2[i];x.intt();x.pop_back();inplace_integral(x);for (int i = m; i < min<int>(this->size(), 2 * m); ++i)x[i] += (*this)[i];fill(x.begin(), x.begin() + m, mint(0));x.ntt();for (int i = 0; i < 2 * m; ++i)x[i] *= y[i];x.intt();b.insert(b.end(), x.begin() + m, x.end());}return fps{b.begin(), b.begin() + deg};}// 以下、ユーザは使わないstatic void set_fft() {if (!ntt_ptr) ntt_ptr = new NTT;}void ntt() {set_fft();static_cast<NTT*>(ntt_ptr)->ntt(*this);}void intt() {set_fft();static_cast<NTT*>(ntt_ptr)->intt(*this);}void ntt_doubling() {set_fft();static_cast<NTT*>(ntt_ptr)->ntt_doubling(*this);}static int ntt_pr() {set_fft();return static_cast<NTT*>(ntt_ptr)->pr;}static void *ntt_ptr;};void *FPS::ntt_ptr = nullptr;// FPSライブラリ// 除算 : f/g は右式のqを返す f = g*q + r// pow(int64_t k, int deg = -1) : f^k mod x^deg// 注意 : f/g (mod n) は f * g.inv(n)// x^0, x^1, ..., x^N をオンラインで計算する// x^{n-1} までを確定させた時点で, c[n] には a_0 b_n と// a_n b_0 以外の寄与の和が入っているので, それを利用することもできるtemplate <typename fps>struct RelaxedConvolution {using mint = typename fps::value_type;int N, q;vector<mint> a, b, c;fps f, g;vector<fps> as, bs;RelaxedConvolution(int _n) : N(_n), q(0) {a.resize(N + 1), b.resize(N + 1), c.resize(N + 1);}// a[q] = x, b[q] = y であるとき c[q] を getmint get(mint x, mint y) {assert(q <= N);a[q] = x, b[q] = y;c[q] += a[q] * b[0] + (q ? b[q] * a[0] : 0);auto precalc = [&](int lg) {if ((int)as.size() <= lg) as.resize(lg + 1), bs.resize(lg + 1);if (!as[lg].empty()) return;int d = 1 << lg;fps s{begin(a), begin(a) + d * 2};fps t{begin(b), begin(b) + d * 2};s.ntt(), t.ntt();as[lg] = s, bs[lg] = t;};q++;if (q > N) return c[q - 1];for (int d = 1, lg = 0; d <= q; d *= 2, lg++) {if (q % (2 * d) != d) continue;if (q == d) {f.assign(2 * d, mint{});g.assign(2 * d, mint{});for (int i = 0; i < d; i++) f[i] = a[i];for (int i = 0; i < d; i++) g[i] = b[i];f.ntt(), g.ntt();for (int i = 0; i < d * 2; i++) f[i] *= g[i];f.intt();for (int i = q; i < min(q + d, N + 1); i++) c[i] += f[d + i - q];} else {precalc(lg);f.assign(2 * d, mint{});g.assign(2 * d, mint{});for (int i = 0; i < d; i++) f[i] = a[q - d + i];for (int i = 0; i < d; i++) g[i] = b[q - d + i];f.ntt(), g.ntt();fps& s = as[lg];fps& t = bs[lg];for (int i = 0; i < d * 2; i++) f[i] = f[i] * t[i] + g[i] * s[i];f.intt();for (int i = q; i < min(q + d, N + 1); i++) c[i] += f[d + i - q];}}return c[q - 1];}};struct combination {vector<mint> fact, ifact;combination(int n):fact(n+1),ifact(n+1) {fact[0] = 1;for (int i = 1; i <= n; ++i) fact[i] = fact[i-1]*i;ifact[n] = fact[n].inv();for (int i = n; i >= 1; --i) ifact[i-1] = ifact[i]*i;}mint operator()(int n, int k) {if (k < 0 || k > n) return 0;return fact[n]*ifact[k]*ifact[n-k];}} com(10000005);void solve(){int m,n; cin>>m>>n;vi a(n);rep(i,n) cin>>a[i],a[i]--;vi cnt(m);rep(i,n) cnt[a[i]]++;vm g(n+1);vm b(n+1);vm c(n+1);rep(i,n+1) c[i] = com.ifact[i+1];int mx = 0;rep(i,m) chmax(mx, cnt[i]);RelaxedConvolution<FPS> rc(n+1);rep(i,mx-1){rc.get(b[i], c[i]);}map<int,int> mp;rep(i,m){mp[cnt[i]]++;}vm now;vi p;mint coef = 1;fore(y , mp){now.pb(mx-y.fi);p.pb(y.se);coef *= com.fact[mx-y.fi].pow(y.se);}mint ans = 0;repx(i,mx,n+1){mint tmp = rc.get(b[i-1], c[i-1]) * com.fact[i];mint f = com.fact[i].pow(m) / coef;mint g = f - tmp;ans += g;b[i] = g*com.ifact[i];rep(j,sz(now)){now[j]++;coef *= now[j].pow(p[j]);}}outm;return;}int main(){ios::sync_with_stdio(false);cin.tie(0);int t = 1;//cin>>t;rep(i,t){solve();}return 0;}