結果

問題 No.2243 Coaching Schedule
ユーザー woodywoodywoodywoody
提出日時 2024-06-02 14:02:06
言語 C++17(gcc12)
(gcc 12.3.0 + boost 1.87.0)
結果
AC  
実行時間 529 ms / 4,000 ms
コード長 25,419 bytes
コンパイル時間 5,802 ms
コンパイル使用メモリ 289,984 KB
実行使用メモリ 87,504 KB
最終ジャッジ日時 2024-12-23 10:14:03
合計ジャッジ時間 18,705 ms
ジャッジサーバーID
(参考情報)
judge3 / judge1
このコードへのチャレンジ
(要ログイン)

テストケース

テストケース表示
入力 結果 実行時間
実行使用メモリ
testcase_00 AC 160 ms
81,536 KB
testcase_01 AC 167 ms
81,536 KB
testcase_02 AC 163 ms
81,408 KB
testcase_03 AC 161 ms
81,536 KB
testcase_04 AC 160 ms
81,792 KB
testcase_05 AC 328 ms
87,472 KB
testcase_06 AC 364 ms
87,480 KB
testcase_07 AC 356 ms
87,476 KB
testcase_08 AC 357 ms
87,476 KB
testcase_09 AC 371 ms
87,476 KB
testcase_10 AC 367 ms
87,480 KB
testcase_11 AC 373 ms
87,480 KB
testcase_12 AC 529 ms
87,504 KB
testcase_13 AC 483 ms
87,492 KB
testcase_14 AC 487 ms
87,500 KB
testcase_15 AC 479 ms
87,492 KB
testcase_16 AC 295 ms
87,480 KB
testcase_17 AC 208 ms
83,072 KB
testcase_18 AC 292 ms
85,792 KB
testcase_19 AC 337 ms
86,032 KB
testcase_20 AC 272 ms
84,480 KB
testcase_21 AC 237 ms
83,968 KB
testcase_22 AC 214 ms
83,712 KB
testcase_23 AC 159 ms
82,048 KB
testcase_24 AC 252 ms
84,480 KB
testcase_25 AC 167 ms
82,176 KB
testcase_26 AC 197 ms
82,944 KB
testcase_27 AC 274 ms
84,608 KB
testcase_28 AC 168 ms
82,176 KB
testcase_29 AC 300 ms
85,648 KB
testcase_30 AC 323 ms
86,244 KB
testcase_31 AC 319 ms
86,508 KB
testcase_32 AC 213 ms
83,584 KB
testcase_33 AC 201 ms
83,200 KB
testcase_34 AC 283 ms
85,860 KB
testcase_35 AC 311 ms
85,860 KB
testcase_36 AC 321 ms
86,128 KB
権限があれば一括ダウンロードができます

ソースコード

diff #
プレゼンテーションモードにする

#include<bits/stdc++.h>
#include<atcoder/all>
#define rep(i,b) for(int i=0;i<b;i++)
#define rrep(i,b) for(int i=b-1;i>=0;i--)
#define rep1(i,b) for(int i=1;i<b;i++)
#define repx(i,x,b) for(int i=x;i<b;i++)
#define rrepx(i,x,b) for(int i=b-1;i>=x;i--)
#define fore(i,a) for(auto& i:a)
#define rng(x) (x).begin(), (x).end()
#define rrng(x) (x).rbegin(), (x).rend()
#define sz(x) ((int)(x).size())
#define pb push_back
#define fi first
#define se second
#define pcnt __builtin_popcountll
using namespace std;
using namespace atcoder;
using ll = long long;
using ull = long long;
using ld = long double;
template<typename T> using mpq = priority_queue<T, vector<T>, greater<T>>;
template<typename T> bool chmax(T &a, const T &b) { if (a<b) { a=b; return 1; } return 0; }
template<typename T> bool chmin(T &a, const T &b) { if (b<a) { a=b; return 1; } return 0; }
template<typename T> ll sumv(const vector<T>&a){ll res(0);for(auto&&x:a)res+=x;return res;}
bool yn(bool a) { if(a) {cout << "Yes" << endl; return true;} else {cout << "No" << endl; return false;}}
#define retval(x) {cout << #x << endl; return;}
#define cout2(x,y) cout << x << " " << y << endl;
#define coutp(p) cout << p.fi << " " << p.se << endl;
#define out cout << ans << endl;
#define outd cout << fixed << setprecision(20) << ans << endl;
#define outm cout << ans.val() << endl;
#define outv fore(yans , ans) cout << yans << "\n";
#define outdv fore(yans , ans) cout << yans.val() << "\n";
#define assertmle(x) if (!(x)) {vi v(3e8);}
#define asserttle(x) if (!(x)) {while(1){}}
#define coutv(v) {fore(vy , v) {cout << vy << " ";} cout << endl;}
#define coutv2(v) fore(vy , v) cout << vy << "\n";
#define coutvm(v) {fore(vy , v) {cout << vy.val() << " ";} cout << endl;}
#define coutvm2(v) fore(vy , v) cout << vy.val() << "\n";
using pll = pair<ll,ll>;using pil = pair<int,ll>;using pli = pair<ll,int>;using pii = pair<int,int>;using pdd = pair<ld,ld>;
using vi = vector<int>;using vd = vector<ld>;using vl = vector<ll>;using vs = vector<string>;using vb = vector<bool>;
using vpii = vector<pii>;using vpli = vector<pli>;using vpll = vector<pll>;using vpil = vector<pil>;
using vvi = vector<vector<int>>;using vvl = vector<vector<ll>>;using vvs = vector<vector<string>>;using vvb = vector<vector<bool>>;
using vvpii = vector<vector<pii>>;using vvpli = vector<vector<pli>>;using vvpll = vector<vpll>;using vvpil = vector<vpil>;
using mint = modint998244353;
//using mint = modint1000000007;
//using mint = dynamic_modint<0>;
using vm = vector<mint>;
using vvm = vector<vector<mint>>;
vector<int> dx={1,0,-1,0,1,1,-1,-1},dy={0,1,0,-1,1,-1,1,-1};
ll gcd(ll a, ll b) { return a?gcd(b%a,a):b;}
ll lcm(ll a, ll b) { return a/gcd(a,b)*b;}
#define yes {cout <<"Yes"<<endl;}
#define no {cout <<"No"<<endl;}
const double eps = 1e-10;
const ll LINF = 2001002003004005006ll;
const int INF = 1001001001;
#ifdef MY_LOCAL_DEBUG
#include "./debug/localDebug.cpp"
#define showp(p) cerr<<#p<<" = "<<p.fi<<" : "<<p.se<<endl
#define show1(a) cerr<<#a<<" = "<<a<<endl
#define show2(a,b) cerr<<#a<<" = "<<a<<" : "<<#b<<" = "<<b<<endl
#define show3(a,b,c) cerr<<#a<<" = "<<a<<" : "<<#b<<" = "<<b<<" : "<<#c<<" = "<<c<<endl
#define show4(a,b,c,d) cerr<<#a<<" = "<<a<<" : "<<#b<<" = "<<b<<" : "<<#c<<" = "<<c<<" : "<<#d<<" = "<<d<<endl
#define show5(a,b,c,d,e) cerr<<#a<<" = "<<a<<" : "<<#b<<" = "<<b<<" : "<<#c<<" = "<<c<<" : "<<#d<<" = "<<d<<" : "<<#e<<" = "<<e<<endl
#define DEBUG_LINE cout << "DEBUG_LINE : " << __LINE__ << endl
#define showv(v) {cout<<#v<<" : "; fore(vy , v) {cout << vy << " ";} cout << endl;}
#define showv2(v) {cout<<#v<<endl; fore(vy , v) cout << vy << "\n";}
#define showvm(v) {cout<<#v<<" : "; fore(vy , v) {cout << vy.val() << " ";} cout << endl;}
#define showvm2(v) {cout<<#v<<endl; fore(vy , v) cout << vy.val() << "\n";}
#define showmat(v) {cout<<#v<<endl; fore(row , v) { fore(seg , row) cout << seg << " "; cout << endl;}}
#define showmatm(v) {cout<<#v<<endl; fore(row , v) { fore(seg , row) cout << seg.val() << " "; cout << endl;}}
#else
#define showp(p)
#define show1(a)
#define show2(a,b)
#define show3(a,b,c)
#define show4(a,b,c,d)
#define show5(a,b,c,d,e)
#define DEBUG_LINE
#define showv(v)
#define showv2(v)
#define showvm(v)
#define showvm2(v)
#define showmat(v)
#define showmatm(v)
#endif
#define overload5(a,b,c,d,e,f, ...) f
#define show(...) overload5(__VA_ARGS__, show5, show4, show3, show2, show1)(__VA_ARGS__)
struct NTT
{
uint32_t get_pr()
{
uint32_t _mod = mint::mod();
using u64 = uint64_t;
u64 ds[32] = {};
int idx = 0;
u64 m = _mod - 1;
for (u64 i = 2; i * i <= m; ++i)
{
if (m % i == 0)
{
ds[idx++] = i;
while (m % i == 0)
m /= i;
}
}
if (m != 1)
ds[idx++] = m;
uint32_t _pr = 2;
while (1)
{
int flg = 1;
for (int i = 0; i < idx; ++i)
{
u64 a = _pr, b = (_mod - 1) / ds[i], r = 1;
while (b)
{
if (b & 1)
r = r * a % _mod;
a = a * a % _mod;
b >>= 1;
}
if (r == 1)
{
flg = 0;
break;
}
}
if (flg == 1)
break;
++_pr;
}
return _pr;
};
static constexpr uint32_t mod = mint::mod();
uint32_t pr = get_pr();
static constexpr int level = __builtin_ctzll(mod - 1);
mint dw[level], dy[level];
void setwy(int k)
{
mint w[level], y[level];
w[k - 1] = mint(pr).pow((mod - 1) / (1 << k));
y[k - 1] = w[k - 1].inv();
for (int i = k - 2; i > 0; --i)
w[i] = w[i + 1] * w[i + 1], y[i] = y[i + 1] * y[i + 1];
dw[1] = w[1], dy[1] = y[1], dw[2] = w[2], dy[2] = y[2];
for (int i = 3; i < k; ++i)
{
dw[i] = dw[i - 1] * y[i - 2] * w[i];
dy[i] = dy[i - 1] * w[i - 2] * y[i];
}
}
NTT() { setwy(level); }
void fft4(vector<mint> &a, int k)
{
if ((int)a.size() <= 1)
return;
if (k == 1)
{
mint a1 = a[1];
a[1] = a[0] - a[1];
a[0] = a[0] + a1;
return;
}
if (k & 1)
{
int v = 1 << (k - 1);
for (int j = 0; j < v; ++j)
{
mint ajv = a[j + v];
a[j + v] = a[j] - ajv;
a[j] += ajv;
}
}
int u = 1 << (2 + (k & 1));
int v = 1 << (k - 2 - (k & 1));
mint one = mint(1);
mint imag = dw[1];
while (v)
{
// jh = 0
{
int j0 = 0;
int j1 = v;
int j2 = j1 + v;
int j3 = j2 + v;
for (; j0 < v; ++j0, ++j1, ++j2, ++j3)
{
mint t0 = a[j0], t1 = a[j1], t2 = a[j2], t3 = a[j3];
mint t0p2 = t0 + t2, t1p3 = t1 + t3;
mint t0m2 = t0 - t2, t1m3 = (t1 - t3) * imag;
a[j0] = t0p2 + t1p3, a[j1] = t0p2 - t1p3;
a[j2] = t0m2 + t1m3, a[j3] = t0m2 - t1m3;
}
}
// jh >= 1
mint ww = one, xx = one * dw[2], wx = one;
for (int jh = 4; jh < u;)
{
ww = xx * xx, wx = ww * xx;
int j0 = jh * v;
int je = j0 + v;
int j2 = je + v;
for (; j0 < je; ++j0, ++j2)
{
mint t0 = a[j0], t1 = a[j0 + v] * xx, t2 = a[j2] * ww,
t3 = a[j2 + v] * wx;
mint t0p2 = t0 + t2, t1p3 = t1 + t3;
mint t0m2 = t0 - t2, t1m3 = (t1 - t3) * imag;
a[j0] = t0p2 + t1p3, a[j0 + v] = t0p2 - t1p3;
a[j2] = t0m2 + t1m3, a[j2 + v] = t0m2 - t1m3;
}
xx *= dw[__builtin_ctzll((jh += 4))];
}
u <<= 2;
v >>= 2;
}
}
void ifft4(vector<mint> &a, int k)
{
if ((int)a.size() <= 1)
return;
if (k == 1)
{
mint a1 = a[1];
a[1] = a[0] - a[1];
a[0] = a[0] + a1;
return;
}
int u = 1 << (k - 2);
int v = 1;
mint one = mint(1);
mint imag = dy[1];
while (u)
{
// jh = 0
{
int j0 = 0;
int j1 = v;
int j2 = v + v;
int j3 = j2 + v;
for (; j0 < v; ++j0, ++j1, ++j2, ++j3)
{
mint t0 = a[j0], t1 = a[j1], t2 = a[j2], t3 = a[j3];
mint t0p1 = t0 + t1, t2p3 = t2 + t3;
mint t0m1 = t0 - t1, t2m3 = (t2 - t3) * imag;
a[j0] = t0p1 + t2p3, a[j2] = t0p1 - t2p3;
a[j1] = t0m1 + t2m3, a[j3] = t0m1 - t2m3;
}
}
// jh >= 1
mint ww = one, xx = one * dy[2], yy = one;
u <<= 2;
for (int jh = 4; jh < u;)
{
ww = xx * xx, yy = xx * imag;
int j0 = jh * v;
int je = j0 + v;
int j2 = je + v;
for (; j0 < je; ++j0, ++j2)
{
mint t0 = a[j0], t1 = a[j0 + v], t2 = a[j2], t3 = a[j2 + v];
mint t0p1 = t0 + t1, t2p3 = t2 + t3;
mint t0m1 = (t0 - t1) * xx, t2m3 = (t2 - t3) * yy;
a[j0] = t0p1 + t2p3, a[j2] = (t0p1 - t2p3) * ww;
a[j0 + v] = t0m1 + t2m3, a[j2 + v] = (t0m1 - t2m3) * ww;
}
xx *= dy[__builtin_ctzll(jh += 4)];
}
u >>= 4;
v <<= 2;
}
if (k & 1)
{
u = 1 << (k - 1);
for (int j = 0; j < u; ++j)
{
mint ajv = a[j] - a[j + u];
a[j] += a[j + u];
a[j + u] = ajv;
}
}
}
void ntt(vector<mint> &a)
{
if ((int)a.size() <= 1)
return;
fft4(a, __builtin_ctz(a.size()));
}
void intt(vector<mint> &a)
{
if ((int)a.size() <= 1)
return;
ifft4(a, __builtin_ctz(a.size()));
mint iv = mint(a.size()).inv();
for (auto &x : a)
x *= iv;
}
vector<mint> multiply(const vector<mint> &a, const vector<mint> &b)
{
int l = a.size() + b.size() - 1;
if (min<int>(a.size(), b.size()) <= 40)
{
vector<mint> s(l);
for (int i = 0; i < (int)a.size(); ++i)
for (int j = 0; j < (int)b.size(); ++j)
s[i + j] += a[i] * b[j];
return s;
}
int k = 2, M = 4;
while (M < l)
M <<= 1, ++k;
setwy(k);
vector<mint> s(M), t(M);
for (int i = 0; i < (int)a.size(); ++i)
s[i] = a[i];
for (int i = 0; i < (int)b.size(); ++i)
t[i] = b[i];
fft4(s, k);
fft4(t, k);
for (int i = 0; i < M; ++i)
s[i] *= t[i];
ifft4(s, k);
s.resize(l);
mint invm = mint(M).inv();
for (int i = 0; i < l; ++i)
s[i] *= invm;
return s;
}
void ntt_doubling(vector<mint> &a)
{
int M = (int)a.size();
auto b = a;
intt(b);
mint r = 1, zeta = mint(pr).pow((mint::mod() - 1) / (M << 1));
for (int i = 0; i < M; i++)
b[i] *= r, r *= zeta;
ntt(b);
copy(begin(b), end(b), back_inserter(a));
}
};
struct FPS : vector<mint>
{
using vector<mint>::vector;
FPS &operator+=(const FPS &r)
{
if (r.size() > this->size())
this->resize(r.size());
for (int i = 0; i < (int)r.size(); i++)
(*this)[i] += r[i];
return *this;
}
FPS &operator+=(const mint &r)
{
if (this->empty())
this->resize(1);
(*this)[0] += r;
return *this;
}
FPS &operator-=(const FPS &r)
{
if (r.size() > this->size())
this->resize(r.size());
for (int i = 0; i < (int)r.size(); i++)
(*this)[i] -= r[i];
return *this;
}
FPS &operator-=(const mint &r)
{
if (this->empty())
this->resize(1);
(*this)[0] -= r;
return *this;
}
FPS &operator*=(const mint &v)
{
for (int k = 0; k < (int)this->size(); k++)
(*this)[k] *= v;
return *this;
}
FPS &operator/=(const FPS &r)
{
if (this->size() < r.size())
{
this->clear();
return *this;
}
int n = this->size() - r.size() + 1;
if ((int)r.size() <= 64)
{
FPS f(*this), g(r);
g.shrink();
mint coeff = g.back().inv();
for (auto &x : g)
x *= coeff;
int deg = (int)f.size() - (int)g.size() + 1;
int gs = g.size();
FPS quo(deg);
for (int i = deg - 1; i >= 0; i--)
{
quo[i] = f[i + gs - 1];
for (int j = 0; j < gs; j++)
f[i + j] -= quo[i] * g[j];
}
*this = quo * coeff;
this->resize(n, mint(0));
return *this;
}
return *this = ((*this).rev().pre(n) * r.rev().inv(n)).pre(n).rev();
}
FPS &operator%=(const FPS &r)
{
*this -= *this / r * r;
shrink();
return *this;
}
FPS operator+(const FPS &r) const { return FPS(*this) += r; }
FPS operator+(const mint &v) const { return FPS(*this) += v; }
FPS operator-(const FPS &r) const { return FPS(*this) -= r; }
FPS operator-(const mint &v) const { return FPS(*this) -= v; }
FPS operator*(const FPS &r) const { return FPS(*this) *= r; }
FPS operator*(const mint &v) const { return FPS(*this) *= v; }
FPS operator/(const FPS &r) const { return FPS(*this) /= r; }
FPS operator%(const FPS &r) const { return FPS(*this) %= r; }
FPS operator-() const
{
FPS ret(this->size());
for (int i = 0; i < (int)this->size(); i++)
ret[i] = -(*this)[i];
return ret;
}
void shrink()
{
while (this->size() && this->back() == mint(0))
this->pop_back();
}
FPS rev() const
{
FPS ret(*this);
reverse(rng(ret));
return ret;
}
FPS dot(FPS r) const
{
FPS ret(min(this->size(), r.size()));
for (int i = 0; i < (int)ret.size(); i++)
ret[i] = (*this)[i] * r[i];
return ret;
}
FPS pre(int sz) const
{
return FPS((*this).begin(), (*this).begin() + min((int)this->size(), sz));
}
FPS operator>>(int sz) const
{
if ((int)this->size() <= sz)
return {};
FPS ret(*this);
ret.erase(ret.begin(), ret.begin() + sz);
return ret;
}
FPS operator<<(int sz) const
{
FPS ret(*this);
ret.insert(ret.begin(), sz, mint(0));
return ret;
}
FPS diff() const
{
const int n = (int)this->size();
FPS ret(max(0, n - 1));
mint one(1), coeff(1);
for (int i = 1; i < n; i++)
{
ret[i - 1] = (*this)[i] * coeff;
coeff += one;
}
return ret;
}
FPS integral() const
{
const int n = (int)this->size();
FPS ret(n + 1);
ret[0] = mint(0);
if (n > 0)
ret[1] = mint(1);
auto mod = mint::mod();
for (int i = 2; i <= n; i++)
ret[i] = (-ret[mod % i]) * (mod / i);
for (int i = 0; i < n; i++)
ret[i + 1] *= (*this)[i];
return ret;
}
mint eval(mint x) const
{
mint r = 0, w = 1;
for (auto &v : *this)
r += w * v, w *= x;
return r;
}
FPS log(int deg = -1) const
{
assert((*this)[0] == mint(1));
if (deg == -1)
deg = (int)this->size();
return (this->diff() * this->inv(deg)).pre(deg - 1).integral();
}
FPS pow(int64_t k, int deg = -1) const
{
const int n = (int)this->size();
if (deg == -1)
deg = n;
if (k == 0)
{
FPS ret(deg);
if (deg)
ret[0] = 1;
return ret;
}
for (int i = 0; i < n; i++)
{
if ((*this)[i] != mint(0))
{
mint rev = mint(1) / (*this)[i];
FPS ret = (((*this * rev) >> i).log(deg) * k).exp(deg);
ret *= (*this)[i].pow(k);
ret = (ret << (i * k)).pre(deg);
if ((int)ret.size() < deg)
ret.resize(deg, mint(0));
return ret;
}
if (__int128_t(i + 1) * k >= deg)
return FPS(deg, mint(0));
}
return FPS(deg, mint(0));
}
FPS &operator*=(
const FPS &r)
{
if (this->empty() || r.empty())
{
this->clear();
return *this;
}
set_fft();
auto ret = static_cast<NTT *>(ntt_ptr)->multiply(*this, r);
return *this = FPS(ret.begin(), ret.end());
}
FPS inv(int deg = -1) const
{
assert((*this)[0] != mint(0));
if (deg == -1)
deg = (int)this->size();
FPS res(deg);
res[0] = {mint(1) / (*this)[0]};
for (int d = 1; d < deg; d <<= 1)
{
FPS f(2 * d), g(2 * d);
for (int j = 0; j < min((int)this->size(), 2 * d); j++)
f[j] = (*this)[j];
for (int j = 0; j < d; j++)
g[j] = res[j];
f.ntt();
g.ntt();
for (int j = 0; j < 2 * d; j++)
f[j] *= g[j];
f.intt();
for (int j = 0; j < d; j++)
f[j] = 0;
f.ntt();
for (int j = 0; j < 2 * d; j++)
f[j] *= g[j];
f.intt();
for (int j = d; j < min(2 * d, deg); j++)
res[j] = -f[j];
}
return res.pre(deg);
}
FPS exp(int deg) const
{
using fps = FPS;
assert((*this).size() == 0 || (*this)[0] == mint(0));
if (deg == -1)
deg = this->size();
fps inv;
inv.reserve(deg + 1);
inv.push_back(mint(0));
inv.push_back(mint(1));
auto inplace_integral = [&](fps &F) -> void
{
const int n = (int)F.size();
auto mod = mint::mod();
while ((int)inv.size() <= n)
{
int i = inv.size();
inv.push_back((-inv[mod % i]) * (mod / i));
}
F.insert(F.begin(), mint(0));
for (int i = 1; i <= n; i++)
F[i] *= inv[i];
};
auto inplace_diff = [](fps &F) -> void
{
if (F.empty())
return;
F.erase(F.begin());
mint coeff = 1, one = 1;
for (int i = 0; i < (int)F.size(); i++)
{
F[i] *= coeff;
coeff += one;
}
};
fps b{1, 1 < (int)this->size() ? (*this)[1] : 0}, c{1}, z1, z2{1, 1};
for (int m = 2; m < deg; m *= 2)
{
auto y = b;
y.resize(2 * m);
y.ntt();
z1 = z2;
fps z(m);
for (int i = 0; i < m; ++i)
z[i] = y[i] * z1[i];
z.intt();
fill(z.begin(), z.begin() + m / 2, mint(0));
z.ntt();
for (int i = 0; i < m; ++i)
z[i] *= -z1[i];
z.intt();
c.insert(c.end(), z.begin() + m / 2, z.end());
z2 = c;
z2.resize(2 * m);
z2.ntt();
fps x((*this).begin(), (*this).begin() + min<int>(this->size(), m));
x.resize(m);
inplace_diff(x);
x.push_back(mint(0));
x.ntt();
for (int i = 0; i < m; ++i)
x[i] *= y[i];
x.intt();
x -= b.diff();
x.resize(2 * m);
for (int i = 0; i < m - 1; ++i)
x[m + i] = x[i], x[i] = mint(0);
x.ntt();
for (int i = 0; i < 2 * m; ++i)
x[i] *= z2[i];
x.intt();
x.pop_back();
inplace_integral(x);
for (int i = m; i < min<int>(this->size(), 2 * m); ++i)
x[i] += (*this)[i];
fill(x.begin(), x.begin() + m, mint(0));
x.ntt();
for (int i = 0; i < 2 * m; ++i)
x[i] *= y[i];
x.intt();
b.insert(b.end(), x.begin() + m, x.end());
}
return fps{b.begin(), b.begin() + deg};
}
// 使
static void set_fft() {
if (!ntt_ptr) ntt_ptr = new NTT;
}
void ntt() {
set_fft();
static_cast<NTT*>(ntt_ptr)->ntt(*this);
}
void intt() {
set_fft();
static_cast<NTT*>(ntt_ptr)->intt(*this);
}
void ntt_doubling() {
set_fft();
static_cast<NTT*>(ntt_ptr)->ntt_doubling(*this);
}
static int ntt_pr() {
set_fft();
return static_cast<NTT*>(ntt_ptr)->pr;
}
static void *ntt_ptr;
};
void *FPS::ntt_ptr = nullptr;
// FPS
// : f/g q f = g*q + r
// pow(int64_t k, int deg = -1) : f^k mod x^deg
// : f/g (mod n) f * g.inv(n)
// x^0, x^1, ..., x^N
// x^{n-1} , c[n] a_0 b_n
// a_n b_0 ,
template <typename fps>
struct RelaxedConvolution {
using mint = typename fps::value_type;
int N, q;
vector<mint> a, b, c;
fps f, g;
vector<fps> as, bs;
RelaxedConvolution(int _n) : N(_n), q(0) {
a.resize(N + 1), b.resize(N + 1), c.resize(N + 1);
}
// a[q] = x, b[q] = y c[q] get
mint get(mint x, mint y) {
assert(q <= N);
a[q] = x, b[q] = y;
c[q] += a[q] * b[0] + (q ? b[q] * a[0] : 0);
auto precalc = [&](int lg) {
if ((int)as.size() <= lg) as.resize(lg + 1), bs.resize(lg + 1);
if (!as[lg].empty()) return;
int d = 1 << lg;
fps s{begin(a), begin(a) + d * 2};
fps t{begin(b), begin(b) + d * 2};
s.ntt(), t.ntt();
as[lg] = s, bs[lg] = t;
};
q++;
if (q > N) return c[q - 1];
for (int d = 1, lg = 0; d <= q; d *= 2, lg++) {
if (q % (2 * d) != d) continue;
if (q == d) {
f.assign(2 * d, mint{});
g.assign(2 * d, mint{});
for (int i = 0; i < d; i++) f[i] = a[i];
for (int i = 0; i < d; i++) g[i] = b[i];
f.ntt(), g.ntt();
for (int i = 0; i < d * 2; i++) f[i] *= g[i];
f.intt();
for (int i = q; i < min(q + d, N + 1); i++) c[i] += f[d + i - q];
} else {
precalc(lg);
f.assign(2 * d, mint{});
g.assign(2 * d, mint{});
for (int i = 0; i < d; i++) f[i] = a[q - d + i];
for (int i = 0; i < d; i++) g[i] = b[q - d + i];
f.ntt(), g.ntt();
fps& s = as[lg];
fps& t = bs[lg];
for (int i = 0; i < d * 2; i++) f[i] = f[i] * t[i] + g[i] * s[i];
f.intt();
for (int i = q; i < min(q + d, N + 1); i++) c[i] += f[d + i - q];
}
}
return c[q - 1];
}
};
struct combination {
vector<mint> fact, ifact;
combination(int n):fact(n+1),ifact(n+1) {
fact[0] = 1;
for (int i = 1; i <= n; ++i) fact[i] = fact[i-1]*i;
ifact[n] = fact[n].inv();
for (int i = n; i >= 1; --i) ifact[i-1] = ifact[i]*i;
}
mint operator()(int n, int k) {
if (k < 0 || k > n) return 0;
return fact[n]*ifact[k]*ifact[n-k];
}
} com(10000005);
void solve(){
int m,n; cin>>m>>n;
vi a(n);
rep(i,n) cin>>a[i],a[i]--;
vi cnt(m);
rep(i,n) cnt[a[i]]++;
vm g(n+1);
vm b(n+1);
vm c(n+1);
rep(i,n+1) c[i] = com.ifact[i+1];
int mx = 0;
rep(i,m) chmax(mx, cnt[i]);
RelaxedConvolution<FPS> rc(n+1);
rep(i,mx-1){
rc.get(b[i], c[i]);
}
map<int,int> mp;
rep(i,m){
mp[cnt[i]]++;
}
vm now;
vi p;
mint coef = 1;
fore(y , mp){
now.pb(mx-y.fi);
p.pb(y.se);
coef *= com.fact[mx-y.fi].pow(y.se);
}
mint ans = 0;
repx(i,mx,n+1){
mint tmp = rc.get(b[i-1], c[i-1]) * com.fact[i];
mint f = com.fact[i].pow(m) / coef;
mint g = f - tmp;
ans += g;
b[i] = g*com.ifact[i];
rep(j,sz(now)){
now[j]++;
coef *= now[j].pow(p[j]);
}
}
outm;
return;
}
int main(){
ios::sync_with_stdio(false);
cin.tie(0);
int t = 1;
//cin>>t;
rep(i,t){
solve();
}
return 0;
}
הההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההה
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
0