結果
問題 | No.2975 単調増加部分積 |
ユーザー | ![]() |
提出日時 | 2024-02-07 22:27:19 |
言語 | C++17 (gcc 13.3.0 + boost 1.87.0) |
結果 | AC |
実行時間 | 39 ms / 10,000 ms |
コード長 | 31,400 bytes |
コンパイル時間 | 5,370 ms |
コンパイル使用メモリ | 289,052 KB |
最終ジャッジ日時 | 2025-02-19 02:48:51 |
ジャッジサーバーID (参考情報) | judge1 / judge1 |
(要ログイン)
ファイルパターン | 結果 |
---|---|
other | AC * 24 |
ソースコード
#if !defined(MYLOCAL)//提出時用テンプレート#pragma GCC optimize("Ofast")#if defined(NDEBUG)#undef NDEBUG#endif#include "bits/stdc++.h"#if !defined(_MSC_VER) && __has_include(<atcoder/all>)#include <atcoder/all>using namespace atcoder;#endifusing namespace std;using ll=long long; using dd=double;using vll=vector< ll>; using vdd=vector< dd>;using vvll=vector< vll>; using vvdd=vector<vdd>;using vvvll=vector< vvll>; using vvvdd=vector<vvdd>;using vvvvll=vector<vvvll>;using pll=pair<ll,ll>; using tll=tuple<ll,ll,ll>; using qll=tuple<ll,ll,ll,ll>;using vpll=vector< pll>; using vtll=vector< tll>; using vqll=vector< qll>;using vvpll=vector<vpll>; using vvtll=vector<vtll>; using vvqll=vector<vqll>;using namespace chrono;constexpr ll INF = 1001001001001001001;struct Fast{ Fast(){ cin.tie(0); ios::sync_with_stdio(false); cout<<fixed<<setprecision(numeric_limits<double>::max_digits10); } } fast;#define REPS(i, S, E) for (ll i = (S); i <= (E); i++)#define REP(i, N) REPS(i, 0, (N)-1)#define DEPS(i, S, E) for (ll i = (E); i >= (S); i--)#define DEP(i, N) DEPS(i, 0, (N)-1)#define EXPAND( x ) x//VS用おまじない#define overload3(_1,_2,_3,name,...) name#define overload4(_1,_2,_3,_4,name,...) name#define overload5(_1,_2,_3,_4,_5,name,...) name#define rep3(i, S, E) for (ll i = (S); i <= (E); i++)#define rep4(i, S, E, t) for (ll i = (S); i <= (E); i+=(t))#define rep(...) EXPAND(overload4(__VA_ARGS__,rep4,rep3,_,_)(__VA_ARGS__))#define dep3(i, E, S) for (ll i = (E); i >= (S); i--)#define dep4(i, E, S, t) for (ll i = (E); i >= (S); i-=(t))#define dep(...) EXPAND(overload4(__VA_ARGS__, dep4, dep3,_,_)(__VA_ARGS__))#define each2(e,v) for (auto && e:v)#define each3(a,b,v) for (auto &&[a,b]:v)#define each4(a,b,c,v) for (auto &&[a,b,c]:v)#define each5(a,b,c,d,v) for (auto &&[a,b,c,d]:v)#define each(...) 
EXPAND(overload5(__VA_ARGS__,each5,each4,each3,each2,_)(__VA_ARGS__))#define ALL1(v) (v).begin(), (v).end()#define ALL2(v,E) (v).begin(), (v).begin()+((E)+1)#define ALL3(v,S,E) (v).begin()+(S), (v).begin()+((E)+1)#define ALL(...) EXPAND(overload3(__VA_ARGS__, ALL3, ALL2, ALL1)(__VA_ARGS__))#define all ALL#define RALL1(v) (v).rbegin(), (v).rend()#define RALL2(v,E) (v).rbegin(), (v).rbegin()+((E)+1)#define RALL3(v,S,E) (v).rbegin()+(S), (v).rbegin()+((E)+1)#define RALL(...) EXPAND(overload3(__VA_ARGS__, RALL3, RALL2, RALL1)(__VA_ARGS__))#define rall RALLtemplate<class T> inline bool chmax(T &a, T b) { if (a < b) { a = b; return true; }return false; }template<class T> inline bool chmin(T &a, T b) { if (a > b) { a = b; return true; }return false; }template<class T> inline T MaxE(vector<T>&v,ll S,ll E){ T m=v[S]; rep(i,S,E)chmax(m,v[i]); return m; }template<class T> inline T MinE(vector<T>&v,ll S,ll E){ T m=v[S]; rep(i,S,E)chmin(m,v[i]); return m; }template<class T> inline T MaxE(vector<T> &v) { return MaxE(v,0,(ll)v.size()-1); }template<class T> inline T MinE(vector<T> &v) { return MinE(v,0,(ll)v.size()-1); }template<class T> inline auto maxe(T &&v,ll S,ll E){ return *max_element(ALL(v,S,E)); }template<class T> inline auto maxe(T &&v){ return *max_element(ALL(v)); }template<class T> inline auto mine(T &&v,ll S,ll E){ return *min_element(ALL(v,S,E)); }template<class T> inline auto mine(T &&v){ return *min_element(ALL(v)); }template<class T> inline T Sum(vector<T> &v,ll S,ll E){ T s=T(); rep(i,S,E)s+=v[i]; return s; }template<class T> inline T Sum(vector<T> &v) { return Sum(v,0,v.size()-1); }template<class T,class U=typename remove_reference<T>::type::value_type>inline U sum(T &&v,ll S,ll E) {return accumulate(all(v,S,E),U());}template<class T> inline auto sum(T &&v) {return sum(v,0,v.end()-v.begin()-1);}template<class T> inline ll sz(T &&v){ return (ll)v.size(); }inline ll CEIL(ll a,ll b){ return (a<0) ? 
-(-a/b) : (a+b-1)/b; } //負もOKinline ll FLOOR(ll a,ll b){ return -CEIL(-a,b); } //負もOK//pair用テンプレートtemplate<class T,class S> inline pair<T,S>& operator+=(pair<T,S> &a,const pair<T,S> &b){ a.first+=b.first; a.second+=b.second; return a; }template<class T,class S> inline pair<T,S>& operator-=(pair<T,S> &a,const pair<T,S> &b){ a.first-=b.first; a.second-=b.second; return a; }template<class T,class S> inline pair<T,S>& operator*=(pair<T,S> &a,const pair<T,S> &b){ a.first*=b.first; a.second*=b.second; return a; }template<class T,class S> inline pair<T,S>& operator/=(pair<T,S> &a,const pair<T,S> &b){ a.first/=b.first; a.second/=b.second; return a; }template<class T,class S> inline pair<T,S>& operator%=(pair<T,S> &a,const pair<T,S> &b){ a.first%=b.first; a.second%=b.second; return a; }template<class T,class S,class R> inline pair<T,S>& operator+=(pair<T,S> &a,R b){ a.first+=b; a.second+=b; return a; }template<class T,class S,class R> inline pair<T,S>& operator-=(pair<T,S> &a,R b){ a.first-=b; a.second-=b; return a; }template<class T,class S,class R> inline pair<T,S>& operator*=(pair<T,S> &a,R b){ a.first*=b; a.second*=b; return a; }template<class T,class S,class R> inline pair<T,S>& operator/=(pair<T,S> &a,R b){ a.first/=b; a.second/=b; return a; }template<class T,class S,class R> inline pair<T,S>& operator%=(pair<T,S> &a,R b){ a.first%=b; a.second%=b; return a; }template<class T,class S,class R> inline pair<T,S> operator+(const pair<T,S> &a,R b){ pair<T,S> c=a; return c+=b; }template<class T,class S,class R> inline pair<T,S> operator-(const pair<T,S> &a,R b){ pair<T,S> c=a; return c-=b; }template<class T,class S,class R> inline pair<T,S> operator*(const pair<T,S> &a,R b){ pair<T,S> c=a; return c*=b; }template<class T,class S,class R> inline pair<T,S> operator/(const pair<T,S> &a,R b){ pair<T,S> c=a; return c/=b; }template<class T,class S,class R> inline pair<T,S> operator%(const pair<T,S> &a,R b){ pair<T,S> c=a; return c%=b; }template<class T,class S,class R> inline 
pair<T,S> operator-(R b,const pair<T,S> &a){ pair<T,S> c=-a; return c+=b; }template<class T,class S> inline pair<T,S> operator-(const pair<T,S> &a,const pair<T,S> &b){ pair<T,S> c=a; return c-=b; }template<class T,class S> inline pair<T,S> operator-(const pair<T,S> &a){ pair<T,S> c=a; return c*=(-1); }template<class T,class S> inline ostream &operator<<(ostream &os,const pair<T,S> &a){ return os << a.first << ' ' << a.second; }//tuple用テンプレート 出力用のみtemplate<class T,class S,class R> inline ostream &operator<<(ostream &os,const tuple<T,S,R> &a){ return os << get<0>(a) << ' ' << get<1>(a) << ' ' <<get<2>(a); }template<class T,class S,class R,class Q> inline ostream &operator<<(ostream &os,const tuple<T,S,R,Q> &a){ return os << get<0>(a) << ' ' << get<1>(a)<< ' ' << get<2>(a) << ' ' << get<3>(a); }//vector用テンプレートtemplate<class T> inline ostream &operator<<(ostream &os,const vector<T> &a){ for (ll i=0; i<(ll)a.size(); i++) os<<(i>0?" ":"")<<a[i]; return os; }#endif//テンプレートend#if defined(_MSC_VER) && __has_include(<atcoder/all>)#include <atcoder/all>using namespace atcoder;#endifstruct{system_clock::time_point st = system_clock::now();ll operator()()const{return duration_cast<microseconds>(system_clock::now()-st).count()/1000;}} timeget;//////////////////////////////////////////namespace fpsspace{using Int = long long;using ll = long long;constexpr int inf=int(1e9);/********* utility関数 *********/template<class T> T POW(T a,ll n){//a^n n負も可if (n<0) a=T(1)/a,n=-n;T r=1;for (; n>0; n>>=1,a*=a) if (n&1)r*=a;return r;}ll LimitMul(ll a,ll b,ll l=ll(9e18)){//min(a*b,l) a,b≧0return (b==0 || a<=l/b) ? 
a*b : l;}/*---- 1/i列挙 i=1~d ----*/template<int Kind> struct Wrap{};//オーバロード解決用にKindを型に変換template<class T,int Kind,class=enable_if_t<Kind==1 || Kind==2>>vector<T> &Invs(int d,Wrap<Kind>){//Kind=1 or 2(modint系)の時static vector<T> invs(2,T(1));int MOD = T::mod();for (int i=(int)invs.size();i<=d;++i) invs.push_back(-invs[MOD%i]*T(MOD/i));return invs;}template<class T> vector<T> &Invs(int d,Wrap<0>){//その他の時static vector<T> invs(1);for (int i=(int)invs.size();i<=d;++i) invs.push_back(T(1)/i);return invs;}template<class T> vector<T> &Fact(int d){// i!列挙 i=0~dstatic vector<T> fact(1,T(1));for (int i=(int)fact.size();i<=d;++i) fact.push_back(fact.back()*T(i));return fact;}template<class T,int Kind> vector<T> &FInv(int d){// 1/i!列挙 i=0~dstatic vector<T> finv(1,T(1));const vector<T> &invs=Invs<T>(d,Wrap<Kind>{});for (int i=(int)finv.size();i<=d;++i) finv.push_back(finv.back()*invs[i]);return finv;}// Berlekamp Massey法 2L-1次までのA(x)からA=P/QのQをL次で復元 Kind=1,2のみtemplate <class T> vector<T> BerlekampMassey(const vector<T> &a){vector<T> C={1},B={1};//C:求める数列、B:1つ前のCの状態を保存int m=1; //ポインタ?っぽいものT b=T(1); //前回のdの値auto C_update=[](vector<T> &C,T d,T b,vector<T> &B,int m){T d_b=d/b;int M=(int)B.size();if ((int)C.size()<M+m) C.resize(M+m);for (int i=0;i<M;++i) C[i+m]-=d_b*B[i];};for (int n=0;n<(int)a.size();++n){T d=T(0);for (int k=0;k<(int)C.size();++k) d+=C[k]*a[n-k]; //dを計算if (d!=T(0)){//①d=0なら、現在のCでAnを求める漸化式は成り立っている,そうでないなら調整if (2*((int)C.size()-1) <= n){vector<T> tmp=C;C_update(C,d,b,B,m); //C -= d/b * (Bをmだけ右シフトしたもの)B.swap(tmp); b=d; m=0;}else C_update(C,d,b,B,m); //C -= d/b * (Bをmだけ右シフトしたもの)}m++;}return C;}template<class FPS,class SPFPS,class T=typename FPS::value_type,class S>FPS de_sparse( //a*F'=b*Fを満たすFconst SPFPS &a_,const SPFPS &b_,S f0,Int dmx_,const vector<T> &invs_=vector<T>()){assert(a_.lowdeg()<=b_.lowdeg());int dmx=(int)dmx_;const vector<T> &invs = invs_.size() ? 
invs_ : Invs<T>(dmx,Wrap<FPS::kind>{});SPFPS a=a_.shift(-a_.lowdeg()),b=b_.shift(-a_.lowdeg());T a0inv=T(1)/a.co(0);a*=a0inv,b*=a0inv;a.erase(a.begin());FPS f({T(f0)},dmx);for (int d=1;d<=dmx;++d){for (auto [bb,i]:b){if (d-1-i>=0) f.at(d)+=bb*f[d-1-i];}for (auto [aa,i]:a){if (d-i>=0) f.at(d)-=aa*f[d-i]*(d-i);}f.at(d)*=invs[d];}return f;}/********* 疎FPSクラス *********/template<class T> struct sparseFps: vector<pair<T,Int>>{using vector<pair<T,Int>>::vector; //親クラスのコンストラクタの隠蔽を回避sparseFps &Norm(){//d昇順、同一dのco加算、co=0を削除sort(this->begin(),this->end(),[](const auto &x,const auto &y){return x.second<y.second;});int j=-1;for (int i=0;i<this->size();++i){if (j>=0 && deg(j)==deg(i)){co(j)+=co(i);}else{if (!(j>=0 && co(j)==T(0))) ++j;(*this)[j]=(*this)[i];}}if (j>=0 && co(j)==T(0)) --j;this->resize(j+1);return *this;}/*---- I/F ----*/template<class S,class R>void set(S co,R deg){ this->emplace_back(T(co),Int(deg)); }Int deg()const{ return this->empty() ? -1 : this->back().second; }//最高次数T co(Int i)const{ return (*this)[i].first; }//(*this)[i]の係数T &co(Int i) { return (*this)[i].first; }Int deg(Int i)const{ return (*this)[i].second; }//(*this)[i]の次数Int °(Int i) { return (*this)[i].second; }Int lowdeg()const{ return this->empty() ? 
inf : this->front().second; }sparseFps &operator+=(const sparseFps &sg){this->insert(this->end(),sg.begin(),sg.end());return Norm();}sparseFps operator+(const sparseFps &sg)const{ return sparseFps(*this)+=sg; }sparseFps &operator*=(T b){ for (auto&&[c,_]:*this) c*=b; return *this; }sparseFps operator*(T b)const{ return sparseFps(*this)*=b; }sparseFps &operator*=(const sparseFps &sg){ return *this=*this*sg; }sparseFps operator*(const sparseFps &sg)const{sparseFps ret;for (auto&&[cf,df]:*this) for (auto&&[cg,dg]:sg) ret.set(cf*cg,df+dg);return ret.Norm();}sparseFps shift(Int k)const{ // *x^ksparseFps ret;for (auto&&[co,d]:*this) if (d+k>=0) ret.set(co,d+k);return ret;}sparseFps diff()const{sparseFps ret;for (auto&&[co,d]:*this) if (d>0) ret.set(co*d,d-1);return ret;}template<class FPS> FPS exp(Int dmx)const{assert(lowdeg()!=0); //定数項=0必須return de_sparse<FPS>(sparseFps{{1,0},},diff(),1,dmx);}template<class FPS>FPS pow(ll k,Int dmx,const vector<T> &invs_=vector<T>())const{assert(!(k<0 && lowdeg()>0));//k負なら定数項必須if (k==0) return FPS({1},dmx);//-- 計算後最高次数d:k<0ならdmx、k>0ならmin(dmx,deg()*k)までint d = (k<0 || LimitMul(deg(),k)>(ll)dmx) ? int(dmx) : int(deg()*k);//-- invs[i]=1/iをi=1~dまで計算(計算済み分は再利用、足りない分だけ計算)const vector<T> &invs = invs_.size() ? 
invs_ : Invs<T>(d,Wrap<FPS::kind>{});//-- 最低次数関連処理int s=(int)lowdeg();//計算前最低次数if (k>0 && LimitMul(s,k)>(ll)dmx) return FPS(dmx);//計算後all0の時//-- 漸化式で計算T f0inv=T(1)/co(0);FPS g({POW(co(0),k)},dmx);for (int i=1;i<=d-s*k;++i){ //k負の時必ずs=0なのでOKfor (int j=1;j<(int)this->size();++j){auto [c,dg]=(*this)[j];int b=int(dg)-s;if (i-b<0)break;g.at(i)+=c*g.at(i-b)*(T(k)*b-i+b);}g.at(i)*=f0inv*invs[i];}return g.shift(Int(s*k));}};/********* FPSクラス *********/template<class T, //係数の型int Kind //係数の種類 0:その他、1:NTTfriendly mod、2:任意mod>struct Fps: vector<T>{static_assert(0<=Kind && Kind<=3);static constexpr int kind=Kind;int dMx=int(1e6); //次数上限(x^dMxより上は保持しない)using vT = vector<T>;/*---- utility ----*/int isize()const{ return (int)vector<T>::size(); }int NormSize()const{//leading zeroを除いたサイズ const用int sv=isize();while (sv>0 && (*this)[sv-1]==T(0)) --sv;return sv;}int Deg()const{ return NormSize()-1; } //最高次数 const用Fps &Cut(){ return cut(dMx); }Fps &ZeroExtend(){int anm=max(0,dMx-isize()+1);vT::insert(vT::end(),anm,T(0));return *this;}int MinD(const Fps &g)const{ return min(dMx,g.dMx); }void MergeD(const Fps &g){ dMx=MinD(g); Cut(); }template <int Sign> Fps &Add(const Fps &g){MergeD(g);for (int i=min(dMx,g.Deg());i>=0;--i) at(i)+=Sign*g[i];return *this;}Fps ProdSparse(const sparseFps<T> &g,int d)const{//f*疎g mod x^(d+1)Fps ret(d);for (auto&&[co,dg]:g) for (int i=0;i<(int)isize();++i){if (dg+i>d) break;ret.at(dg+i)+=co*(*this)[i];}return ret;}Fps InvSparse(const sparseFps<T> &g,int d)const{//f/疎g mod x^(d+1) g0≠0assert(!g.empty() && g.deg(0)==0 && g.co(0)!=0);//-- g定数項を1にするT c0inv=T(1)/g.co(0);Fps ret=((*this)*c0inv).setdmx(d);if (g.size()==1u) return ret;sparseFps<T> gg=g*c0inv;//-- 配るDP計算for (int i=0; i+(int)gg.deg(1)<=d; ++i){for (int j=1; j<(int)gg.size(); ++j){auto [co,dg]=gg[j];int ii=i+(int)dg;if (d<ii)break;ret.at(ii)-=ret.at(i)*co;}}return ret;}Fps &LogSparse( //f+=log(疎g^k),g=1+ax^bconst sparseFps<T> &g,ll k,const vector<T> &invs_=vector<T>()){assert(g.size()==2U && 
g.co(0)==T(1) && g.deg(0)==0);const vector<T> &invs = invs_.size() ? invs_ : Invs<T>(dMx,Wrap<Kind>{});int b=(int)g.deg(1);T c=g.co(1)*k;for (int i=1;i*b<=dMx;++i,c*=-g.co(1)) at(i*b)+=c*invs[i];return *this;}/*---- コンストラクタ ----*/explicit Fps(Int dmx=int(1e6)): dMx(int(dmx)){}Fps(initializer_list<T> i,Int dmx=int(1e6)):vT(i.begin(),i.end()),dMx(int(dmx)){ Cut(); }template <class It,class=typename iterator_traits<It>::iterator_category>Fps(It l,It r,Int dmx=int(1e6)): vT(l,r),dMx(int(dmx)){ Cut(); }Fps(vector<T> &&v,Int dmx=int(1e6)): vT(move(v)),dMx(int(dmx)){}Fps(const sparseFps<T> &sf,Int dmx=int(1e6)):dMx(int(dmx)){ //疎f → ffor (auto&&[co,deg]:sf) if (deg<=dmx) at(deg)=co;}/*---- I/F ----*/sparseFps<T> tosparse()const{ //f → 疎fsparseFps<T> ret;for (int i=0;i<isize();++i){if ((*this)[i]!=T(0)) ret.set((*this)[i],i);}return ret;}Int size()const{ return (Int)vector<T>::size(); }Int deg(){ fit(); return size()-1; }Int lowdeg()const{for (int i=0;i<isize();++i){if ((*this)[i]!=T(0)) return i;}return inf;}Fps &setdmx(Int dmx){ dMx=(int)dmx; return Cut(); }T at(Int i)const{ return size()<=i ? 
T(0) : (*this)[i]; }T &at(Int i){if (size()<=i) this->resize(i+1);return (*this)[i];}Fps &fit(){this->resize(NormSize());return *this;}Fps &operator+=(const Fps &g){ return Add<1>(g); }Fps &operator-=(const Fps &g){ return Add<-1>(g); }Fps &operator*=(const Fps &g){ return *this=*this*g; }Fps &operator/=(const Fps &g){ return *this=*this/g; }Fps &operator*=(const sparseFps<T> &g){ return *this=*this*g; }Fps &operator/=(const sparseFps<T> &g){ return *this=*this/g; }Fps &operator+=(T c){ at(0)+=c; return *this; }Fps &operator-=(T c){ at(0)-=c; return *this; }Fps &operator*=(T c){ for (auto&& e: *this) e*=c; return *this; }Fps &operator/=(T c){ return (*this)*=T(1)/c; }Fps operator+(const Fps &g)const{ return Fps(*this)+=g; }Fps operator-(const Fps &g)const{ return Fps(*this)-=g; }Fps operator*(const Fps &g)const{ return Prod(*this,g,MinD(g)); }Fps operator/(const Fps &g)const{ return InvSparse(g.tosparse(),MinD(g)); }Fps operator*(const sparseFps<T> &g)const{ return ProdSparse(g,dMx); }Fps operator/(const sparseFps<T> &g)const{ return InvSparse(g,dMx); }Fps operator+(T c)const{ return Fps(*this)+=c; }Fps operator-(T c)const{ return Fps(*this)-=c; }Fps operator*(T c)const{ return Fps(*this)*=c; }Fps operator/(T c)const{ return Fps(*this)/=c; }Fps operator-()const{ return Fps(*this)*=T(-1); }friend Fps operator+(T c,const Fps &f){ return f+c; }friend Fps operator-(T c,const Fps &f){ return -f+c; }friend Fps operator*(T c,const Fps &f){ return f*c; }T prod1(const Fps &g,Int k_)const{ //[x^k]f*gint df=Deg(),dg=g.Deg(),k=(int)k_;if (MinD(g)<k) return T(0);T ret=T(0);for (int i=max(0,k-dg),j=k-i; i<=df&&j>=0; ++i,--j) ret+=(*this)[i]*g[j];return ret;}T bostanmori(const Fps &g,ll k)const{ //[x^k]f/gassert(g.at(0)!=0);Fps P=Fps(*this).setdmx(inf),Q=Fps(g).setdmx(inf);for (; k>0; k>>=1){Fps Q1=Q;for (int i=1;i<Q1.isize();i+=2) Q1[i]*=-1; //Q1=(Qの奇数項を正負反転)Fps PQ=P*Q1,QQ=Q*Q1;P.clear(),Q.clear();for (int i=k&1;i<PQ.isize();i+=2) P.push_back(PQ[i]);//P=(PQの奇or偶数項)for (int i=0; 
i<QQ.isize();i+=2) Q.push_back(QQ[i]);//Q=(QQの偶数項)}return P.at(0)/Q[0];}Fps berlekamp_massey(Int d)const{ //f=P/QのQを得る x^d(d奇数)までの係数から推定assert(d%2==1);vector<T> f;for (int i=0;i<=d;++i) f.push_back(at(i));vector<T> Q=BerlekampMassey(f);Int dmx=Int(Q.size()-1);return Fps(move(Q),dmx);}T nthterm(Int d,ll k)const{ //[x^k]f 線形漸化式を仮定しx^d(d奇数)までから推定Fps Q=berlekamp_massey(d);Fps P=Prod(*this,Q,Q.dMx-1).fit();return P.bostanmori(Q,k);}Fps &estimate(Int d,Int dmx=-1){ //dmx次まで推定 線形漸化式を仮定しx^d(d奇数)までから推定if (dmx==-1) dmx=dMx;Fps Q=berlekamp_massey(d);Fps P=Prod(*this,Q,Q.dMx-1).fit().setdmx(dmx);return *this=(Q.setdmx(dmx).inv()*P).ZeroExtend();}Fps &cut(Int d){ //x^dまでにするif (d+1<size()) vT::resize(size_t(d+1));return *this;}Fps &mod(Int n){ return cut(n-1); } //mod x^n[[nodiscard]] Fps shift(Int k_)const{ // *x^kFps ret(dMx);const int k=(int)k_,m=min(isize()+k,dMx+1); //変換後長さif (m<=0 || dMx<k) return ret; //空になる時for (int i=m-1-k;i>=max(0,-k);--i) ret.at(i+k)=(*this)[i];return ret;}T eval(T x)const{ //f(c)T ret=T(0);for (int i=isize()-1;i>=0;--i) ret*=x,ret+=(*this)[i];return ret;}Fps diff()const{ //微分Fps ret(dMx-1);for (int i=Deg();i>=1;--i) ret.at(i-1)=(*this)[i]*i;return ret;}Fps integ()const{ //積分Fps ret(dMx+1);for (int i=min(Deg(),dMx); i>=0; --i) ret.at(i+1)=(*this)[i]/(i+1);return ret;}T integrange(T l,T r)const{ //定積分 ∫_l^r f dxFps itg=integ();return itg.eval(r)-itg.eval(l);}Fps inv()const{assert(at(0)!=0);//定数項≠0Fps g{T(1)/at(0)};for (int i=1;i<dMx+1;i*=2){//i:項数g.setdmx(min(i*2-1,dMx));g = g+g-g*g*(*this);}return g;}Fps log()const{ //log fassert(at(0)==T(1));//定数項=1return (diff()*inv()).integ();}Fps exp()const{ //exp fassert(at(0)==T(0));//定数項=0Fps g{1};for (int i=1;i<dMx+1;i*=2){//i:項数g.setdmx(min(i*2-1,dMx));g = g*(T(1)-g.log()+(*this));}return g;}Fps pow(ll k)const{ //f^k k<0は未対応if (k==0) return Fps({1},dMx);if (k==1) return *this;int z=(int)lowdeg();if (z==inf || z>int(dMx/k)) return Fps(dMx);//f(x)=0か結果=0の時int m=int(dMx+1-z*k); 
//最終は先頭にゼロがz*k個→計算はdMx+1-z*k項でokFps g=shift(-z).setdmx(m-1)/at(z); //定数項1にする変換Fps gk=(g.log()*k).exp(); //g^kFps ret=(gk*POW(at(z),k)).setdmx(dMx).shift(Int(z*k)); //変換を戻すreturn ret;}Fps powdbl(ll k)const{ //f^kFps ret({1},dMx),g=*this;for (; k>0; k>>=1,g*=g) if (k&1)ret*=g;return ret;}Fps powsparse(ll k,const vector<T> &invs=vector<T>())const{ //疎f^kreturn tosparse().template pow<Fps>(k,dMx,invs);}pair<Fps,Fps> div(const Fps &g)const{ //多項式f/g,f%gconst Fps &f=*this;int na=f.NormSize(),nb=g.NormSize();assert(nb>0);int n=na-nb+1;//商の項数if (n<=0) return {Fps(dMx),f};int nu=f.isize(),nv=g.isize();Fps aR(f.rbegin()+nu-na,f.rbegin()+min(nu-na+n,nu),n-1);Fps bR(g.rbegin()+nv-nb,g.rbegin()+min(nv-nb+n,nv),n-1);Fps qR=bR.inv()*aR;qR.resize(n);reverse(qR.begin(),qR.end());qR.fit().setdmx(dMx);Fps r=(f-Prod(qR,g,dMx)).fit();return {move(qR),move(r)};}};/********* 積をNTTmod畳み込み、任意mod畳み込み、畳み込み不使用から選択 *********/template<class T> //f*g mod x^(d+1) 畳み込み不使用Fps<T,0> Prod(const Fps<T,0> &f,const Fps<T,0> &g,int d){return f.ProdSparse(g.tosparse(),d);}template<class T> //f*g mod x^(d+1) NTTmod畳み込みFps<T,1> Prod(const Fps<T,1> &f,const Fps<T,1> &g,int d){int nf=min(d+1,f.NormSize()),ng=min(d+1,g.NormSize());vector<ll> ff,gg;ff.reserve(nf),gg.reserve(ng);for (int i=0;i<nf;++i) ff.push_back(f[i].val());for (int i=0;i<ng;++i) gg.push_back(g[i].val());vector<ll> hh=convolution<T::mod()>(ff,gg);if ((int)hh.size()>d+1) hh.resize(d+1);return Fps<T,1>(hh.begin(),hh.end(),d);}template<class T> //f*g mod x^(d+1) 任意mod畳み込みFps<T,2> Prod(const Fps<T,2> &f,const Fps<T,2> &g,int d){static constexpr int m0 = 167772161; //m0<m1<m2必須static constexpr int m1 = 469762049;static constexpr int m2 = 754974721;static constexpr int m01 = 104391568;// 1/m0(mod m1)static constexpr int m12 = 399692502;// 1/m1(mod m2)static constexpr int m012 = 190329765;// 1/m0m1(mod m2)static int m0m1 = ll(m0)*m1 % T::mod();int nf=min(d+1,f.NormSize()),ng=min(d+1,g.NormSize());vector<ll> ff,gg;ff.reserve(nf),gg.reserve(ng);for (int 
i=0;i<nf;++i) ff.push_back(f[i].val());for (int i=0;i<ng;++i) gg.push_back(g[i].val());vector<ll> h0=convolution<m0>(ff,gg);vector<ll> h1=convolution<m1>(ff,gg);vector<ll> h2=convolution<m2>(ff,gg);Fps<T,2> ret(d);int nn=min(d+1,(int)h0.size());ret.reserve(nn);for (int i=0;i<nn;++i){ll r0=h0[i],r1=h1[i],r2=h2[i];ll s0=r0;ll s1=(r1+m1-s0)*m01%m1; //s0<m1のため正になるll s2=((r2+m2-s0)*m012+(m2-s1)*m12)%m2; //s0,s1<m2のため正になるret.emplace_back(s0+s1*m0+s2*m0m1);}return ret;}#if 0 //f*g mod x^(d+1) FFT畳み込み 使用時はFFTライブラリを貼った上で1にするtemplate<class T>Fps<T,3> Prod(const Fps<T,3> &f,const Fps<T,3> &g,int d){vector<T> ff(f.begin(),f.end()),gg(g.begin(),g.end());vector<T> hh = ArbitraryModConvolution::CooleyTukey::multiply(ff,gg);if ((int)hh.size()>d+1) hh.resize(d+1);return Fps<T,3>(hh.begin(),hh.end(),d);}#endif/********* I/F関数 *********/template<class FPS,class T=typename FPS::value_type> FPS prodtwopow(//f^k*g^msparseFps<T> f_,ll k,sparseFps<T> g_,ll m,Int dmx,const vector<T> &invs=vector<T>()){if (k==0) f_={{T(1),0},},k=1;if (m==0) g_={{T(1),0},},m=1;Int fz=f_.lowdeg(),gz=g_.lowdeg();assert(!(fz==Int(1e9) && k<0) && !(gz==Int(1e9) && m<0));//f=0かつk>0はNGif (fz==Int(1e9) || gz==Int(1e9)) return FPS(dmx);//f=0なら結果=0ll z=fz*k+gz*m; //k,m巨大時のoverflowは未対応とするassert(z>=0);if (ll(dmx)<z) return FPS(dmx);sparseFps<T> f=f_.shift(-fz),g=g_.shift(-gz);Int dmx2=dmx-z;sparseFps<T> a=f*g,b=f.diff()*g*k+f*g.diff()*m;T h0=POW(f.co(0),k)*POW(g.co(0),m);FPS h=de_sparse<FPS>(a,b,h0,dmx2,invs);return h.setdmx(dmx).shift(Int(z));}}//namespace fpsspace#if 0using fpsT = dd;using fps = fpsspace::Fps<fpsT,0>; //0:畳み込み不使用#elif 0using fpsT = mll;using fps = fpsspace::Fps<fpsT,1>; //1:NTTfriendly mod#elif 1using fpsT = atcoder::modint;using fps = fpsspace::Fps<fpsT,2>; //2:任意mod#elif 0using fpsT = dd;using fps = fpsspace::Fps<fpsT,3>; //3:FFT#endifusing spfps = fpsspace::sparseFps<fpsT>;/*- 各種演算の結果の次数上限は、一部例外を除きf,gの小さい方となる。- 疎FPSクラスは次数昇順、係数≠0必須- -------- コンストラクタ --------fps f; //f(x)=0 次数上限1e6fps f(d); // 〃 〃 
dfps f{2,3,4,}; //f(x)=2+3x+4x^2 次数上限1e6fps f({2,3,4,},d); // 〃 〃 dfps f(all(v)); //vll等のvをコピー 次数上限1e6fps f(all(v),d); // 〃 〃 d- -------- コンストラクタ疎版 -------- vector<pair>と同じspfps sf={{4,2},{-1,5}}; //f(x)=4x^2-x^5sf.set(c,d); //c*x^dを末尾に追加- -------- 演算子(fps同士) --------f+=g f-=g f+g f-g -f 疎f+=疎g 疎f*=疎g 疎f+疎g 疎f*疎gf*=g f*g //NTTmod,任意mod,愚直がテンプレートで切り替わるf*=疎g f*疎g //愚直f/=g f/=疎g f/g f/疎g //漸化式で愚直 g定数項≠0- -------- 演算子(定数) --------f+=c f-=c f*=c f/=c f+c f-c f*c f/c 疎f*=c 疎f*c- -------- アクセス・操作 --------f[i]=val; //直接操作f.at(i)=val; //自動サイズ調整有ll n=f.size(); //項数(次数+1) leading zero含むll d=f.deg(); //非0の最高次の次数 f(x)=0の時-1ll d=f.lowdeg(); //非0の最低次の次数 f(x)=0の時1e9f.setdmx(d); //次数上限をx^dにセット & mod x^(d+1) d≧0f.fit(); //最高次≠0になるよう縮めるfps f(sf); //疎f→f 変換fps f(sf,d); //疎f→f 変換 次数上限dspfps sf=f.tosparse(); //f→疎f 変換- -------- 演算 --------mll c=f.prod1(g,k); //[x^k]f*gmll c=f.bostanmori(g,k);//[x^k]f/g g定数項≠0 k巨大(10^18)でもOKf.cut(d); //x^dまでにするf.mod(n); //mod x^nfps g=f.shift(k); //f*x^k k負も可spfps sg=sf.shift(k); //疎f*x^k k負も可mll val=f.eval(c); //f(c)fps g=f.diff(); //微分fps g=f.integ(); //積分mll val=f.integrange(l,r); //定積分 ∫_l^r f dxfps g=f.inv(); //1/f 定数項≠0fps g=f.log(); //log f 定数項=1fps g=f.exp(); //exp f 定数項=0fps g=sf.exp<fps>(d); //exp 疎f 定数項=0fps g=f.pow(k); //f^k k負は未対応fps g=f.powdbl(k); //f^k doubling版fps g=sf.pow<fps>(k,d); //疎f^k 次数上限d k負も可(定数項≠0必須)fps g=f.powsparse(k); //疎f^k k負も可(定数項≠0必須)auto[h,r]=f.div(g); //多項式の除算・剰余 h=f/g,r=f%g 次数上限はfの方fps Q=f.berlekamp_massey(); //f=P/QのQを復元 fは2d-1次、Qはd次 Qのdmx=dmll c=f.nthterm(k); //[x^k]f 線形漸化式を仮定 k巨大(10^18)でもOKf.estimate(); //次数上限まで推定 線形漸化式を仮定f.estimate(d); //d次まで推定 線形漸化式を仮定fps F=fpsspace::de_sparse<fps>(sf,sg,F0,d); //微分方程式 疎f*F'=疎g*F 次数上限dfps h=fpsspace::prodtwopow<fps>(sf,k,sg,m,d); //疎f^k*疎g^m 次数上限d k,m負も可*/namespace fpsspace{template<class T,int Kind> Fps<T,Kind> prodallPque(vector<Fps<T,Kind>> &fs){using FPS=Fps<T,Kind>;if (fs.empty()) return FPS{1};auto comp=[](const FPS &a,const FPS &b){ return a.size() > b.size(); 
};priority_queue<FPS,vector<FPS>,decltype(comp)> pq(comp);for (FPS &f: fs) pq.push(move(f));while (pq.size()>1U){FPS f=move(pq.top()); pq.pop();FPS g=move(pq.top()); pq.pop();pq.push(f*g);}return move(pq.top());}template<class T,int Kind> Fps<T,Kind> prodall(vector<Fps<T,Kind>> &fs){using FPS=Fps<T,Kind>;if (fs.empty()) return FPS{1};deque<FPS> dq;for (FPS &f: fs) dq.push_back(move(f));while (dq.size()>1U){dq.push_back(dq[0]*dq[1]);dq.pop_front();dq.pop_front();}return move(dq[0]);}/*- -------- 総積 Πfs[i] fsは破壊されるfps g=fpsspace::prodallPque(fs); //priority_queue版fps g=fpsspace::prodall(fs); //deque版*/}template<class T> struct combination_{vector<T> f,g; ll mxN=0;combination_(){}combination_(ll maxN): f(maxN+1,1),g(maxN+1),mxN(maxN) {for (ll i=1;i<=mxN;++i) { f[i]=f[i-1]*i; }g[mxN]=1/f[mxN];for (ll i=mxN;i>=1;--i) { g[i-1]=g[i]*i; }}T P(ll n,ll r){ return (n<0 || r<0 || n<r) ? T(0) : f[n]*g[n-r]; } //nPrT H(ll n,ll r){ return operator()(n+r-1,n-1); }//nHrT inv(ll n) { return f[n-1] * g[n]; } //1/nT fact(ll n) { return f[n]; } //n!T finv(ll n) { return g[n]; } //1/n!T operator()(ll n,ll r){if (r<0) return 0;if (n<0) return operator()(-n+r-1,r) * ((r&1)?-1:1); //-nCr = (-1)^r * n+r-1Crif (n<r) return 0;if (n<=mxN) return f[n]*g[n-r]*g[r]; //通常//n巨大、rかn-r小if (n-r<r) r=n-r;T bunsi=1,bunbo=1;for (ll i=0;i<r;++i) bunsi*=n-i;for (ll i=0;i<r;++i) bunbo*=i+1;return bunsi/bunbo;}template<class SP>vector<T> CnLnR(long long nL,long long nR,long long r,SP sp){if (nR-nL+1<=0) return vector<T>();if (r<0) return vector<T>(nR-nL+1,0);vector<T> v=sp(nL-r+1,nR-r+1,r);for (T& e: v) e*=finv(r);return v;}template<class SP>vector<T> HrLrR(long long n,long long rL,long long rR,SP sp){//r<0不可return CnLnR(n-1+rL,n-1+rR,n-1,sp);}};using combination = combination_<modint>;void cin2solve(){ll n,m,P;cin >> n >> m >> P;modint::set_mod((int)P);combination cmb(n);vector<fps> fs;rep(i,1,n) fs.push_back(fps{1,i});fps g=fpsspace::prodall(fs);modint ans=0;rep(k,1,m){modint 
va=cmb(m,k);va*=cmb.P(n-k,m-k);va*=g.at(k);ans+=va;}ans/=cmb.P(n,m);cout << ans.val() << '\n';return;}//////////////////////////////////////////int main(){#if 1//SolvingSpace::labo();cin2solve();//SolvingSpace::generand();#elsell t; cin >> t;rep(i,0,t-1){SolvingSpace::cin2solve();//SolvingSpace::generand();}#endifcerr << timeget() <<"ms"<< '\n';return 0;}