結果

問題 No.2975 単調増加部分積
ユーザー hamamu
提出日時 2024-02-07 22:27:19
言語 C++17
(gcc 13.3.0 + boost 1.87.0)
結果
AC  
実行時間 39 ms / 10,000 ms
コード長 31,400 bytes
コンパイル時間 5,370 ms
コンパイル使用メモリ 289,052 KB
最終ジャッジ日時 2025-02-19 02:48:51
ジャッジサーバーID
(参考情報)
judge1 / judge1
このコードへのチャレンジ
(要ログイン)
ファイルパターン 結果
other AC * 24
権限があれば一括ダウンロードができます

ソースコード

diff #
プレゼンテーションモードにする

#if !defined(MYLOCAL)//
#pragma GCC optimize("Ofast")
#if defined(NDEBUG)
#undef NDEBUG
#endif
#include "bits/stdc++.h"
#if !defined(_MSC_VER) && __has_include(<atcoder/all>)
#include <atcoder/all>
using namespace atcoder;
#endif
using namespace std;
using ll=long long; using dd=double;
using vll=vector< ll>; using vdd=vector< dd>;
using vvll=vector< vll>; using vvdd=vector<vdd>;
using vvvll=vector< vvll>; using vvvdd=vector<vvdd>;
using vvvvll=vector<vvvll>;
using pll=pair<ll,ll>; using tll=tuple<ll,ll,ll>; using qll=tuple<ll,ll,ll,ll>;
using vpll=vector< pll>; using vtll=vector< tll>; using vqll=vector< qll>;
using vvpll=vector<vpll>; using vvtll=vector<vtll>; using vvqll=vector<vqll>;
using namespace chrono;
constexpr ll INF = 1001001001001001001;
// Start-up hook: constructing the global `fast` object configures the standard streams.
struct Fast{
    Fast(){
        // Untie cin from cout and turn off synchronisation with C stdio.
        cin.tie(0);
        ios::sync_with_stdio(false);
        // Floating-point output: fixed notation, max_digits10 digits for double.
        constexpr int digits=numeric_limits<double>::max_digits10;
        cout<<fixed;
        cout<<setprecision(digits);
    }
} fast;
// Counting loops over an `ll` index with inclusive bounds.
// REPS: i = S..E ascending, REP: i = 0..N-1, DEPS: i = E..S descending, DEP: i = N-1..0.
#define REPS(i, S, E) for (ll i = (S); i <= (E); i++)
#define REP(i, N) REPS(i, 0, (N)-1)
#define DEPS(i, S, E) for (ll i = (E); i >= (S); i--)
#define DEP(i, N) DEPS(i, 0, (N)-1)
// EXPAND re-scans its argument once more; the original "VS" note suggests this is a workaround
// for MSVC's __VA_ARGS__ handling - TODO confirm.
#define EXPAND( x ) x//VS
// overloadN yields the argument that follows the first N arguments; the variadic front-ends below
// use it to pick an implementation macro by argument count.
#define overload3(_1,_2,_3,name,...) name
#define overload4(_1,_2,_3,_4,name,...) name
#define overload5(_1,_2,_3,_4,_5,name,...) name
// rep(i,S,E): i = S..E ascending (inclusive); rep(i,S,E,t): the same with step t.
#define rep3(i, S, E) for (ll i = (S); i <= (E); i++)
#define rep4(i, S, E, t) for (ll i = (S); i <= (E); i+=(t))
#define rep(...) EXPAND(overload4(__VA_ARGS__,rep4,rep3,_,_)(__VA_ARGS__))
// dep(i,E,S): i = E..S descending (inclusive); dep(i,E,S,t): the same with step t.
// Note the argument order: the upper bound E comes first.
#define dep3(i, E, S) for (ll i = (E); i >= (S); i--)
#define dep4(i, E, S, t) for (ll i = (E); i >= (S); i-=(t))
#define dep(...) EXPAND(overload4(__VA_ARGS__, dep4, dep3,_,_)(__VA_ARGS__))
// each(e,v): range-for over v; each(a,b,v) / each(a,b,c,v) / each(a,b,c,d,v): range-for with a
// structured binding of 2 / 3 / 4 names. Elements are bound with auto&&.
#define each2(e,v) for (auto && e:v)
#define each3(a,b,v) for (auto &&[a,b]:v)
#define each4(a,b,c,v) for (auto &&[a,b,c]:v)
#define each5(a,b,c,d,v) for (auto &&[a,b,c,d]:v)
#define each(...) EXPAND(overload5(__VA_ARGS__,each5,each4,each3,each2,_)(__VA_ARGS__))
// ALL(v): begin/end pair; ALL(v,E): elements 0..E; ALL(v,S,E): elements S..E (E inclusive).
// Each form expands to two comma-separated iterator expressions.
#define ALL1(v) (v).begin(), (v).end()
#define ALL2(v,E) (v).begin(), (v).begin()+((E)+1)
#define ALL3(v,S,E) (v).begin()+(S), (v).begin()+((E)+1)
#define ALL(...) EXPAND(overload3(__VA_ARGS__, ALL3, ALL2, ALL1)(__VA_ARGS__))
#define all ALL
// RALL: the same three forms built on rbegin()/rend(), so positions count from the back.
#define RALL1(v) (v).rbegin(), (v).rend()
#define RALL2(v,E) (v).rbegin(), (v).rbegin()+((E)+1)
#define RALL3(v,S,E) (v).rbegin()+(S), (v).rbegin()+((E)+1)
#define RALL(...) EXPAND(overload3(__VA_ARGS__, RALL3, RALL2, RALL1)(__VA_ARGS__))
#define rall RALL
// Replaces `a` with `b` when `b` is strictly greater; returns true exactly when `a` was changed.
template<class T> inline bool chmax(T &a, T b) {
    const bool raised = a < b;
    if (raised) a = b;
    return raised;
}
// Replaces `a` with `b` when `b` is strictly smaller; returns true exactly when `a` was changed.
template<class T> inline bool chmin(T &a, T b) {
    const bool lowered = a > b;
    if (lowered) a = b;
    return lowered;
}
// Largest element of v[S..E] (both ends inclusive). v[S] is read unconditionally.
template<class T> inline T MaxE(vector<T>&v,ll S,ll E){
    T best=v[S];
    for (ll idx=S; idx<=E; ++idx){
        if (best<v[idx]) best=v[idx];
    }
    return best;
}
// Smallest element of v[S..E] (both ends inclusive). v[S] is read unconditionally.
template<class T> inline T MinE(vector<T>&v,ll S,ll E){
    T best=v[S];
    for (ll idx=S; idx<=E; ++idx){
        if (best>v[idx]) best=v[idx];
    }
    return best;
}
// Largest element of the whole vector.
template<class T> inline T MaxE(vector<T> &v) {
    const ll last=(ll)v.size()-1;
    return MaxE(v,0,last);
}
// Smallest element of the whole vector.
template<class T> inline T MinE(vector<T> &v) {
    const ll last=(ll)v.size()-1;
    return MinE(v,0,last);
}
// Maximum over positions S..E (inclusive) of any container with random-access begin().
template<class T> inline auto maxe(T &&v,ll S,ll E){
    auto first=(v).begin()+(S);
    auto last=(v).begin()+((E)+1);
    return *max_element(first,last);
}
// Maximum over the whole container.
template<class T> inline auto maxe(T &&v){
    auto first=(v).begin();
    auto last=(v).end();
    return *max_element(first,last);
}
// Minimum over positions S..E (inclusive).
template<class T> inline auto mine(T &&v,ll S,ll E){
    auto first=(v).begin()+(S);
    auto last=(v).begin()+((E)+1);
    return *min_element(first,last);
}
// Minimum over the whole container.
template<class T> inline auto mine(T &&v){
    auto first=(v).begin();
    auto last=(v).end();
    return *min_element(first,last);
}
// Sum of v[S..E] (both ends inclusive), starting from a value-initialised T.
template<class T> inline T Sum(vector<T> &v,ll S,ll E){
    T total=T();
    for (ll idx=S; idx<=E; ++idx){
        total+=v[idx];
    }
    return total;
}
// Sum of the whole vector.
template<class T> inline T Sum(vector<T> &v) {
    return Sum(v,0,v.size()-1);
}
// Sum over positions S..E (inclusive) of a container; U defaults to the container's value_type
// and is also the accumulator's starting value U().
template<class T,class U=typename remove_reference<T>::type::value_type>
inline U sum(T &&v,ll S,ll E) {
    auto first=(v).begin()+(S);
    auto last=(v).begin()+((E)+1);
    return accumulate(first,last,U());
}
// Sum over the whole container.
template<class T> inline auto sum(T &&v) {
    return sum(v,0,v.end()-v.begin()-1);
}
// Container size as a signed ll.
template<class T> inline ll sz(T &&v){
    const auto count=v.size();
    return (ll)count;
}
// Ceiling of a/b for any non-zero divisor b (either sign).
// Built from truncating division plus a remainder check, so it neither forms a+b-1 nor negates a
// and therefore cannot overflow on operands near the long long limits. The only undefined cases
// left are those of the built-in operators themselves: b == 0 and LLONG_MIN / -1.
inline long long CEIL(long long a,long long b){
    const long long q=a/b, r=a%b;
    // Truncation rounded toward zero; that was a round-down exactly when the true quotient is
    // positive, i.e. when the (non-zero) remainder has the same sign as the divisor.
    return (r!=0 && ((r<0)==(b<0))) ? q+1 : q;
}
// Floor of a/b for any non-zero divisor b (either sign); same restrictions as CEIL.
inline long long FLOOR(long long a,long long b){
    const long long q=a/b, r=a%b;
    // Truncation rounded up exactly when the true quotient is negative, i.e. when the (non-zero)
    // remainder and the divisor have opposite signs.
    return (r!=0 && ((r<0)!=(b<0))) ? q-1 : q;
}
//pair
// Element-wise arithmetic and printing for std::pair.
//
// Declaration order matters here: these are templates, and an operator used inside a template body
// is looked up among the declarations visible at that point (plus ADL, which for std::pair of
// built-in types only searches namespace std). Unary minus and pair-minus-pair are therefore
// declared BEFORE `scalar - pair`, whose body uses unary minus; with the previous order that
// overload could not be instantiated.

// pair (op)= pair : applies the operation to .first and .second independently.
template<class T,class S> inline std::pair<T,S>& operator+=(std::pair<T,S> &a,const std::pair<T,S> &b){ a.first+=b.first; a.second+=b.second; return a; }
template<class T,class S> inline std::pair<T,S>& operator-=(std::pair<T,S> &a,const std::pair<T,S> &b){ a.first-=b.first; a.second-=b.second; return a; }
template<class T,class S> inline std::pair<T,S>& operator*=(std::pair<T,S> &a,const std::pair<T,S> &b){ a.first*=b.first; a.second*=b.second; return a; }
template<class T,class S> inline std::pair<T,S>& operator/=(std::pair<T,S> &a,const std::pair<T,S> &b){ a.first/=b.first; a.second/=b.second; return a; }
template<class T,class S> inline std::pair<T,S>& operator%=(std::pair<T,S> &a,const std::pair<T,S> &b){ a.first%=b.first; a.second%=b.second; return a; }
// pair (op)= scalar : applies the same right-hand value to both members.
template<class T,class S,class R> inline std::pair<T,S>& operator+=(std::pair<T,S> &a,R b){ a.first+=b; a.second+=b; return a; }
template<class T,class S,class R> inline std::pair<T,S>& operator-=(std::pair<T,S> &a,R b){ a.first-=b; a.second-=b; return a; }
template<class T,class S,class R> inline std::pair<T,S>& operator*=(std::pair<T,S> &a,R b){ a.first*=b; a.second*=b; return a; }
template<class T,class S,class R> inline std::pair<T,S>& operator/=(std::pair<T,S> &a,R b){ a.first/=b; a.second/=b; return a; }
template<class T,class S,class R> inline std::pair<T,S>& operator%=(std::pair<T,S> &a,R b){ a.first%=b; a.second%=b; return a; }
// -pair : both members multiplied by -1.
template<class T,class S> inline std::pair<T,S> operator-(const std::pair<T,S> &a){ std::pair<T,S> c=a; return c*=(-1); }
// pair - pair.
template<class T,class S> inline std::pair<T,S> operator-(const std::pair<T,S> &a,const std::pair<T,S> &b){ std::pair<T,S> c=a; return c-=b; }
// pair (op) rhs : copies the left operand and forwards to the matching compound assignment
// (rhs may be a scalar or, through overload resolution of the compound form, another pair).
template<class T,class S,class R> inline std::pair<T,S> operator+(const std::pair<T,S> &a,R b){ std::pair<T,S> c=a; return c+=b; }
template<class T,class S,class R> inline std::pair<T,S> operator-(const std::pair<T,S> &a,R b){ std::pair<T,S> c=a; return c-=b; }
template<class T,class S,class R> inline std::pair<T,S> operator*(const std::pair<T,S> &a,R b){ std::pair<T,S> c=a; return c*=b; }
template<class T,class S,class R> inline std::pair<T,S> operator/(const std::pair<T,S> &a,R b){ std::pair<T,S> c=a; return c/=b; }
template<class T,class S,class R> inline std::pair<T,S> operator%(const std::pair<T,S> &a,R b){ std::pair<T,S> c=a; return c%=b; }
// scalar - pair : computed as (-pair) + scalar.
template<class T,class S,class R> inline std::pair<T,S> operator-(R b,const std::pair<T,S> &a){ std::pair<T,S> c=-a; return c+=b; }
// Prints "first second" separated by one space, with no trailing newline.
template<class T,class S> inline std::ostream &operator<<(std::ostream &os,const std::pair<T,S> &a){ return os << a.first << ' ' << a.second; }
//tuple
// Prints the three tuple elements separated by single spaces, with no trailing newline.
template<class T,class S,class R> inline ostream &operator<<(ostream &os,const tuple<T,S,R> &a){
    os << get<0>(a);
    os << ' ' << get<1>(a);
    os << ' ' << get<2>(a);
    return os;
}
// Prints the four tuple elements separated by single spaces, with no trailing newline.
template<class T,class S,class R,class Q> inline ostream &operator<<(ostream &os,const tuple<T,S,R,Q> &a){
    os << get<0>(a);
    os << ' ' << get<1>(a);
    os << ' ' << get<2>(a);
    os << ' ' << get<3>(a);
    return os;
}
//vector
// Prints the elements separated by single spaces; nothing is written before the first element
// except an empty string, and there is no trailing separator or newline.
template<class T> inline ostream &operator<<(ostream &os,const vector<T> &a){
    const ll count=(ll)a.size();
    ll pos=0;
    while (pos<count){
        const char *sep = (pos>0) ? " " : "";
        os<<sep<<a[pos];
        pos++;
    }
    return os;
}
#endif//end
#if defined(_MSC_VER) && __has_include(<atcoder/all>)
#include <atcoder/all>
using namespace atcoder;
#endif
// Stopwatch object: `st` is captured when this global is initialised, and calling timeget()
// returns the time elapsed since then in whole milliseconds (microsecond count divided by 1000).
struct{
    system_clock::time_point st = system_clock::now();
    ll operator()()const{
        const auto elapsed=system_clock::now()-st;
        const auto micros=duration_cast<microseconds>(elapsed).count();
        return micros/1000;
    }
} timeget;
//////////////////////////////////////////
namespace fpsspace{
using Int = long long;
using ll = long long;
constexpr int inf=int(1e9);
/********* utility *********/
// a^n by binary exponentiation.
//   a : base; for n < 0 it is first replaced by T(1)/a, so T must support division in that case.
//   n : exponent (any sign except LLONG_MIN, whose negation overflows).
// Returns T(1) for n == 0.
// The base is squared only while exponent bits remain. The previous loop squared once more after
// the last bit, which wasted a multiplication and was signed-overflow UB for integer T whenever
// the final square did not fit (e.g. 3^39 as long long).
template<class T> T POW(T a,long long n){
    if (n<0) a=T(1)/a,n=-n;
    T r=1;
    while (n>0){
        if (n&1) r*=a;
        n>>=1;
        if (n>0) a*=a;
    }
    return r;
}
// min(a*b, l) for a,b >= 0: the product is only formed when the division test shows it stays
// within l; otherwise the cap l is returned.
ll LimitMul(ll a,ll b,ll l=ll(9e18)){
    if (b==0) return a*b;
    if (a<=l/b) return a*b;
    return l;
}
/*---- 1/i i=1~d ----*/
// Tag type used to select an overload by Kind.
template<int Kind> struct Wrap{};
// Table of modular inverses for Kind 1 or 2: entry i (i >= 1) holds 1/i, built from
// inv[i] = -inv[MOD % i] * (MOD / i). The table is a function-local static that only ever grows,
// so it is shared by every call with the same T/Kind; the returned reference covers at least
// indices 0..d. Entry 0 is a placeholder (1).
// NOTE(review): the cache is never reset, so entries computed under one T::mod() would be reused
// if the modulus later changed - confirm callers keep the modulus fixed.
template<class T,int Kind,class=enable_if_t<Kind==1 || Kind==2>>
vector<T> &Invs(int d,Wrap<Kind>){
    static vector<T> table(2,T(1));
    int MOD = T::mod();
    int i=(int)table.size();
    while (i<=d){
        table.push_back(-table[MOD%i]*T(MOD/i));
        ++i;
    }
    return table;
}
// Table of reciprocals for Kind 0: entry i (i >= 1) holds T(1)/i; entry 0 is a value-initialised
// placeholder. Same growing function-local static as above.
template<class T> vector<T> &Invs(int d,Wrap<0>){
    static vector<T> table(1);
    int i=(int)table.size();
    while (i<=d){
        table.push_back(T(1)/i);
        ++i;
    }
    return table;
}
// Table of factorials: entry i holds i! for i = 0..d (at least). The table is a function-local
// static that is extended on demand and shared across calls with the same T.
template<class T> vector<T> &Fact(int d){
    static vector<T> table(1,T(1));
    int i=(int)table.size();
    while (i<=d){
        table.push_back(table.back()*T(i));
        ++i;
    }
    return table;
}
// Table of inverse factorials: entry i holds 1/i! for i = 0..d (at least), obtained by multiplying
// the previous entry by 1/i from Invs. Function-local static, extended on demand.
template<class T,int Kind> vector<T> &FInv(int d){
    static vector<T> table(1,T(1));
    const vector<T> &recip=Invs<T>(d,Wrap<Kind>{});
    int i=(int)table.size();
    while (i<=d){
        table.push_back(table.back()*recip[i]);
        ++i;
    }
    return table;
}
// Berlekamp-Massey. Scans the terms of `a` once and returns a coefficient vector C (starting as
// {1}) that is corrected every time its discrepancy d = sum_k C[k]*a[n-k] is non-zero.
// NOTE(review): the original (garbled) comment appears to say that `a` holds the leading terms of
// a rational series A = P/Q, that the result is the denominator Q, and that the routine is meant
// for Kind=1,2 (exact division) - confirm.
template <class T> vector<T> BerlekampMassey(const vector<T> &a){
vector<T> C={1},B={1};// C: current candidate; B: the candidate saved at the most recent swap
int m=1; // steps since B was saved; used as the shift of B in C_update
T b=T(1); // discrepancy that was current when B was saved
// C -= (d/b) * x^m * B, enlarging C first when the shifted B is longer.
auto C_update=[](vector<T> &C,T d,T b,vector<T> &B,int m){
T d_b=d/b;
int M=(int)B.size();
if ((int)C.size()<M+m) C.resize(M+m);
for (int i=0;i<M;++i) C[i+m]-=d_b*B[i];
};
for (int n=0;n<(int)a.size();++n){
T d=T(0);
for (int k=0;k<(int)C.size();++k) d+=C[k]*a[n-k]; // discrepancy of C at position n
if (d!=T(0)){// C does not annihilate a[n]; correct it
if (2*((int)C.size()-1) <= n){
// Keep the pre-update C: it becomes the new B, with d as its discrepancy.
vector<T> tmp=C;
C_update(C,d,b,B,m); //C -= d/b * (B shifted by m)
B.swap(tmp); b=d; m=0;
}
else C_update(C,d,b,B,m); //C -= d/b * (B shifted by m)
}
m++;
}
return C;
}
// Solves the first-order linear ODE  a * F' = b * F  for the power series F with F(0) = f0,
// where a and b are sparse series, and returns F up to degree dmx_.
//   a_, b_ : sparse coefficient lists; the lowest degree of a_ must not exceed that of b_.
//   f0     : constant term of the solution.
//   dmx_   : highest degree to compute.
//   invs_  : optional table with invs_[i] = 1/i; when empty, Invs<T> supplies it.
template<class FPS,class SPFPS,class T=typename FPS::value_type,class S>
FPS de_sparse( // a*F' = b*F, solved for F
const SPFPS &a_,const SPFPS &b_,S f0,Int dmx_,const vector<T> &invs_=vector<T>())
{
assert(a_.lowdeg()<=b_.lowdeg());
int dmx=(int)dmx_;
const vector<T> &invs = invs_.size() ? invs_ : Invs<T>(dmx,Wrap<FPS::kind>{});
// Divide both sides by x^lowdeg(a) and by a's first stored coefficient, so that a's first term
// becomes 1 at degree 0 (assuming the terms are stored in increasing degree order).
SPFPS a=a_.shift(-a_.lowdeg()),b=b_.shift(-a_.lowdeg());
T a0inv=T(1)/a.co(0);
a*=a0inv,b*=a0inv;
// Drop that leading 1; it is accounted for by the division by d below.
a.erase(a.begin());
FPS f({T(f0)},dmx);
// Coefficient of x^(d-1) on both sides gives
//   d*f[d] = sum_i b_i*f[d-1-i] - sum_{i>=1} a_i*(d-i)*f[d-i].
for (int d=1;d<=dmx;++d){
for (auto [bb,i]:b){
if (d-1-i>=0) f.at(d)+=bb*f[d-1-i];
}
for (auto [aa,i]:a){
if (d-i>=0) f.at(d)-=aa*f[d-i]*(d-i);
}
f.at(d)*=invs[d];
}
return f;
}
/********* FPS *********/
// Sparse power series / polynomial stored as a vector of (coefficient, degree) pairs.
// Several members (deg(), lowdeg(), pow, the callers' use of co(0)) read the first/last stored
// term as the lowest/highest degree, i.e. they rely on the terms being in increasing degree order;
// Norm() establishes that order.
template<class T> struct sparseFps: vector<pair<T,Int>>{
using vector<pair<T,Int>>::vector; // inherit the vector constructors
// Sorts the terms by degree, merges terms of equal degree by adding their coefficients, and
// removes terms whose merged coefficient is zero. Returns *this.
sparseFps &Norm(){
sort(this->begin(),this->end(),
[](const auto &x,const auto &y){return x.second<y.second;});
int j=-1; // index of the last term written to the compacted prefix
for (int i=0;i<this->size();++i){
if (j>=0 && deg(j)==deg(i)){
co(j)+=co(i);
}
else{
// Advance j unless the term at j cancelled to zero, in which case it is overwritten.
if (!(j>=0 && co(j)==T(0))) ++j;
(*this)[j]=(*this)[i];
}
}
if (j>=0 && co(j)==T(0)) --j;
this->resize(j+1);
return *this;
}
/*---- I/F ----*/
// Appends the term co*x^deg (no sorting or merging is done here).
template<class S,class R>
void set(S co,R deg){ this->emplace_back(T(co),Int(deg)); }
Int deg()const{ return this->empty() ? -1 : this->back().second; }// degree of the last stored term, -1 if empty
T co(Int i)const{ return (*this)[i].first; }// coefficient of the i-th stored term (i is a position, not a degree)
T &co(Int i) { return (*this)[i].first; }
Int deg(Int i)const{ return (*this)[i].second; }// degree of the i-th stored term
Int &deg(Int i) { return (*this)[i].second; }
// Degree of the first stored term, or inf (1e9) if empty.
Int lowdeg()const{ return this->empty() ? inf : this->front().second; }
// Sum: concatenates the term lists and normalises.
sparseFps &operator+=(const sparseFps &sg){
this->insert(this->end(),sg.begin(),sg.end());
return Norm();
}
sparseFps operator+(const sparseFps &sg)const{ return sparseFps(*this)+=sg; }
// Scalar multiple: every coefficient is multiplied by b.
sparseFps &operator*=(T b){ for (auto&&[c,_]:*this) c*=b; return *this; }
sparseFps operator*(T b)const{ return sparseFps(*this)*=b; }
// Product: all pairwise term products, then normalised.
sparseFps &operator*=(const sparseFps &sg){ return *this=*this*sg; }
sparseFps operator*(const sparseFps &sg)const{
sparseFps ret;
for (auto&&[cf,df]:*this) for (auto&&[cg,dg]:sg) ret.set(cf*cg,df+dg);
return ret.Norm();
}
// Multiplies by x^k (k may be negative); terms whose degree would become negative are dropped.
sparseFps shift(Int k)const{
sparseFps ret;
for (auto&&[co,d]:*this) if (d+k>=0) ret.set(co,d+k);
return ret;
}
// Formal derivative.
sparseFps diff()const{
sparseFps ret;
for (auto&&[co,d]:*this) if (d>0) ret.set(co*d,d-1);
return ret;
}
// exp(f) up to degree dmx as a dense FPS, obtained from the ODE F' = f' * F with F(0)=1.
template<class FPS> FPS exp(Int dmx)const{
assert(lowdeg()!=0); // the first stored term must not be a constant term
return de_sparse<FPS>(sparseFps{{1,0},},diff(),1,dmx);
}
// f^k up to degree dmx as a dense FPS.
//   k     : exponent; a negative k is rejected by the assert when the lowest degree is positive.
//   invs_ : optional table with invs_[i] = 1/i; when empty, Invs<T> supplies it.
template<class FPS>
FPS pow(ll k,Int dmx,const vector<T> &invs_=vector<T>())const{
assert(!(k<0 && lowdeg()>0));
if (k==0) return FPS({1},dmx);
//-- d: highest degree to fill in: dmx for k<0 or when deg()*k exceeds dmx, otherwise deg()*k
int d = (k<0 || LimitMul(deg(),k)>(ll)dmx) ? int(dmx) : int(deg()*k);
//-- invs[i]=1/i for i=1..d
const vector<T> &invs = invs_.size() ? invs_ : Invs<T>(d,Wrap<FPS::kind>{});
//-- s: lowest degree of f; the result starts at degree s*k
int s=(int)lowdeg();
if (k>0 && LimitMul(s,k)>(ll)dmx) return FPS(dmx);// every term lies above dmx: all zero
//-- g = (f / x^s)^k, computed term by term from the relation f*g' = k*f'*g
T f0inv=T(1)/co(0);
FPS g({POW(co(0),k)},dmx);
for (int i=1;i<=d-s*k;++i){ // for k<0 the assert above forces s=0
for (int j=1;j<(int)this->size();++j){
auto [c,dg]=(*this)[j];
int b=int(dg)-s; // degree of this term relative to the lowest one
if (i-b<0)break;
g.at(i)+=c*g.at(i-b)*(T(k)*b-i+b);
}
g.at(i)*=f0inv*invs[i];
}
return g.shift(Int(s*k));
}
};
/********* FPS *********/
// Dense formal power series: a vector<T> of coefficients (index = degree) together with a
// truncation degree dMx. Multiplication is delegated to the free function Prod selected by Kind.
template<
class T, // coefficient type
int Kind // product backend: 0 = direct term-by-term, 1 = convolution under T::mod(), 2 = three-modulus convolution; 3 is accepted but its Prod is disabled in this file
>
struct Fps: vector<T>{
static_assert(0<=Kind && Kind<=3);
static constexpr int kind=Kind;
int dMx=int(1e6); // highest degree kept (Cut() truncates to dMx+1 coefficients)
using vT = vector<T>;
/*---- utility ----*/
// Number of stored coefficients as int.
int isize()const{ return (int)vector<T>::size(); }
int NormSize()const{// size after ignoring trailing zero coefficients
int sv=isize();
while (sv>0 && (*this)[sv-1]==T(0)) --sv;
return sv;
}
int Deg()const{ return NormSize()-1; } // degree ignoring trailing zeros; -1 for the zero series
// Drops coefficients above degree dMx.
Fps &Cut(){ return cut(dMx); }
// Pads with zeros until at least dMx+1 coefficients are stored.
Fps &ZeroExtend(){
int anm=max(0,dMx-isize()+1);
vT::insert(vT::end(),anm,T(0));
return *this;
}
// The smaller of the two truncation degrees.
int MinD(const Fps &g)const{ return min(dMx,g.dMx); }
// Lowers dMx to MinD(g) and truncates accordingly.
void MergeD(const Fps &g){ dMx=MinD(g); Cut(); }
// *this += Sign*g, restricted to the merged truncation degree.
template <int Sign> Fps &Add(const Fps &g){
MergeD(g);
for (int i=min(dMx,g.Deg());i>=0;--i) at(i)+=Sign*g[i];
return *this;
}
Fps ProdSparse(const sparseFps<T> &g,int d)const{//f*g mod x^(d+1), g given sparsely
Fps ret(d);
for (auto&&[co,dg]:g) for (int i=0;i<(int)isize();++i){
if (dg+i>d) break;
ret.at(dg+i)+=co*(*this)[i];
}
return ret;
}
Fps InvSparse(const sparseFps<T> &g,int d)const{//f/g mod x^(d+1); g's first stored term must be a non-zero constant
assert(!g.empty() && g.deg(0)==0 && g.co(0)!=0);
//-- scale so that g's constant term becomes 1
T c0inv=T(1)/g.co(0);
Fps ret=((*this)*c0inv).setdmx(d);
if (g.size()==1u) return ret;
sparseFps<T> gg=g*c0inv;
//-- forward substitution: once ret[i] is final, subtract its contribution from higher degrees
//   (the break assumes g's terms are stored in increasing degree order)
for (int i=0; i+(int)gg.deg(1)<=d; ++i){
for (int j=1; j<(int)gg.size(); ++j){
auto [co,dg]=gg[j];
int ii=i+(int)dg;
if (d<ii)break;
ret.at(ii)-=ret.at(i)*co;
}
}
return ret;
}
Fps &LogSparse( //f+=log(g^k) for g=1+a*x^b
const sparseFps<T> &g,ll k,const vector<T> &invs_=vector<T>())
{
assert(g.size()==2U && g.co(0)==T(1) && g.deg(0)==0);
const vector<T> &invs = invs_.size() ? invs_ : Invs<T>(dMx,Wrap<Kind>{});
int b=(int)g.deg(1);
T c=g.co(1)*k;
// Adds k * a^i * (-1)^(i-1) / i to the coefficient of x^(i*b).
for (int i=1;i*b<=dMx;++i,c*=-g.co(1)) at(i*b)+=c*invs[i];
return *this;
}
/*---- constructors ----*/
// Zero series with truncation degree dmx.
explicit Fps(Int dmx=int(1e6)): dMx(int(dmx)){}
// From a coefficient list (lowest degree first), truncated to dmx.
Fps(initializer_list<T> i,Int dmx=int(1e6)):
vT(i.begin(),i.end()),dMx(int(dmx)){ Cut(); }
// From an iterator range of coefficients, truncated to dmx.
template <class It,class=typename iterator_traits<It>::iterator_category>
Fps(It l,It r,Int dmx=int(1e6)): vT(l,r),dMx(int(dmx)){ Cut(); }
// Takes over a coefficient vector; unlike the constructors above this one does not call Cut().
Fps(vector<T> &&v,Int dmx=int(1e6)): vT(move(v)),dMx(int(dmx)){}
Fps(const sparseFps<T> &sf,Int dmx=int(1e6)):dMx(int(dmx)){ // sparse -> dense; terms above dmx are skipped
for (auto&&[co,deg]:sf) if (deg<=dmx) at(deg)=co;
}
/*---- I/F ----*/
sparseFps<T> tosparse()const{ // dense -> sparse: one term per non-zero coefficient, in increasing degree
sparseFps<T> ret;
for (int i=0;i<isize();++i){
if ((*this)[i]!=T(0)) ret.set((*this)[i],i);
}
return ret;
}
// Number of stored coefficients (signed); hides vector::size().
Int size()const{ return (Int)vector<T>::size(); }
// Removes trailing zeros (modifies *this) and returns the degree; -1 for the zero series.
Int deg(){ fit(); return size()-1; }
// Index of the first non-zero coefficient, or inf (1e9) if there is none.
Int lowdeg()const{
for (int i=0;i<isize();++i){
if ((*this)[i]!=T(0)) return i;
}
return inf;
}
// Sets the truncation degree and truncates.
Fps &setdmx(Int dmx){ dMx=(int)dmx; return Cut(); }
// Read access: coefficients beyond the stored size read as zero.
T at(Int i)const{ return size()<=i ? T(0) : (*this)[i]; }
// Write access: grows the vector so that index i exists.
T &at(Int i){
if (size()<=i) this->resize(i+1);
return (*this)[i];
}
// Removes trailing zero coefficients.
Fps &fit(){
this->resize(NormSize());
return *this;
}
// Series-series and series-sparse arithmetic. Division is carried out by InvSparse in both cases.
Fps &operator+=(const Fps &g){ return Add<1>(g); }
Fps &operator-=(const Fps &g){ return Add<-1>(g); }
Fps &operator*=(const Fps &g){ return *this=*this*g; }
Fps &operator/=(const Fps &g){ return *this=*this/g; }
Fps &operator*=(const sparseFps<T> &g){ return *this=*this*g; }
Fps &operator/=(const sparseFps<T> &g){ return *this=*this/g; }
// Scalar arithmetic: + and - act on the constant term only, * and / on every coefficient.
Fps &operator+=(T c){ at(0)+=c; return *this; }
Fps &operator-=(T c){ at(0)-=c; return *this; }
Fps &operator*=(T c){ for (auto&& e: *this) e*=c; return *this; }
Fps &operator/=(T c){ return (*this)*=T(1)/c; }
Fps operator+(const Fps &g)const{ return Fps(*this)+=g; }
Fps operator-(const Fps &g)const{ return Fps(*this)-=g; }
Fps operator*(const Fps &g)const{ return Prod(*this,g,MinD(g)); }
Fps operator/(const Fps &g)const{ return InvSparse(g.tosparse(),MinD(g)); }
Fps operator*(const sparseFps<T> &g)const{ return ProdSparse(g,dMx); }
Fps operator/(const sparseFps<T> &g)const{ return InvSparse(g,dMx); }
Fps operator+(T c)const{ return Fps(*this)+=c; }
Fps operator-(T c)const{ return Fps(*this)-=c; }
Fps operator*(T c)const{ return Fps(*this)*=c; }
Fps operator/(T c)const{ return Fps(*this)/=c; }
Fps operator-()const{ return Fps(*this)*=T(-1); }
friend Fps operator+(T c,const Fps &f){ return f+c; }
friend Fps operator-(T c,const Fps &f){ return -f+c; }
friend Fps operator*(T c,const Fps &f){ return f*c; }
T prod1(const Fps &g,Int k_)const{ //[x^k]f*g; zero when k exceeds the merged truncation degree
int df=Deg(),dg=g.Deg(),k=(int)k_;
if (MinD(g)<k) return T(0);
T ret=T(0);
for (int i=max(0,k-dg),j=k-i; i<=df&&j>=0; ++i,--j) ret+=(*this)[i]*g[j];
return ret;
}
T bostanmori(const Fps &g,ll k)const{ //[x^k]f/g, halving k each round; g[0] must be non-zero
assert(g.at(0)!=0);
Fps P=Fps(*this).setdmx(inf),Q=Fps(g).setdmx(inf);
for (; k>0; k>>=1){
Fps Q1=Q;
for (int i=1;i<Q1.isize();i+=2) Q1[i]*=-1; //Q1 = Q with odd-degree coefficients negated, i.e. Q(-x)
Fps PQ=P*Q1,QQ=Q*Q1;
P.clear(),Q.clear();
for (int i=k&1;i<PQ.isize();i+=2) P.push_back(PQ[i]);//P = coefficients of PQ with the same parity as k
for (int i=0; i<QQ.isize();i+=2) Q.push_back(QQ[i]);//Q = even-degree coefficients of QQ
}
return P.at(0)/Q[0];
}
Fps berlekamp_massey(Int d)const{ // runs BerlekampMassey on coefficients 0..d (d must be odd) and returns its result as a series
assert(d%2==1);
vector<T> f;
for (int i=0;i<=d;++i) f.push_back(at(i));
vector<T> Q=BerlekampMassey(f);
Int dmx=Int(Q.size()-1);
return Fps(move(Q),dmx);
}
T nthterm(Int d,ll k)const{ //[x^k]f, extrapolated from coefficients 0..d (d odd) via Q and P = f*Q
Fps Q=berlekamp_massey(d);
Fps P=Prod(*this,Q,Q.dMx-1).fit();
return P.bostanmori(Q,k);
}
Fps &estimate(Int d,Int dmx=-1){ // replaces *this by P/Q up to degree dmx (default dMx), with Q, P derived from coefficients 0..d (d odd)
if (dmx==-1) dmx=dMx;
Fps Q=berlekamp_massey(d);
Fps P=Prod(*this,Q,Q.dMx-1).fit().setdmx(dmx);
return *this=(Q.setdmx(dmx).inv()*P).ZeroExtend();
}
Fps &cut(Int d){ // drops coefficients above x^d; dMx is left unchanged
if (d+1<size()) vT::resize(size_t(d+1));
return *this;
}
Fps &mod(Int n){ return cut(n-1); } //mod x^n
[[nodiscard]] Fps shift(Int k_)const{ // *x^k (k may be negative); the result keeps this series' dMx
Fps ret(dMx);
const int k=(int)k_,m=min(isize()+k,dMx+1); // number of coefficients in the result
if (m<=0 || dMx<k) return ret; // nothing survives the shift
for (int i=m-1-k;i>=max(0,-k);--i) ret.at(i+k)=(*this)[i];
return ret;
}
T eval(T x)const{ //f(x) by Horner's scheme
T ret=T(0);
for (int i=isize()-1;i>=0;--i) ret*=x,ret+=(*this)[i];
return ret;
}
Fps diff()const{ // formal derivative; the result's truncation degree is dMx-1
Fps ret(dMx-1);
for (int i=Deg();i>=1;--i) ret.at(i-1)=(*this)[i]*i;
return ret;
}
Fps integ()const{ // formal integral with zero constant term; the result's truncation degree is dMx+1
Fps ret(dMx+1);
for (int i=min(Deg(),dMx); i>=0; --i) ret.at(i+1)=(*this)[i]/(i+1);
return ret;
}
T integrange(T l,T r)const{ // ∫_l^r f dx
Fps itg=integ();
return itg.eval(r)-itg.eval(l);
}
// 1/f by doubling the precision each round with g <- 2g - g*g*f.
Fps inv()const{
assert(at(0)!=0);// constant term must be non-zero
Fps g{T(1)/at(0)};
for (int i=1;i<dMx+1;i*=2){//i: number of coefficients already correct
g.setdmx(min(i*2-1,dMx));
g = g+g-g*g*(*this);
}
return g;
}
Fps log()const{ //log f, computed as the integral of f'/f
assert(at(0)==T(1));// constant term must be 1
return (diff()*inv()).integ();
}
Fps exp()const{ //exp f, doubling the precision each round with g <- g*(1 - log g + f)
assert(at(0)==T(0));// constant term must be 0
Fps g{1};
for (int i=1;i<dMx+1;i*=2){//i: number of coefficients already correct
g.setdmx(min(i*2-1,dMx));
g = g*(T(1)-g.log()+(*this));
}
return g;
}
// f^k via exp(k*log(.)) after factoring out the lowest term at(z)*x^z.
// NOTE(review): for negative k with 1 <= |k| <= dMx the guard `z>int(dMx/k)` is true even for
// z==0 (dMx/k is negative), so a zero series is returned - confirm whether negative exponents
// are meant to be supported here.
Fps pow(ll k)const{
if (k==0) return Fps({1},dMx);
if (k==1) return *this;
int z=(int)lowdeg();
if (z==inf || z>int(dMx/k)) return Fps(dMx);// f is zero, or x^(z*k) lies above dMx: result is zero
int m=int(dMx+1-z*k); // the result starts at x^(z*k), so only m coefficients of g^k are needed
Fps g=shift(-z).setdmx(m-1)/at(z); // normalised so that the constant term is 1
Fps gk=(g.log()*k).exp(); //g^k
Fps ret=(gk*POW(at(z),k)).setdmx(dMx).shift(Int(z*k)); // restore the factored-out scale and shift
return ret;
}
Fps powdbl(ll k)const{ //f^k by binary exponentiation (repeated squaring)
Fps ret({1},dMx),g=*this;
for (; k>0; k>>=1,g*=g) if (k&1)ret*=g;
return ret;
}
Fps powsparse(ll k,const vector<T> &invs=vector<T>())const{ //f^k through sparseFps::pow
return tosparse().template pow<Fps>(k,dMx,invs);
}
pair<Fps,Fps> div(const Fps &g)const{ // polynomial division: returns {quotient, remainder} of f by g
const Fps &f=*this;
int na=f.NormSize(),nb=g.NormSize();
assert(nb>0);
int n=na-nb+1;// number of quotient coefficients
if (n<=0) return {Fps(dMx),f};
int nu=f.isize(),nv=g.isize();
// Reversed leading coefficients of f and g (at most n of each), truncated to degree n-1.
Fps aR(f.rbegin()+nu-na,f.rbegin()+min(nu-na+n,nu),n-1);
Fps bR(g.rbegin()+nv-nb,g.rbegin()+min(nv-nb+n,nv),n-1);
// Reversed quotient = reversed f / reversed g as power series; reverse it back afterwards.
Fps qR=bR.inv()*aR;
qR.resize(n);
reverse(qR.begin(),qR.end());
qR.fit().setdmx(dMx);
Fps r=(f-Prod(qR,g,dMx)).fit();
return {move(qR),move(r)};
}
};
/********* NTTmodmod使 *********/
// Kind 0 product: f*g mod x^(d+1), computed term by term through ProdSparse on the non-zero
// coefficients of g (no convolution routine is used).
template<class T>
Fps<T,0> Prod(const Fps<T,0> &f,const Fps<T,0> &g,int d){
    const sparseFps<T> gTerms=g.tosparse();
    return f.ProdSparse(gTerms,d);
}
// Kind 1 product: f*g mod x^(d+1) through convolution<T::mod()> on the raw coefficient values.
// Only the first d+1 coefficients of each factor (after dropping trailing zeros) take part.
template<class T>
Fps<T,1> Prod(const Fps<T,1> &f,const Fps<T,1> &g,int d){
    const int keep=d+1;
    // Copies the leading coefficients of `src` out as plain integers.
    auto rawValues=[keep](const Fps<T,1> &src){
        const int cnt=min(keep,src.NormSize());
        vector<ll> out;
        out.reserve(cnt);
        for (int i=0;i<cnt;++i) out.push_back(src[i].val());
        return out;
    };
    vector<ll> lhs=rawValues(f);
    vector<ll> rhs=rawValues(g);
    vector<ll> prod=convolution<T::mod()>(lhs,rhs);
    if ((int)prod.size()>keep) prod.resize(keep);
    return Fps<T,1>(prod.begin(),prod.end(),d);
}
// Kind 2 product: f*g mod x^(d+1) for an arbitrary modulus T::mod().
// The raw coefficient values are convolved under three fixed moduli m0 < m1 < m2 and each result
// coefficient is rebuilt in mixed-radix form x = s0 + s1*m0 + s2*m0*m1 (Garner's method), which is
// then reduced by constructing T from it.
// m0*m1 mod T::mod() is recomputed on every call: T::mod() may be a run-time modulus, and caching
// the value in a function-local static (as before) would keep using the modulus of the very first
// call after the modulus has been changed.
template<class T>
Fps<T,2> Prod(const Fps<T,2> &f,const Fps<T,2> &g,int d){
static constexpr int m0 = 167772161; //m0<m1<m2
static constexpr int m1 = 469762049;
static constexpr int m2 = 754974721;
static constexpr int m01 = 104391568;// 1/m0 (mod m1)
static constexpr int m12 = 399692502;// 1/m1 (mod m2)
static constexpr int m012 = 190329765;// 1/(m0*m1) (mod m2)
const int m0m1 = int(ll(m0)*m1 % T::mod());
// Only the first d+1 coefficients of each factor (after dropping trailing zeros) take part.
int nf=min(d+1,f.NormSize()),ng=min(d+1,g.NormSize());
vector<ll> ff,gg;
ff.reserve(nf),gg.reserve(ng);
for (int i=0;i<nf;++i) ff.push_back(f[i].val());
for (int i=0;i<ng;++i) gg.push_back(g[i].val());
vector<ll> h0=convolution<m0>(ff,gg);
vector<ll> h1=convolution<m1>(ff,gg);
vector<ll> h2=convolution<m2>(ff,gg);
Fps<T,2> ret(d);
int nn=min(d+1,(int)h0.size());
ret.reserve(nn);
for (int i=0;i<nn;++i){
ll r0=h0[i],r1=h1[i],r2=h2[i];
ll s0=r0;
ll s1=(r1+m1-s0)*m01%m1; // s0 < m0 < m1, so r1+m1-s0 stays positive
ll s2=((r2+m2-s0)*m012+(m2-s1)*m12)%m2; // s0,s1 < m2, so both summands stay non-negative
ret.emplace_back(s0+s1*m0+s2*m0m1);
}
return ret;
}
#if 0 //f*g mod x^(d+1) FFT 使FFT1
template<class T>
Fps<T,3> Prod(const Fps<T,3> &f,const Fps<T,3> &g,int d){
vector<T> ff(f.begin(),f.end()),gg(g.begin(),g.end());
vector<T> hh = ArbitraryModConvolution::CooleyTukey::multiply(ff,gg);
if ((int)hh.size()>d+1) hh.resize(d+1);
return Fps<T,3>(hh.begin(),hh.end(),d);
}
#endif
/********* I/F *********/
// Computes h = f^k * g^m up to degree dmx for sparse f and g, returned as a dense FPS.
// After factoring out the lowest powers of x, h is obtained from the ODE
//   (f*g) * h' = (k*f'*g + m*f*g') * h,   h(0) = f(0)^k * g(0)^m
// which de_sparse solves term by term.
//   f_, g_ : sparse factors (taken by value; a zero exponent replaces the factor by 1).
//   k, m   : exponents; a negative exponent on a zero series is rejected by the assert.
//   invs   : optional table with invs[i] = 1/i, forwarded to de_sparse.
template<class FPS,class T=typename FPS::value_type> FPS prodtwopow(//f^k*g^m
sparseFps<T> f_,ll k,sparseFps<T> g_,ll m,Int dmx,
const vector<T> &invs=vector<T>())
{
// Exponent 0: treat the factor as the constant 1 with exponent 1.
if (k==0) f_={{T(1),0},},k=1;
if (m==0) g_={{T(1),0},},m=1;
Int fz=f_.lowdeg(),gz=g_.lowdeg();
assert(!(fz==Int(1e9) && k<0) && !(gz==Int(1e9) && m<0));// lowdeg()==1e9 means the series is empty (zero)
if (fz==Int(1e9) || gz==Int(1e9)) return FPS(dmx);// a zero factor makes the product zero
ll z=fz*k+gz*m; // lowest degree of the result; NOTE(review): the original comment mentions overflow for large k,m - not guarded here
assert(z>=0);
if (ll(dmx)<z) return FPS(dmx);
// Strip the lowest powers so both factors start at degree 0.
sparseFps<T> f=f_.shift(-fz),g=g_.shift(-gz);
Int dmx2=dmx-z;
sparseFps<T> a=f*g,b=f.diff()*g*k+f*g.diff()*m;
T h0=POW(f.co(0),k)*POW(g.co(0),m);
FPS h=de_sparse<FPS>(a,b,h0,dmx2,invs);
// Put the factored-out x^z back.
return h.setdmx(dmx).shift(Int(z));
}
}//namespace fpsspace
#if 0
using fpsT = dd;
using fps = fpsspace::Fps<fpsT,0>; //0:使
#elif 0
using fpsT = mll;
using fps = fpsspace::Fps<fpsT,1>; //1:NTTfriendly mod
#elif 1
using fpsT = atcoder::modint;
using fps = fpsspace::Fps<fpsT,2>; //2:mod
#elif 0
using fpsT = dd;
using fps = fpsspace::Fps<fpsT,3>; //3:FFT
#endif
using spfps = fpsspace::sparseFps<fpsT>;
/*
- f,g
- FPS≠0
- -------- --------
fps f; //f(x)=0 1e6
fps f(d); // d
fps f{2,3,4,}; //f(x)=2+3x+4x^2 1e6
fps f({2,3,4,},d); // d
fps f(all(v)); //vllv 1e6
fps f(all(v),d); // d
- -------- -------- vector<pair>
spfps sf={{4,2},{-1,5}}; //f(x)=4x^2-x^5
sf.set(c,d); //c*x^d
- -------- (fps) --------
f+=g f-=g f+g f-g -f f+=g f*=g f+g f*g
f*=g f*g //NTTmod,mod,
f*=g f*g //
f/=g f/=g f/g f/g // g≠0
- -------- () --------
f+=c f-=c f*=c f/=c f+c f-c f*c f/c f*=c f*c
- -------- --------
f[i]=val; //
f.at(i)=val; //調
ll n=f.size(); //(+1) leading zero
ll d=f.deg(); //0 f(x)=0-1
ll d=f.lowdeg(); //0 f(x)=01e9
f.setdmx(d); //x^d mod x^(d+1) d≧0
f.fit(); //≠0
fps f(sf); //f→f
fps f(sf,d); //f→f d
spfps sf=f.tosparse(); //f→f
- -------- --------
mll c=f.prod1(g,k); //[x^k]f*g
mll c=f.bostanmori(g,k);//[x^k]f/g g≠0 k(10^18)OK
f.cut(d); //x^d
f.mod(n); //mod x^n
fps g=f.shift(k); //f*x^k k
spfps sg=sf.shift(k); //f*x^k k
mll val=f.eval(c); //f(c)
fps g=f.diff(); //
fps g=f.integ(); //
mll val=f.integrange(l,r); // ∫_l^r f dx
fps g=f.inv(); //1/f ≠0
fps g=f.log(); //log f =1
fps g=f.exp(); //exp f =0
fps g=sf.exp<fps>(d); //exp f =0
fps g=f.pow(k); //f^k k
fps g=f.powdbl(k); //f^k doubling
fps g=sf.pow<fps>(k,d); //f^k d k(≠0)
fps g=f.powsparse(k); //f^k k(≠0)
auto[h,r]=f.div(g); // h=f/g,r=f%g f
fps Q=f.berlekamp_massey(); //f=P/QQ f2d-1Qd Qdmx=d
mll c=f.nthterm(k); //[x^k]f k(10^18)OK
f.estimate(); //
f.estimate(d); //d
fps F=fpsspace::de_sparse<fps>(sf,sg,F0,d); // f*F'=g*F d
fps h=fpsspace::prodtwopow<fps>(sf,k,sg,m,d); //f^k*g^m d k,m
*/
namespace fpsspace{
// Product of all series in `fs`, always multiplying the two currently shortest ones first.
// The elements of `fs` are moved from; the function returns FPS{1} for an empty input.
//
// The min-heap is kept in a plain vector with push_heap/pop_heap - the same operations
// std::priority_queue performs internally - because priority_queue::top() only exposes a const
// reference: std::move on it silently falls back to a deep copy, so every pop (and the final
// return) used to copy a whole polynomial. Taking the element from the vector's back after
// pop_heap makes those moves real.
template<class T,int Kind> Fps<T,Kind> prodallPque(vector<Fps<T,Kind>> &fs){
using FPS=Fps<T,Kind>;
if (fs.empty()) return FPS{1};
// "Greater size" comparator turns the default max-heap into a min-heap on size().
auto comp=[](const FPS &a,const FPS &b){ return a.size() > b.size(); };
vector<FPS> heap;
heap.reserve(fs.size());
for (FPS &f: fs){
heap.push_back(std::move(f));
std::push_heap(heap.begin(),heap.end(),comp);
}
// Removes and returns the shortest series.
auto popShortest=[&heap,&comp](){
std::pop_heap(heap.begin(),heap.end(),comp);
FPS top=std::move(heap.back());
heap.pop_back();
return top;
};
while (heap.size()>1U){
FPS f=popShortest();
FPS g=popShortest();
heap.push_back(f*g);
std::push_heap(heap.begin(),heap.end(),comp);
}
return std::move(heap.front());
}
// Product of all series in `fs` using a FIFO work queue: the two front entries are multiplied and
// their product is appended at the back, until one entry remains.
// The elements of `fs` are moved from; the function returns FPS{1} for an empty input.
template<class T,int Kind> Fps<T,Kind> prodall(vector<Fps<T,Kind>> &fs){
    using FPS=Fps<T,Kind>;
    if (fs.empty()) return FPS{1};
    deque<FPS> work;
    for (FPS &item: fs) work.push_back(move(item));
    while (work.size()>1U){
        FPS merged=work[0]*work[1];
        work.pop_front();
        work.pop_front();
        work.push_back(move(merged));
    }
    return move(work[0]);
}
/*
- -------- Πfs[i] fs
fps g=fpsspace::prodallPque(fs); //priority_queue
fps g=fpsspace::prodall(fs); //deque
*/
}
// Binomial-coefficient helper over a field-like type T (T must support *, / and construction from
// small integers). Factorials f[i] = i! and inverse factorials g[i] = 1/i! are tabulated for
// i = 0..mxN; queries outside the table fall back to direct products where noted.
template<class T> struct combination_{
    std::vector<T> f,g; long long mxN=0;
    // Default: a one-entry table (0! = 1), so that mxN == 0 is consistent with the table contents
    // and queries such as (0,0) no longer index an empty vector.
    combination_(): f(1,T(1)),g(1,T(1)) {}
    // Tabulates 0!..maxN! and their inverses (one division, then a backward sweep).
    combination_(long long maxN): f(maxN+1,1),g(maxN+1),mxN(maxN) {
        for (long long i=1;i<=mxN;++i) { f[i]=f[i-1]*i; }
        g[mxN]=1/f[mxN];
        for (long long i=mxN;i>=1;--i) { g[i-1]=g[i]*i; }
    }
    // nPr = n!/(n-r)!; zero when n<0, r<0 or n<r. For n beyond the table the falling product
    // n*(n-1)*...*(n-r+1) is evaluated directly instead of reading past the end of f.
    T P(long long n,long long r){
        if (n<0 || r<0 || n<r) return T(0);
        if (n<=mxN) return f[n]*g[n-r];
        T prod=1;
        for (long long i=0;i<r;++i) prod*=n-i;
        return prod;
    }
    // nHr, evaluated as C(n+r-1, n-1).
    T H(long long n,long long r){ return operator()(n+r-1,n-1); }
    // 1/n from the tables; requires 1 <= n <= mxN.
    T inv(long long n) { return f[n-1] * g[n]; }
    // n! from the table; requires 0 <= n <= mxN.
    T fact(long long n) { return f[n]; }
    // 1/n! from the table; requires 0 <= n <= mxN.
    T finv(long long n) { return g[n]; }
    // nCr. Negative n uses C(-n, r) = (-1)^r * C(n+r-1, r); n beyond the table is evaluated as a
    // quotient of two r-term products (with r replaced by n-r when that is smaller).
    T operator()(long long n,long long r){
        if (r<0) return T(0);
        if (n<0) return operator()(-n+r-1,r) * ((r&1)?-1:1);
        if (n<r) return T(0);
        if (n<=mxN) return f[n]*g[n-r]*g[r];
        if (n-r<r) r=n-r;
        T bunsi=1,bunbo=1; // numerator, denominator
        for (long long i=0;i<r;++i) bunsi*=n-i;
        for (long long i=0;i<r;++i) bunbo*=i+1;
        return bunsi/bunbo;
    }
    // C(n, r) for every n in [nL, nR] with r fixed. `sp(nL-r+1, nR-r+1, r)` is expected to return
    // one value per n which, divided by r!, gives C(n, r) - presumably the product of r consecutive
    // integers starting at n-r+1; verify against the caller-supplied functor.
    template<class SP>
    std::vector<T> CnLnR(long long nL,long long nR,long long r,SP sp){
        if (nR-nL+1<=0) return std::vector<T>();
        if (r<0) return std::vector<T>(nR-nL+1,0);
        std::vector<T> v=sp(nL-r+1,nR-r+1,r);
        for (T& e: v) e*=finv(r);
        return v;
    }
    // nHr for every r in [rL, rR] with n fixed, expressed through CnLnR as C(n-1+r, n-1).
    template<class SP>
    std::vector<T> HrLrR(long long n,long long rL,long long rR,SP sp){
        return CnLnR(n-1+rL,n-1+rR,n-1,sp);
    }
};
using combination = combination_<modint>;
void cin2solve()
{
ll n,m,P;
cin >> n >> m >> P;
modint::set_mod((int)P);
combination cmb(n);
vector<fps> fs;
rep(i,1,n) fs.push_back(fps{1,i});
fps g=fpsspace::prodall(fs);
modint ans=0;
rep(k,1,m){
modint va=cmb(m,k);
va*=cmb.P(n-k,m-k);
va*=g.at(k);
ans+=va;
}
ans/=cmb.P(n,m);
cout << ans.val() << '\n';
return;
}
//////////////////////////////////////////
// Entry point. The active (#if 1) branch solves a single test case; the disabled branch would read
// a case count t and solve t cases. After solving, the elapsed time reported by timeget() is
// written to stderr.
int main(){
#if 1
//SolvingSpace::labo();
cin2solve();
//SolvingSpace::generand();
#else
// Multi-test variant (currently compiled out).
ll t; cin >> t;
rep(i,0,t-1){
SolvingSpace::cin2solve();
//SolvingSpace::generand();
}
#endif
cerr << timeget() <<"ms"<< '\n';
return 0;
}
הההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההההה
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
0