/*
 * NOTE(review): this text is extraction-damaged. The file has been collapsed
 * onto a few very long physical lines, and several spans that began with '<'
 * are missing (include targets, template parameter lists, parts of macros and
 * parts of some function bodies). The code below is kept exactly as found;
 * this comment only maps what is visible. The names ll, ull, mygc, mypc, rep,
 * MD and mint::setmod are used but not defined in the visible text.
 *
 * Physical line 1 - scalar I/O helpers and number theory
 *   - "#include #include": the header names are missing.
 *   - "#define REP(i,a,b) for(i=a;i'9')break;...": the macro text runs
 *     straight into the tail of a digit-parsing routine; everything between
 *     is missing. Presumably more macros and a reader(int*) overload lived
 *     there - TODO confirm against an intact copy.
 *   - reader(ll*): reads characters with mygc until it sees '-' or a digit,
 *     then accumulates decimal digits until the first non-digit, and negates
 *     the result if '-' was seen.
 *   - reader(double*): scanf("%lf").
 *   - reader(char[]): skips ' ', '\n', '\r', '\t' and EOF, then stores
 *     characters until the next one of those, NUL-terminates the buffer and
 *     returns the number of characters stored. No bound on the buffer is
 *     checked. NOTE(review): the skip loop has no exit on EOF - confirm how
 *     mygc behaves at end of input.
 *   - reader(x,y), reader(x,y,z), reader(x,y,z,w): call the one-argument
 *     reader on each pointer in order (their template headers are missing).
 *   - writer(int,char) / writer(ll,char): emit an optional '-', the decimal
 *     digits (collected in a 10- resp. 20-char buffer) and then c, all via
 *     mypc; zero is emitted as "0". NOTE(review): the sign is handled with
 *     x=-x, which cannot represent the negation of the most negative value.
 *   - writer(double,char): printf("%.15f") followed by mypc(c).
 *   - writer(const char[]) / writer(const char[],char): emit the string
 *     character by character, optionally followed by c.
 *   - writerLn(1..3 values): space-separated values ending with '\n'.
 *   - writerArr(x,n): n values separated by ' ' and ended by '\n'; n==0
 *     emits only '\n'.
 *   - pw(a,b,m): square-and-multiply power a^b mod m. Products are formed in
 *     ull before the % m, so m presumably has to fit in 32 bits - confirm.
 *   - get_inv(a,md): iterative extended-Euclid style loop; returns the
 *     coefficient u, shifted by md when negative. Presumably the inverse of
 *     a modulo md when gcd(a,md)==1 - confirm with callers.
 *   - The declaration of mfft(int n, int x[], int root, int md, void *mem)
 *     starts at the very end of this line.
 *
 * Physical line 2 - integer transforms
 *   - mfft (int version): uses mem as an int scratch array y. While n>2 it
 *     performs a radix-4 pass from x into y (twiddles w, v=w^2, u=w^3, and
 *     p=root^((md-1)/4*3) applied to D), then divides n by 4, multiplies the
 *     stride s by 4, raises root to its 4th power and swaps x and y. If n==2
 *     remains, one radix-2 pass follows. The last statement copies s elements
 *     from the current x to the current y. Presumably n is a power of two
 *     dividing md-1 and root is a primitive root of md - TODO confirm.
 *   - mfftinv: same structure, but starts from get_inv(root,md), uses
 *     p=root^((md-1)/4), and swaps the (B+D)/(B-D) outputs. No division by n
 *     is visible here; presumably the caller scales - confirm.
 *   - Two modconvolution overloads are declared, but their bodies are cut
 *     off after "for(k=1;k". The text then resumes in the tail of a reduce
 *     routine belonging to the mint type, whose opening is missing.
 *
 * Physical line 3 - tail of the mint type, tables, SSE helpers
 *   - reduce(ull T): m=(unsigned)T*mdninv; t=(T+m*md)>>W; one conditional
 *     subtraction of md. This looks like Montgomery-style reduction - confirm
 *     against the missing setmod.
 *   - get(): returns reduce(val).
 *   - operator+= is visible; operator-= is cut off after "if(val" and the
 *     text resumes in the tail of a power routine ("a*=a;}return r;}").
 *   - Definitions of the static members md, W, R, Rinv, mdninv, RR.
 *   - Free operators + - * / with an int or ll left operand, each built as
 *     mint(a) op= b.
 *   - mval/minv: global tables of 10000 mint each.
 *   - mint_init: calls val[0].setmod(md); fills val[i].val with
 *     val[i-1].val+mint::R reduced below md; fills inv[i].val with
 *     md-((md/i)*inv[md%i].val%md) and then multiplies every inv entry by
 *     mint::R modulo md. The loop bounds depend on the damaged REP macro.
 *   - addmod(a,b,md): lane-wise a+b, then subtracts md in lanes where the sum
 *     is >= md or negative under signed 32-bit comparison.
 *   - submod(a,b,md): lane-wise a-b, then adds md in lanes where b>a (signed
 *     comparison).
 *   - mullo(a,b): low 32 bits of the four lane-wise unsigned products, built
 *     from two _mm_mul_epu32 calls (even lanes, and odd lanes shifted down by
 *     4 bytes) recombined with shuffle/unpack.
 *   - The signature of mulhi starts at the end of this line.
 *
 * Physical line 4 - mulhi, mulmod and the start of the vectorised transform
 *   - mulhi(a,b): as mullo, but selects the high 32 bits of each product.
 *   - mulmod(a,b,md,mdninv): lane-wise form of the reduce step above:
 *     m=lo32(lo32(a*b)*mdninv), result=hi32(a*b+m*md), then the same
 *     conditional subtraction of md as addmod. mask0..mask3 are declared but
 *     not used in the visible code.
 *   - mfft(int n, mint x[], mint root, void *mem): sets up the scalar and
 *     broadcast constants and enters the while(n>2) loop, broadcasting the
 *     three twiddles w1, w2, w3 for each i.
 *
 * Physical line 5 - damaged tail
 *   - "for(j=0;j+3 2){": the inner loop of the function above and the start
 *     of what follows are missing. The loop prologue then repeats, presumably
 *     from a second (inverse?) transform - TODO confirm, and the text ends
 *     inside a for header.
 */
#include #include using namespace std; #define REP(i,a,b) for(i=a;i'9')break;*x=(*x)*10+k-'0';}if(m)(*x)=-(*x);} void reader(ll *x){int k,m=0;*x=0;for(;;){mygc(k);if(k=='-'){m=1;break;}if('0'<=k&&k<='9'){*x=k-'0';break;}}for(;;){mygc(k);if(k<'0'||k>'9')break;*x=(*x)*10+k-'0';}if(m)(*x)=-(*x);} void reader(double *x){scanf("%lf",x);} int reader(char c[]){int i,s=0;for(;;){mygc(i);if(i!=' '&&i!='\n'&&i!='\r'&&i!='\t'&&i!=EOF) break;}c[s++]=i;for(;;){mygc(i);if(i==' '||i=='\n'||i=='\r'||i=='\t'||i==EOF) break;c[s++]=i;}c[s]='\0';return s;} template void reader(T *x, S *y){reader(x);reader(y);} template void reader(T *x, S *y, U *z){reader(x);reader(y);reader(z);} template void reader(T *x, S *y, U *z, V *w){reader(x);reader(y);reader(z);reader(w);} void writer(int x, char c){int s=0,m=0;char f[10];if(x<0)m=1,x=-x;while(x)f[s++]=x%10,x/=10;if(!s)f[s++]=0;if(m)mypc('-');while(s--)mypc(f[s]+'0');mypc(c);} void writer(ll x, char c){int s=0,m=0;char f[20];if(x<0)m=1,x=-x;while(x)f[s++]=x%10,x/=10;if(!s)f[s++]=0;if(m)mypc('-');while(s--)mypc(f[s]+'0');mypc(c);} void writer(double x, char c){printf("%.15f",x);mypc(c);} void writer(const char c[]){int i;for(i=0;c[i]!='\0';i++)mypc(c[i]);} void writer(const char x[], char c){int i;for(i=0;x[i]!='\0';i++)mypc(x[i]);mypc(c);} template void writerLn(T x){writer(x,'\n');} template void writerLn(T x, S y){writer(x,' ');writer(y,'\n');} template void writerLn(T x, S y, U z){writer(x,' ');writer(y,' ');writer(z,'\n');} template void writerArr(T x[], int n){int i;if(!n){mypc('\n');return;}rep(i,n-1)writer(x[i],' ');writer(x[n-1],'\n');} ull pw(ull a, ull b, ull m){ull r=1;while(b){if(b&1)r=r*a%m;b>>=1;a=a*a%m;}return r;} int get_inv(ll a, int md){ll t=a,s=md,u=1,v=0,e;while(s){e=t/s;t-=e*s;u-=e*v;swap(t,s);swap(u,v);}if(u<0)u+=md;return u;} void mfft(int n, int x[], int root, int md, void *mem){int 
i,j,I,J,K,s=1,p,w,v,u,a,b,c,d,A,B,C,D,*y=(int*)mem;p=pw(root,(md-1)/4*3,md);root=pw(root,(md-1)/n,md);while(n>2){I=n/4;J=I+I;K=I+J;w=1;rep(i,I){v=(ll)w*w%md;u=(ll)w*v%md;rep(j,s){a=x[j+s*i];b=x[j+s*(i+I)];c=x[j+s*(i+J)];d=x[j+s*(i+K)];A=a+c;if(A>=md)A-=md;B=a-c;if(B<0)B+=md;C=b+d;if(C>=md)C-=md;D=b-d;if(D<0)D+=md;D=(ll)D*p%md;y[j+s*4*i]=A+C;y[j+s*(4*i+1)]=(ll)w*(B-D)%md;y[j+s*(4*i+2)]=(ll)v*(A-C)%md;y[j+s*(4*i+3)]=(ll)u*(B+D)%md;if(y[j+s*4*i]>=md)y[j+s*4*i]-=md;if(y[j+s*(4*i+1)]<0)y[j+s*(4*i+1)]+=md;if(y[j+s*(4*i+2)]<0)y[j+s*(4*i+2)]+=md;}w = (ll)w*root%md;}n/=4;s*=4;root=(ll)root*root%md;root=(ll)root*root%md;swap(x,y);}if(n==2){rep(i,s){y[i]=x[i]+x[i+s];if(y[i]>=md)y[i]-=md;y[i+s]=x[i]-x[i+s];if(y[i+s]<0)y[i+s]+=md;}n/=2;s*=2;root=(ll)root*root%md;swap(x,y);}rep(i,s)y[i]=x[i];} void mfftinv(int n, int x[], int root, int md, void *mem){int i,j,I,J,K,s=1,p,w,v,u,a,b,c,d,A,B,C,D,*y=(int*)mem;root=get_inv(root,md);p=pw(root,(md-1)/4,md);root=pw(root,(md-1)/n,md);while(n>2){I=n/4;J=I+I;K=I+J;w=1;rep(i,I){v=(ll)w*w%md;u=(ll)w*v%md;rep(j,s){a=x[j+s*i];b=x[j+s*(i+I)];c=x[j+s*(i+J)];d=x[j+s*(i+K)];A=a+c;if(A>=md)A-=md;B=a-c;if(B<0)B+=md;C=b+d;if(C>=md)C-=md;D=b-d;if(D<0)D+=md;D=(ll)D*p%md;y[j+s*4*i]=A+C;y[j+s*(4*i+1)]=(ll)w*(B+D)%md;y[j+s*(4*i+2)]=(ll)v*(A-C)%md;y[j+s*(4*i+3)]=(ll)u*(B-D)%md;if(y[j+s*4*i]>=md)y[j+s*4*i]-=md;if(y[j+s*(4*i+2)]<0)y[j+s*(4*i+2)]+=md;if(y[j+s*(4*i+3)]<0)y[j+s*(4*i+3)]+=md;}w=(ll)w*root%md;}n/=4;s*=4;root=(ll)root*root%md;root=(ll)root*root%md;swap(x,y);}if(n==2){rep(i,s){y[i]=x[i]+x[i+s];if(y[i]>=md)y[i]-=md;y[i+s]=x[i]-x[i+s];if(y[i+s]<0)y[i+s]+=md;}n/=2;s*=2;root=(ll)root*root%md;swap(x,y);}rep(i,s)y[i]=x[i];} template void modconvolution(S A[], int As, T B[], int Bs, U res[], int Rs, void *mem, int md, int root, int ordered=1){int i,n,k,r,*a,*b;n=max(As+Bs,Rs);for(k=1;k void modconvolution(S A[], int As, T res[], int Rs, void *mem, int md, int root, int ordered=1){int i,n,k,r,*a;n=max(2*As,Rs);for(k=1;k>W);if(t>=md)t-=md;return t;}unsigned 
reduce(ull T){unsigned m=(unsigned)T*mdninv;unsigned t=(unsigned)((T+(ull)m*md)>>W);if(t>=md)t-=md;return t;} unsigned get(){return reduce(val);} mint&operator+=(mint a){val+=a.val;if(val>=md)val-=md;return*this;}mint&operator-=(mint a){if(val>=1;a*=a;}return r;} }; unsigned mint::md, mint::W, mint::R, mint::Rinv, mint::mdninv, mint::RR; mint operator+(int a, mint b){return mint(a)+=b;}mint operator-(int a, mint b){return mint(a)-=b;}mint operator*(int a, mint b){return mint(a)*=b;}mint operator/(int a, mint b){return mint(a)/=b;} mint operator+(ll a, mint b){return mint(a)+=b;}mint operator-(ll a, mint b){return mint(a)-=b;}mint operator*(ll a, mint b){return mint(a)*=b;}mint operator/(ll a, mint b){return mint(a)/=b;} mint mval[10000], minv[10000]; void mint_init(int md=MD, mint val[]=mval, int vals=10000, mint inv[]=minv, int invs=10000){int i;val[0].setmod(md);val[0].val=0;REP(i,1,vals){val[i].val=val[i-1].val+mint::R;if(val[i].val >=md)val[i].val-=md;}inv[1].val=1;REP(i,2,invs){inv[i].val=md-((ll)(md/i)*inv[md%i].val%md);}REP(i,1,invs)inv[i].val=(ull)inv[i].val*mint::R%md;} static inline __m128i addmod(const __m128i &a, const __m128i &b, const __m128i &md){ static const __m128i zeros = _mm_set_epi32(0,0,0,0); __m128i tmp = _mm_add_epi32(a, b); __m128i mask = _mm_or_si128(_mm_or_si128(_mm_cmpgt_epi32(tmp, md), _mm_cmpeq_epi32(tmp, md)), _mm_cmplt_epi32(tmp,zeros)); return _mm_sub_epi32(tmp, _mm_and_si128(mask, md)); } static inline __m128i submod(const __m128i &a, const __m128i &b, const __m128i &md){ __m128i tmp = _mm_sub_epi32(a, b); __m128i mask = _mm_cmpgt_epi32(b, a); return _mm_add_epi32(tmp, _mm_and_si128(mask, md)); } static inline __m128i mullo(const __m128i &a, const __m128i &b){ __m128i tmp1 = _mm_mul_epu32(a,b); __m128i tmp2 = _mm_mul_epu32( _mm_srli_si128(a,4), _mm_srli_si128(b,4)); return _mm_unpacklo_epi32(_mm_shuffle_epi32(tmp1, _MM_SHUFFLE(0,0,2,0)), _mm_shuffle_epi32(tmp2, _MM_SHUFFLE(0,0,2,0))); } static inline __m128i mulhi(const __m128i &a, 
const __m128i &b){ __m128i tmp1 = _mm_mul_epu32(a,b); __m128i tmp2 = _mm_mul_epu32( _mm_srli_si128(a,4), _mm_srli_si128(b,4)); return _mm_unpacklo_epi32(_mm_shuffle_epi32(tmp1, _MM_SHUFFLE(0,0,3,1)), _mm_shuffle_epi32(tmp2, _MM_SHUFFLE(0,0,3,1))); } static inline __m128i mulmod(const __m128i &a, const __m128i &b, const __m128i &md, const __m128i &mdninv){ static const __m128i mask0 = _mm_set_epi32(0,0,0,-1); static const __m128i mask1 = _mm_set_epi32(0,0,-1,0); static const __m128i mask2 = _mm_set_epi32(0,-1,0,0); static const __m128i mask3 = _mm_set_epi32(-1,0,0,0); static const __m128i zeros = _mm_set_epi32(0,0,0,0); __m128i mask; __m128i t = mullo(a, b); __m128i t1 = _mm_mul_epu32(a,b); __m128i t2 = _mm_mul_epu32(_mm_srli_si128(a,4),_mm_srli_si128(b,4)); __m128i m = mullo(t, mdninv); __m128i tmp1 = _mm_mul_epu32(m,md); __m128i tmp2 = _mm_mul_epu32(_mm_srli_si128(m,4), _mm_srli_si128(md,4)); tmp1 = _mm_add_epi64(tmp1,t1); tmp2 = _mm_add_epi64(tmp2,t2); t = _mm_unpacklo_epi32(_mm_shuffle_epi32(tmp1, _MM_SHUFFLE(0,0,3,1)), _mm_shuffle_epi32(tmp2, _MM_SHUFFLE(0,0,3,1))); mask = _mm_or_si128(_mm_or_si128(_mm_cmpgt_epi32(t, md), _mm_cmpeq_epi32(t, md)), _mm_cmplt_epi32(t,zeros)); return _mm_sub_epi32(t, _mm_and_si128(mask, md)); } void mfft(int n, mint x[], mint root, void *mem){ int i, j, k; int n1, n2, n3, step = 1; mint w1, w2, w3, a, b, c, d, aa, bb, cc, dd, tmp, *y = (mint*)mem; __m128i A, B, C, D, AA, BB, CC, DD, md, t1, t2, mask, TMP, W1, W2, W3, MDNINV; tmp = root.pw((mint::md-1)/4*3); root = root.pw((mint::md-1)/n); md = _mm_set_epi32(mint::md, mint::md, mint::md, mint::md); TMP = _mm_set_epi32(tmp.val, tmp.val, tmp.val, tmp.val); MDNINV = _mm_set_epi32(mint::mdninv, mint::mdninv, mint::mdninv, mint::mdninv); while(n > 2){ n1 = n / 4; n2 = n1 + n1; n3 = n1 + n2; w1.val = mint::R; rep(i,n1){ w2 = w1*w1; w3 = w1*w2; W1 = _mm_set_epi32(w1.val, w1.val, w1.val, w1.val); W2 = _mm_set_epi32(w2.val, w2.val, w2.val, w2.val); W3 = _mm_set_epi32(w3.val, w3.val, w3.val, 
w3.val); for(j=0;j+3 2){ n1 = n / 4; n2 = n1 + n1; n3 = n1 + n2; w1.val = mint::R; rep(i,n1){ w2 = w1*w1; w3 = w1*w2; W1 = _mm_set_epi32(w1.val, w1.val, w1.val, w1.val); W2 = _mm_set_epi32(w2.val, w2.val, w2.val, w2.val); W3 = _mm_set_epi32(w3.val, w3.val, w3.val, w3.val); for(j=0;j+3