// NOTE(review): this file reached review collapsed onto one physical line and
// appears to have lost text: the #include has no header name, each `template`
// keyword has no parameter list (`type` / `Args` are used but never declared in
// the visible text), and the final function `comb` stops mid-expression.
// Because the #include, #pragma, #ifdef, #define and #endif directives share a
// line with ordinary code, the text cannot be preprocessed as written. The code
// below is left untouched; restore it from the original file before building.
//
// Prelude: three GCC pragmas (target "avx", optimize "Ofast", optimize
// "no-stack-protector") followed by `using namespace std;`.
//
// namespace FastIO
//   Only when ONLINE_JUDGE is defined:
//     IN / inS / inT   - 1<<20-byte input buffer with a read cursor (inS) and an
//                        end-of-data pointer (inT).
//     getchar()        - macro; when inS == inT it refills IN from stdin with
//                        fread, yields EOF if that refill read nothing, and
//                        otherwise yields *inS and advances inS.
//     OUT / outS       - 1<<20-byte output buffer with a write cursor.
//     putchar(x)       - macro; when OUT holds 1<<20 bytes it is written to
//                        stdout with fwrite and the cursor is reset, then x is
//                        stored at the cursor.
//     Writer / writer  - global object whose destructor writes the bytes still
//                        pending in OUT to stdout.
//   read(num)          - skips characters until a decimal digit is seen; the
//                        sign factor is -1 when the character just before the
//                        first digit is '-', else 1. Then accumulates the digit
//                        run (ch^48 maps an ASCII digit to its value) and
//                        multiplies by the sign factor.
//                        NOTE(review): the skip loop only ends on a digit, so it
//                        looks like it never terminates once getchar() keeps
//                        returning EOF - confirm callers never read past the
//                        end of input.
//   write(num)         - emits '-' and negates when num < 0, pushes the decimal
//                        digits least-significant first into stk[35], then
//                        emits them in reverse (digit^48 maps back to ASCII).
//   read(x, args...)   - reads x, then recurses on the remaining references.
//   write(x, args...)  - calls write(x,' ') and then write(args...).
//                        NOTE(review): no (value, char) overload of write is
//                        visible here, so write(x,' ') appears to select this
//                        same variadic template again - verify against the
//                        original source.
//
// Globals after `using namespace FastIO;`:
//   N = 200010, M = 998244353, B = 350, S = 200000/B + 1 (= 572).
//   T, n, m and the arrays fac[N], inv[N], pw[N], csum[S][N].
//   presumably M is a modulus and fac/inv/pw hold factorials, inverses and
//   powers - TODO confirm; nothing visible fills or reads them.
//   NOTE(review): csum has 572 * 200010 int elements; check the memory limit.
//
// comb(sum1, sum2): the definition runs past the end of the visible text and is
// intentionally not documented further.
#include #pragma GCC target("avx") #pragma GCC optimize("Ofast") #pragma GCC optimize("no-stack-protector") using namespace std; namespace FastIO{ #ifdef ONLINE_JUDGE char IN[(1<<20)],(*inS)=IN,(*inT)=IN; #define getchar() (inS==inT&&(inT=(inS=IN)+fread(IN,1,1<<20,stdin),inS==inT)?EOF:(*(inS++))) char OUT[(1<<20)],(*outS)=OUT; #define putchar(x) (outS-OUT==(1<<20)?fwrite(OUT,1,(1<<20),stdout),outS=OUT,0:0,(*(outS++))=x) struct Writer{ ~Writer(){ fwrite(OUT,1,outS-OUT,stdout); } }writer; #endif template inline void read(type &num){ type base; char ch=0; num=0; for(;ch<'0'||ch>'9';ch=getchar()) base=(ch=='-')?-1:1; for(;'0'<=ch&&ch<='9';ch=getchar()) num=num*10+(ch^48); num=num*base; } template inline void write(type num){ if(num<0) num=(-num),putchar('-'); int top=0,stk[35]; do stk[top++]=num%10,num/=10; while(num); while(top) putchar((stk[--top]^48)); } template inline void read(type &x,Args&... args){ read(x),read(args...); } template inline void write(type x,Args... args){ write(x,' '),write(args...); } } using namespace FastIO; const int N=200010,M=998244353,B=350,S=200000/B+1; int T,n,m,fac[N],inv[N],pw[N],csum[S][N]; int comb(int sum1,int sum2){ if(sum1