#pragma GCC optimize ("O3") #pragma GCC optimize("unroll-loops") #pragma GCC target ("fma") #include #include #define TE template #define TY typename #define US using #define ST static #define IN inline #define CL class #define PU public #define OP operator #define CE constexpr #define CO const #define NE noexcept #define RE return #define WH while #define VO void #define VE vector #define LI list #define BE begin #define EN end #define SZ size #define MO move #define TH this #define CRI CO int& #define CRUI CO uint& US namespace std;US uint = unsigned int;US ll = long long;US ull = unsigned long long; #define MAIN __attribute__( ( target( "fma" ) ) ) int main #define TYPE_OF(VAR) decay_t #define UNTIE ios_base::sync_with_stdio(false); cin.tie(nullptr) #define CEXPR(LL,BOUND,VALUE) CE LL BOUND = VALUE #define CIN( LL , A ) LL A{}; StdStream::Scan( A ) #define ASSERT(A,MIN,MAX) assert(MIN <= A && A <= MAX) #define CIN_ASSERT(A,MIN,MAX) CIN(TYPE_OF(MAX),A); ASSERT(A,MIN,MAX) #define FOR(VAR,INITIAL,FINAL_PLUS_ONE)for(TYPE_OF(FINAL_PLUS_ONE)VAR = INITIAL;VAR < FINAL_PLUS_ONE;++VAR) #define FOREQ(VAR,INITIAL,FINAL)for(TYPE_OF(FINAL)VAR = INITIAL;VAR <= FINAL;++VAR) #define FOREQINV(VAR,INITIAL,FINAL)for(TYPE_OF(INITIAL)VAR = INITIAL;VAR >= FINAL;--VAR) #define FOR_ITR(ARRAY,ITR,END)for(auto ITR = ARRAY.begin(),END = ARRAY.end();ITR != END;++ITR) #define REPEAT(HOW_MANY_TIMES)FOR(VARIABLE_FOR_REPEAT,0,HOW_MANY_TIMES) #define COUT(ANSWER)cout << (ANSWER) << "\n" CEXPR(uint,P,998244353);TE CE INT& RS(INT& n)NE{RE n < 0?((((++n)*= -1)%= M)*= -1)+= M - 1:n %= M;}TE CE uint& RS(uint& n)NE{RE n %= M;}TE CE ull& RS(ull& n)NE{RE n %= M;}TE CE INT& RSP(INT& n)NE{CE uint trunc = (1 << 23)- 1;INT n_u = n >> 23;n &= trunc;INT n_uq = (n_u / 7)/ 17;n_u -= n_uq * 119;n += n_u << 23;RE n < n_uq?n += P - n_uq:n -= n_uq;}TE <> CE ull& RS(ull& n)NE{CE ull Pull = P;CE ull Pull2 = (Pull - 1)* (Pull - 1);RE RSP(n > Pull2?n -= Pull2:n);}TE CE INT RS(INT&& n)NE{RE MO(RS(n));}TE CE INT RS(CO INT& n)NE{RE RS(INT(n));} #define SFINAE_FOR_MOD(DEFAULT)TY T,enable_if_t >::value>* DEFAULT #define DC_OF_CM_FOR_MOD(FUNC)IN bool OP FUNC(CO Mod& n)CO NE #define DC_OF_AR_FOR_MOD(FUNC)IN Mod OP FUNC(CO Mod& n)CO NE;TE IN Mod OP FUNC(T&& n)CO NE; #define DF_OF_CM_FOR_MOD(FUNC)TE IN bool Mod::OP FUNC(CO Mod& n)CO NE{RE m_n FUNC n.m_n;} #define DF_OF_AR_FOR_MOD(FUNC,FORMULA)TE IN Mod Mod::OP FUNC(CO Mod& n)CO NE{RE MO(Mod(*TH)FUNC ## = n);}TE TE IN Mod Mod::OP FUNC(T&& n)CO NE{RE FORMULA;}TE IN Mod OP FUNC(T&& n0,CO Mod& n1)NE{RE MO(Mod(forward(n0))FUNC ## = n1);} TE CL Mod{PU:uint m_n;PU:CE Mod()NE;CE Mod(CO Mod& n)NE;CE Mod(Mod& n)NE;CE Mod(Mod&& n)NE;TE CE Mod(CO T& n)NE;TE CE Mod(T& n)NE;TE CE Mod(T&& n)NE;CE Mod& OP=(CO Mod& n)NE;CE Mod& OP=(Mod&& n)NE;CE Mod& OP+=(CO Mod& n)NE;CE Mod& OP-=(CO Mod& n)NE;CE Mod& OP*=(CO Mod& n)NE;IN Mod& OP/=(CO Mod& n);CE Mod& OP<<=(int n)NE;CE Mod& OP>>=(int n)NE;CE Mod& OP++()NE;CE Mod OP++(int)NE;CE Mod& OP--()NE;CE Mod OP--(int)NE;DC_OF_CM_FOR_MOD(==);DC_OF_CM_FOR_MOD(!=);DC_OF_CM_FOR_MOD(<);DC_OF_CM_FOR_MOD(<=);DC_OF_CM_FOR_MOD(>);DC_OF_CM_FOR_MOD(>=);DC_OF_AR_FOR_MOD(+);DC_OF_AR_FOR_MOD(-);DC_OF_AR_FOR_MOD(*);DC_OF_AR_FOR_MOD(/);CE Mod OP<<(int n)CO NE;CE Mod OP>>(int n)CO NE;CE Mod OP-()CO NE;CE Mod& SignInvert()NE;CE Mod& Double()NE;CE Mod& Halve()NE;IN Mod& Invert();TE CE Mod& PositivePW(T&& EX)NE;TE CE Mod& NonNegativePW(T&& EX)NE;TE CE Mod& PW(T&& EX);CE VO swap(Mod& n)NE;CE CO uint& RP()CO NE;ST CE Mod DeRP(CO uint& n)NE;ST CE uint& Normalise(uint& n)NE;ST IN CO Mod& Inverse(CO uint& n)NE;ST IN CO Mod& Factorial(CO uint& n)NE;ST IN CO Mod& FactorialInverse(CO uint& n)NE;ST IN CO Mod& zero()NE;ST IN CO Mod& one()NE;PU:TE CE Mod& Ref(T&& n)NE;}; #define SFINAE_FOR_MN(DEFAULT)TY T,enable_if_t,decay_t >::value>* DEFAULT #define DC_OF_AR_FOR_MN(FUNC)IN MN OP FUNC(CO MN& n)CO NE;TE IN MN OP FUNC(T&& n)CO NE; #define DF_OF_CM_FOR_MN(FUNC)TE IN bool MN::OP FUNC(CO MN& n)CO NE{RE m_n FUNC n.m_n;} #define DF_OF_AR_FOR_MN(FUNC,FORMULA)TE IN MN MN::OP FUNC(CO MN& n)CO NE{RE MO(MN(*TH)FUNC ## = n);}TE TE IN MN MN::OP FUNC(T&& n)CO NE{RE FORMULA;}TE IN MN OP FUNC(T&& n0,CO MN& n1)NE{RE MO(MN(forward(n0))FUNC ## = n1);} TE CL MN :PU Mod{PU:CE MN()NE;CE MN(CO MN& n)NE;CE MN(MN& n)NE;CE MN(MN&& n)NE;TE CE MN(CO T& n)NE;TE CE MN(T&& n)NE;CE MN& OP=(CO MN& n)NE;CE MN& OP=(MN&& n)NE;CE MN& OP+=(CO MN& n)NE;CE MN& OP-=(CO MN& n)NE;CE MN& OP*=(CO MN& n)NE;IN MN& OP/=(CO MN& n);CE MN& OP<<=(int n)NE;CE MN& OP>>=(int n)NE;CE MN& OP++()NE;CE MN OP++(int)NE;CE MN& OP--()NE;CE MN OP--(int)NE;DC_OF_AR_FOR_MN(+);DC_OF_AR_FOR_MN(-);DC_OF_AR_FOR_MN(*);DC_OF_AR_FOR_MN(/);CE MN OP<<(int n)CO NE;CE MN OP>>(int n)CO NE;CE MN OP-()CO NE;CE MN& SignInvert()NE;CE MN& Double()NE;CE MN& Halve()NE;CE MN& Invert();TE CE MN& PositivePW(T&& EX)NE;TE CE MN& NonNegativePW(T&& EX)NE;TE CE MN& PW(T&& EX);CE uint RP()CO NE;CE Mod Reduce()CO NE;ST CE MN DeRP(CO uint& n)NE;ST IN CO MN& Formise(CO uint& n)NE;ST IN CO MN& Inverse(CO uint& n)NE;ST IN CO MN& Factorial(CO uint& n)NE;ST IN CO MN& FactorialInverse(CO uint& n)NE;ST IN CO MN& zero()NE;ST IN CO MN& one()NE;PU:ST CE uint Form(CO uint& n)NE;ST CE ull& Reduction(ull& n)NE;ST CE ull& ReducedMU(ull& n,CO uint& m)NE;ST CE uint MU(CO uint& n0,CO uint& n1)NE;ST CE uint BaseSquareTruncation(uint& n)NE;TE CE MN& Ref(T&& n)NE;};TE CE MN Twice(CO MN& n)NE;TE CE MN Half(CO MN& n)NE;TE CE MN Inverse(CO MN& n);TE CE MN PW(CO MN& n,CO T& EX);TE CE MN<2> PW(CO MN<2>& n,CO T& p);TE CE T Square(CO T& t);TE <> CE MN<2> Square >(CO MN<2>& t);TE CE VO swap(MN& n0,MN& n1)NE;TE IN string to_string(CO MN& n)NE;TE IN basic_ostream& OP<<(basic_ostream& os,CO MN& n); TE CL COantsForMod{PU:COantsForMod()= delete;ST CE bool g_even = ((M & 1)== 0);ST CE uint g_memory_bound = 1000000;ST CE uint g_memory_LE = M < g_memory_bound?M:g_memory_bound;ST CE ull MNBasePW(ull&& EX)NE;ST CE uint g_M_minus = M - 1;ST CE uint g_M_minus_2 = M - 2;ST CE uint g_M_minus_2_neg = 2 - M;ST CE int g_MN_digit = 32;ST CE ull g_MN_base = ull(1)<< g_MN_digit;ST CE uint g_MN_base_minus = uint(g_MN_base - 1);ST CE uint g_MN_digit_half = (g_MN_digit + 1)>> 1;ST CE uint g_MN_base_sqrt_minus = (1 << g_MN_digit_half)- 1;ST CE uint g_MN_M_neg_inverse = uint((g_MN_base - MNBasePW((ull(1)<< (g_MN_digit - 1))- 1))& g_MN_base_minus);ST CE uint g_MN_base_mod = uint(g_MN_base % M);ST CE uint g_MN_base_square_mod = uint(((g_MN_base % M)* (g_MN_base % M))% M);};TE CE ull COantsForMod::MNBasePW(ull&& EX)NE{ull prod = 1;ull PW = M;WH(EX != 0){(EX & 1)== 1?(prod *= PW)&= g_MN_base_minus:prod;EX >>= 1;(PW *= PW)&= g_MN_base_minus;}RE prod;} #include #define SET_VE_32_128_FOR_SIMD(UINT,VE_NAME,SCALAR0,SCALAR1,SCALAR2,SCALAR3)CE UINT VE_NAME ## _copy[4] ={SCALAR0,SCALAR1,SCALAR2,SCALAR3};ST CO __m128i v_ ## VE_NAME = _mm_load_si128((__m128i*)VE_NAME ##_copy); #define SET_VE_64_128_FOR_SIMD(UINT,VE_NAME,SCALAR0,SCALAR1)CE UINT VE_NAME ## _copy[2] ={SCALAR0,SCALAR1};ST CO __m128i v_ ## VE_NAME = _mm_load_si128((__m128i*)VE_NAME ##_copy); #define SET_VE_64_256_FOR_SIMD(ULL,VE_NAME,SCALAR0,SCALAR1,SCALAR2,SCALAR3)CE ULL VE_NAME ## _copy[4] ={SCALAR0,SCALAR1,SCALAR2,SCALAR3};ST CO __m256i v_ ## VE_NAME = _mm256_load_si256((__m256i*)VE_NAME ##_copy); #define SET_CO_VE_32_128_FOR_SIMD(UINT,VE_NAME,SCALAR)SET_VE_32_128_FOR_SIMD(UINT,VE_NAME,SCALAR,SCALAR,SCALAR,SCALAR); #define SET_CO_VE_64_128_FOR_SIMD(ULL,VE_NAME,SCALAR)SET_VE_64_128_FOR_SIMD(ULL,VE_NAME,SCALAR,SCALAR); #define SET_CO_VE_64_256_FOR_SIMD(ULL,VE_NAME,SCALAR)SET_VE_64_256_FOR_SIMD(ULL,VE_NAME,SCALAR,SCALAR,SCALAR,SCALAR); TE CL COantsForSIMDForMod{PU:COantsForSIMDForMod()= delete;ST IN CO __m128i& v_M()NE;ST IN CO __m128i& v_Mull()NE;ST IN CO __m128i& v_M_minus()NE;ST IN CO __m128i& v_M_neg_inverse()NE;ST IN CO __m128i& v_digitull()NE;};TE IN CO __m128i& COantsForSIMDForMod::v_M()NE{SET_CO_VE_32_128_FOR_SIMD(uint,M,M);RE v_M;}TE IN CO __m128i& COantsForSIMDForMod::v_Mull()NE{SET_CO_VE_64_128_FOR_SIMD(ull,Mull,M);RE v_Mull;}TE IN CO __m128i& COantsForSIMDForMod::v_M_minus()NE{SET_CO_VE_32_128_FOR_SIMD(uint,M_minus,M - 1);RE v_M_minus;}TE IN CO __m128i& COantsForSIMDForMod::v_M_neg_inverse()NE{SET_CO_VE_32_128_FOR_SIMD(uint,M_neg_inverse,COantsForMod::g_MN_M_neg_inverse);RE v_M_neg_inverse;}TE IN CO __m128i& COantsForSIMDForMod::v_digitull()NE{SET_CO_VE_64_128_FOR_SIMD(ull,digitull,COantsForMod::g_MN_digit);RE v_digitull;}TE IN __m128i& SIMD_RS_32_128(__m128i& v)NE{CO __m128i& v_M = COantsForSIMDForMod::v_M();RE v -= v_M * _mm_cmpgt_epi32(v,v_M);}TE IN __m128i& SIMD_RS_64_128(__m128i& v)NE{ull v_copy[2];_mm_store_si128((__m128i*)v_copy,v);for(uint i = 0;i < 2;i++){ull& v_copy_i = v_copy[i];v_copy_i = (v_copy_i < M?0:M);}RE v -= _mm_load_si128((__m128i*)v_copy);}TE IN __m256i& SIMD_RS_64_256(__m256i& v)NE{ull v_copy[4];_mm256_store_si256((__m256i*)v_copy,v);for(uint i = 0;i < 4;i++){ull& v_copy_i = v_copy[i];v_copy_i = (v_copy_i < M?0:M);}RE v -= _mm256_load_si256((__m256i*)v_copy);}CE int SIMD_Shuffle(CRI a0,CRI a1,CRI a2,CRI a3)NE{RE (a0 << (0 << 1))+ (a1 << (1 << 1))+ (a2 << (2 << 1))+ (a3 << (3 << 1));}TE IN VO SIMD_Addition_32_64(CO Mod& a0,CO Mod& a1,CO Mod& b0,CO Mod& b1,Mod& c0,Mod& c1)NE{uint a_copy[4] ={a0.m_n,a1.m_n,0,0};uint b_copy[4] ={b0.m_n,b1.m_n,0,0};__m128i v_a = _mm_load_si128((__m128i*)a_copy);v_a += _mm_load_si128((__m128i*)b_copy);ST CO __m128i& v_M_minus = COantsForSIMDForMod::v_M_minus();ST CO __m128i& v_M = COantsForSIMDForMod::v_M();v_a += _mm_cmpgt_epi32(v_a,v_M_minus)& v_M;_mm_store_si128((__m128i*)a_copy,v_a);c0.m_n = MO(a_copy[0]);c1.m_n = MO(a_copy[1]);RE;}TE IN VO SIMD_Addition_32_128(CO Mod& a0,CO Mod& a1,CO Mod& a2,CO Mod& a3,CO Mod& b0,CO Mod& b1,CO Mod& b2,CO Mod& b3,Mod& c0,Mod& c1,Mod& c2,Mod& c3)NE{uint a_copy[4] ={a0.m_n,a1.m_n,a2.m_n,a3.m_n};uint b_copy[4] ={b0.m_n,b1.m_n,b2.m_n,b3.m_n};__m128i v_a = _mm_load_si128((__m128i*)a_copy)+ _mm_load_si128((__m128i*)b_copy);_mm_store_si128((__m128i*)a_copy,v_a);for(uint i = 0;i < 4;i++){b_copy[i] = a_copy[i] < M?0:M;}v_a -= _mm_load_si128((__m128i*)b_copy);_mm_store_si128((__m128i*)a_copy,v_a);c0.m_n = MO(a_copy[0]);c1.m_n = MO(a_copy[1]);c2.m_n = MO(a_copy[2]);c3.m_n = MO(a_copy[3]);RE;}TE IN VO SIMD_Substracition_32_64(CO Mod& a0,CO Mod& a1,CO Mod& b0,CO Mod& b1,Mod& c0,Mod& c1)NE{uint a_copy[4] ={a0.m_n,a1.m_n,0,0};uint b_copy[4] ={b0.m_n,b1.m_n,0,0};__m128i v_a = _mm_load_si128((__m128i*)a_copy);__m128i v_b = _mm_load_si128((__m128i*)b_copy);_mm_store_si128((__m128i*)a_copy,v_a);for(uint i = 0;i < 2;i++){b_copy[i] = a_copy[i] < b_copy[i]?M:0;}(v_a += _mm_load_si128((__m128i*)b_copy))-= v_b;_mm_store_si128((__m128i*)a_copy,v_a);c0.m_n = MO(a_copy[0]);c1.m_n = MO(a_copy[1]);RE;}TE IN VO SIMD_Subtraction_32_128(CO Mod& a0,CO Mod& a1,CO Mod& a2,CO Mod& a3,CO Mod& b0,CO Mod& b1,CO Mod& b2,CO Mod& b3,Mod& c0,Mod& c1,Mod& c2,Mod& c3)NE{uint a_copy[4] ={a0.m_n,a1.m_n,a2.m_n,a3.m_n};uint b_copy[4] ={b0.m_n,b1.m_n,b2.m_n,b3.m_n};__m128i v_a = _mm_load_si128((__m128i*)a_copy);__m128i v_b = _mm_load_si128((__m128i*)b_copy);_mm_store_si128((__m128i*)a_copy,v_a);for(uint i = 0;i < 4;i++){b_copy[i] = a_copy[i] < b_copy[i]?M:0;}(v_a += _mm_load_si128((__m128i*)b_copy))-= v_b;_mm_store_si128((__m128i*)a_copy,v_a);c0.m_n = MO(a_copy[0]);c1.m_n = MO(a_copy[1]);c2.m_n = MO(a_copy[2]);c3.m_n = MO(a_copy[3]);RE;} US MP = Mod

;US MNP = MN

;TE CE uint MN::Form(CO uint& n)NE{ull n_copy = n;RE uint(MO(Reduction(n_copy *= COantsForMod::g_MN_base_square_mod)));}TE CE ull& MN::Reduction(ull& n)NE{ull n_sub = n & COantsForMod::g_MN_base_minus;RE ((n += ((n_sub *= COantsForMod::g_MN_M_neg_inverse)&= COantsForMod::g_MN_base_minus)*= M)>>= COantsForMod::g_MN_digit)< M?n:n -= M;}TE CE ull& MN::ReducedMU(ull& n,CO uint& m)NE{RE Reduction(n *= m);}TE CE uint MN::MU(CO uint& n0,CO uint& n1)NE{ull n0_copy = n0;RE uint(MO(ReducedMU(ReducedMU(n0_copy,n1),COantsForMod::g_MN_base_square_mod)));}TE CE uint MN::BaseSquareTruncation(uint& n)NE{CO uint n_u = n >> COantsForMod::g_MN_digit_half;n &= COantsForMod::g_MN_base_sqrt_minus;RE n_u;}TE CE MN::MN()NE:Mod(){static_assert(! COantsForMod::g_even);}TE CE MN::MN(CO MN& n)NE:Mod(n){}TE CE MN::MN(MN& n)NE:Mod(n){}TE CE MN::MN(MN&& n)NE:Mod(MO(n)){}TE TE CE MN::MN(CO T& n)NE:Mod(n){static_assert(! COantsForMod::g_even);Mod::m_n = Form(Mod::m_n);}TE TE CE MN::MN(T&& n)NE:Mod(forward(n)){static_assert(! COantsForMod::g_even);Mod::m_n = Form(Mod::m_n);}TE CE MN& MN::OP=(CO MN& n)NE{RE Ref(Mod::OP=(n));}TE CE MN& MN::OP=(MN&& n)NE{RE Ref(Mod::OP=(MO(n)));}TE CE MN& MN::OP+=(CO MN& n)NE{RE Ref(Mod::OP+=(n));}TE CE MN& MN::OP-=(CO MN& n)NE{RE Ref(Mod::OP-=(n));}TE CE MN& MN::OP*=(CO MN& n)NE{ull m_n_copy = Mod::m_n;RE Ref(Mod::m_n = MO(ReducedMU(m_n_copy,n.m_n)));}TE IN MN& MN::OP/=(CO MN& n){RE OP*=(MN(n).Invert());}TE CE MN& MN::OP<<=(int n)NE{RE Ref(Mod::OP<<=(n));}TE CE MN& MN::OP>>=(int n)NE{RE Ref(Mod::OP>>=(n));}TE CE MN& MN::OP++()NE{RE Ref(Mod::Normalise(Mod::m_n += COantsForMod::g_MN_base_mod));}TE CE MN MN::OP++(int)NE{MN n{*TH};OP++();RE n;}TE CE MN& MN::OP--()NE{RE Ref(Mod::m_n < COantsForMod::g_MN_base_mod?((Mod::m_n += M)-= COantsForMod::g_MN_base_mod):Mod::m_n -= COantsForMod::g_MN_base_mod);}TE CE MN MN::OP--(int)NE{MN n{*TH};OP--();RE n;}DF_OF_AR_FOR_MN(+,MN(forward(n))+= *TH);DF_OF_AR_FOR_MN(-,MN(forward(n)).SignInvert()+= *TH);DF_OF_AR_FOR_MN(*,MN(forward(n))*= *TH);DF_OF_AR_FOR_MN(/,MN(forward(n)).Invert()*= *TH);TE CE MN MN::OP<<(int n)CO NE{RE MO(MN(*TH)<<= n);}TE CE MN MN::OP>>(int n)CO NE{RE MO(MN(*TH)>>= n);}TE CE MN MN::OP-()CO NE{RE MO(MN(*TH).SignInvert());}TE CE MN& MN::SignInvert()NE{RE Ref(Mod::m_n > 0?Mod::m_n = M - Mod::m_n:Mod::m_n);}TE CE MN& MN::Double()NE{RE Ref(Mod::Double());}TE CE MN& MN::Halve()NE{RE Ref(Mod::Halve());}TE CE MN& MN::Invert(){assert(Mod::m_n > 0);RE PositivePW(uint(COantsForMod::g_M_minus_2));}TE TE CE MN& MN::PositivePW(T&& EX)NE{MN PW{*TH};(--EX)%= COantsForMod::g_M_minus_2;WH(EX != 0){(EX & 1)== 1?OP*=(PW):*TH;EX >>= 1;PW *= PW;}RE *TH;}TE TE CE MN& MN::NonNegativePW(T&& EX)NE{RE EX == 0?Ref(Mod::m_n = 1):PositivePW(forward(EX));}TE TE CE MN& MN::PW(T&& EX){bool neg = EX < 0;assert(!(neg && Mod::m_n == 0));RE neg?PositivePW(forward(EX *= COantsForMod::g_M_minus_2_neg)):NonNegativePW(forward(EX));}TE CE uint MN::RP()CO NE{ull m_n_copy = Mod::m_n;RE MO(Reduction(m_n_copy));}TE CE Mod MN::Reduce()CO NE{ull m_n_copy = Mod::m_n;RE Mod::DeRP(MO(Reduction(m_n_copy)));}TE CE MN MN::DeRP(CO uint& n)NE{RE MN(Mod::DeRP(n));}TE IN CO MN& MN::Formise(CO uint& n)NE{ST MN memory[COantsForMod::g_memory_LE] ={zero(),one()};ST uint LE_curr = 2;WH(LE_curr <= n){memory[LE_curr] = DeRP(LE_curr);LE_curr++;}RE memory[n];}TE IN CO MN& MN::Inverse(CO uint& n)NE{ST MN memory[COantsForMod::g_memory_LE] ={zero(),one()};ST uint LE_curr = 2;WH(LE_curr <= n){memory[LE_curr] = MN(Mod::Inverse(LE_curr));LE_curr++;}RE memory[n];}TE IN CO MN& MN::Factorial(CO uint& n)NE{ST MN memory[COantsForMod::g_memory_LE] ={one(),one()};ST uint LE_curr = 2;ST MN val_curr{one()};MN val_last{one()};WH(LE_curr <= n){memory[LE_curr++] = val_curr *= ++val_last;}RE memory[n];}TE IN CO MN& MN::FactorialInverse(CO uint& n)NE{ST MN memory[COantsForMod::g_memory_LE] ={one(),one()};ST uint LE_curr = 2;ST MN val_curr{one()};MN val_last{one()};WH(LE_curr <= n){memory[LE_curr] = val_curr *= Inverse(LE_curr);LE_curr++;}RE memory[n];}TE IN CO MN& MN::zero()NE{ST CE MN z{};RE z;}TE IN CO MN& MN::one()NE{ST CE MN o{DeRP(1)};RE o;}TE TE CE MN& MN::Ref(T&& n)NE{RE *TH;}TE CE MN Twice(CO MN& n)NE{RE MO(MN(n).Double());}TE CE MN Half(CO MN& n)NE{RE MO(MN(n).Halve());}TE CE MN Inverse(CO MN& n){RE MO(MN(n).Invert());}TE CE MN PW(CO MN& n,CO T& EX){RE MO(MN(n).PW(T(EX)));}TE CE VO swap(MN& n0,MN& n1)NE{n0.swap(n1);}TE IN string to_string(CO MN& n)NE{RE to_string(n.RP())+ " + MZ";}TE IN basic_ostream& OP<<(basic_ostream& os,CO MN& n){RE os << n.RP();} TE CE Mod::Mod()NE:m_n(){}TE CE Mod::Mod(CO Mod& n)NE:m_n(n.m_n){}TE CE Mod::Mod(Mod& n)NE:m_n(n.m_n){}TE CE Mod::Mod(Mod&& n)NE:m_n(MO(n.m_n)){}TE TE CE Mod::Mod(CO T& n)NE:m_n(RS(n)){}TE TE CE Mod::Mod(T& n)NE:m_n(RS(decay_t(n))){}TE TE CE Mod::Mod(T&& n)NE:m_n(RS(forward(n))){}TE CE Mod& Mod::OP=(CO Mod& n)NE{RE Ref(m_n = n.m_n);}TE CE Mod& Mod::OP=(Mod&& n)NE{RE Ref(m_n = MO(n.m_n));}TE CE Mod& Mod::OP+=(CO Mod& n)NE{RE Ref(Normalise(m_n += n.m_n));}TE CE Mod& Mod::OP-=(CO Mod& n)NE{RE Ref(m_n < n.m_n?(m_n += M)-= n.m_n:m_n -= n.m_n);}TE CE Mod& Mod::OP*=(CO Mod& n)NE{RE Ref(m_n = COantsForMod::g_even?RS(ull(m_n)* n.m_n):MN::MU(m_n,n.m_n));}TE <> CE MP& MP::OP*=(CO MP& n)NE{ull m_n_copy = m_n;RE Ref(m_n = MO((m_n_copy *= n.m_n)< P?m_n_copy:RSP(m_n_copy)));}TE IN Mod& Mod::OP/=(CO Mod& n){RE OP*=(Mod(n).Invert());}TE CE Mod& Mod::OP<<=(int n)NE{WH(n-- > 0){Normalise(m_n <<= 1);}RE *TH;}TE CE Mod& Mod::OP>>=(int n)NE{WH(n-- > 0){((m_n & 1)== 0?m_n:m_n += M)>>= 1;}RE *TH;}TE CE Mod& Mod::OP++()NE{RE Ref(m_n < COantsForMod::g_M_minus?++m_n:m_n = 0);}TE CE Mod Mod::OP++(int)NE{Mod n{*TH};OP++();RE n;}TE CE Mod& Mod::OP--()NE{RE Ref(m_n == 0?m_n = COantsForMod::g_M_minus:--m_n);}TE CE Mod Mod::OP--(int)NE{Mod n{*TH};OP--();RE n;}DF_OF_CM_FOR_MOD(==);DF_OF_CM_FOR_MOD(!=);DF_OF_CM_FOR_MOD(>);DF_OF_CM_FOR_MOD(>=);DF_OF_CM_FOR_MOD(<);DF_OF_CM_FOR_MOD(<=);DF_OF_AR_FOR_MOD(+,Mod(forward(n))+= *TH);DF_OF_AR_FOR_MOD(-,Mod(forward(n)).SignInvert()+= *TH);DF_OF_AR_FOR_MOD(*,Mod(forward(n))*= *TH);DF_OF_AR_FOR_MOD(/,Mod(forward(n)).Invert()*= *TH);TE CE Mod Mod::OP<<(int n)CO NE{RE MO(Mod(*TH)<<= n);}TE CE Mod Mod::OP>>(int n)CO NE{RE MO(Mod(*TH)>>= n);}TE CE Mod Mod::OP-()CO NE{RE MO(Mod(*TH).SignInvert());}TE CE Mod& Mod::SignInvert()NE{RE Ref(m_n > 0?m_n = M - m_n:m_n);}TE CE Mod& Mod::Double()NE{RE Ref(Normalise(m_n <<= 1));}TE CE Mod& Mod::Halve()NE{RE Ref(((m_n & 1)== 0?m_n:m_n += M)>>= 1);}TE IN Mod& Mod::Invert(){assert(m_n > 0);uint m_n_neg;RE m_n < COantsForMod::g_memory_LE?Ref(m_n = Inverse(m_n).m_n):(m_n_neg = M - m_n < COantsForMod::g_memory_LE)?Ref(m_n = M - Inverse(m_n_neg).m_n):PositivePW(uint(COantsForMod::g_M_minus_2));}TE <> IN Mod<2>& Mod<2>::Invert(){assert(m_n > 0);RE *TH;}TE TE CE Mod& Mod::PositivePW(T&& EX)NE{Mod PW{*TH};EX--;WH(EX != 0){(EX & 1)== 1?OP*=(PW):*TH;EX >>= 1;PW *= PW;}RE *TH;}TE <> TE CE Mod<2>& Mod<2>::PositivePW(T&& EX)NE{RE *TH;}TE TE CE Mod& Mod::NonNegativePW(T&& EX)NE{RE EX == 0?Ref(m_n = 1):Ref(PositivePW(forward(EX)));}TE TE CE Mod& Mod::PW(T&& EX){bool neg = EX < 0;assert(!(neg && m_n == 0));neg?EX *= COantsForMod::g_M_minus_2_neg:EX;RE m_n == 0?*TH:(EX %= COantsForMod::g_M_minus)== 0?Ref(m_n = 1):PositivePW(forward(EX));}TE IN CO Mod& Mod::Inverse(CO uint& n)NE{ST Mod memory[COantsForMod::g_memory_LE] ={zero(),one()};ST uint LE_curr = 2;WH(LE_curr <= n){memory[LE_curr].m_n = M - MN::MU(memory[M % LE_curr].m_n,M / LE_curr);LE_curr++;}RE memory[n];}TE IN CO Mod& Mod::Factorial(CO uint& n)NE{ST Mod memory[COantsForMod::g_memory_LE] ={one(),one()};ST uint LE_curr = 2;WH(LE_curr <= n){memory[LE_curr] = MN::Factorial(LE_curr).Reduce();LE_curr++;}RE memory[n];}TE IN CO Mod& Mod::FactorialInverse(CO uint& n)NE{ST Mod memory[COantsForMod::g_memory_LE] ={one(),one()};ST uint LE_curr = 2;WH(LE_curr <= n){memory[LE_curr] = MN::FactorialInverse(LE_curr).Reduce();LE_curr++;}RE memory[n];}TE CE VO Mod::swap(Mod& n)NE{std::swap(m_n,n.m_n);}TE CE CO uint& Mod::RP()CO NE{RE m_n;}TE CE Mod Mod::DeRP(CO uint& n)NE{Mod n_copy{};n_copy.m_n = n;RE n_copy;}TE CE uint& Mod::Normalise(uint& n)NE{RE n < M?n:n -= M;}TE IN CO Mod& Mod::zero()NE{ST CE Mod z{};RE z;}TE IN CO Mod& Mod::one()NE{ST CE Mod o{DeRP(1)};RE o;}TE TE CE Mod& Mod::Ref(T&& n)NE{RE *TH;}TE CE Mod Twice(CO Mod& n)NE{RE MO(Mod(n).Double());}TE CE Mod Half(CO Mod& n)NE{RE MO(Mod(n).Halve());}TE IN Mod Inverse(CO Mod& n){RE MO(Mod(n).Invert());}TE CE Mod Inverse_COrexpr(CO uint& n)NE{RE MO(Mod::DeRP(RS(n)).NonNegativePW(M - 2));}TE CE Mod PW(CO Mod& n,CO T& EX){RE MO(Mod(n).PW(T(EX)));}TE CE VO swap(Mod& n0,Mod& n1)NE{n0.swap(n1);}TE IN string to_string(CO Mod& n)NE{RE to_string(n.RP())+ " + MZ";}TE IN basic_ostream& OP<<(basic_ostream& os,CO Mod& n){RE os << n.RP();} #define SFINAE_FOR_MA(DEFAULT) TY Arg,enable_if_t::value>* DEFAULT #define VEISATION_FOR_TTMA_FOR_MOD(MODULO) TE <> IN TTMA >& TTMA >::OP+=(CO TTMA >& mat) NE{SIMD_Addition_32_128(m_M00,m_M01,m_M10,m_M11,mat.m_M00,mat.m_M01,mat.m_M10,mat.m_M11,m_M00,m_M01,m_M10,m_M11);RE *TH;}TE <> IN TTMA >& TTMA >::OP+=(CO TTMA >& mat) NE{SIMD_Addition_32_128(m_M00,m_M01,m_M10,m_M11,mat.m_M00,mat.m_M01,mat.m_M10,mat.m_M11,m_M00,m_M01,m_M10,m_M11);RE *TH;}TE <> IN TTMA >& TTMA >::OP-=(CO TTMA >& mat) NE{SIMD_Subtraction_32_128(m_M00,m_M01,m_M10,m_M11,mat.m_M00,mat.m_M01,mat.m_M10,mat.m_M11,m_M00,m_M01,m_M10,m_M11);RE *TH;}TE <> IN TTMA >& TTMA >::OP-=(CO TTMA >& mat) NE{SIMD_Subtraction_32_128(m_M00,m_M01,m_M10,m_M11,mat.m_M00,mat.m_M01,mat.m_M10,mat.m_M11,m_M00,m_M01,m_M10,m_M11);RE *TH;} TE CL TTMA;TE CL TOMA{PU:T m_M0;T m_M1;PU:CE TOMA(CO T& M0 = T(),CO T& M1 = T())NE;CE TOMA(T&& M0,T&& M1)NE;CE TOMA(CO TOMA& mat)NE;CE TOMA(TOMA&& mat)NE;CE TOMA& OP=(CO TOMA& mat)NE;CE TOMA& OP=(TOMA&& mat)NE;CE TOMA& OP+=(CO TOMA& mat)NE;CE TOMA& OP-=(CO TOMA& mat)NE;IN TOMA& OP*=(CO TTMA& mat)NE;CE TOMA& OP*=(CO T& scalar)NE;TE CE TOMA& OP*=(CO Arg& scalar)NE;IN TOMA& OP/=(CO T& scalar);TE CE TOMA& OP/=(CO Arg& scalar);IN TOMA& OP%=(CO T& scalar);TE CE TOMA& OP%=(CO Arg& scalar);CE T& GetEntry(CRUI y) CO NE;CE T& RefEntry(CRUI y)NE;}; #define VECTRISATION_FOR_TTMA_FOR_MOD(TYPE,MODULO) TE <> IN TTMA& TTMA::OP+=(CO TTMA& mat)NE{uint TH_copy[4] ={m_M00.m_n,m_M01.m_n,m_M10.m_n,m_M11.m_n};uint mat_copy[4] ={mat.m_M00.m_n,mat.m_M01.m_n,mat.m_M10.m_n,mat.m_M11.m_n};__m128i v_sum = _mm_load_si128((__m128i*)this_copy) + _mm_load_si128((__m128i*)mat_copy );_mm_store_si128((__m128i*)this_copy,v_sum);uint dif[4];for(uint i = 0;i < 4;i++){dif[i] = this_copy[i] < MODULO?0:MODULO;}v_sum -= _mm_load_si128((__m128i*)dif);_mm_store_si128((__m128i*)this_copy,v_sum);m_M00.m_n = TH_copy[0];m_M01.m_n = TH_copy[1];m_M10.m_n = TH_copy[2];m_M11.m_n = TH_copy[3];RE *TH;}TE <> IN TTMA& TTMA::OP-=(CO TTMA& mat)NE{SET_CO_VE_32_128_FOR_SIMD(uint,M,MODULO);uint TH_copy[4] ={m_M00.m_n,m_M01.m_n,m_M10.m_n,m_M11.m_n};uint mat_copy[4] ={mat.m_M00.m_n,mat.m_M01.m_n,mat.m_M10.m_n,mat.m_M11.m_n};__m128i v_sub = _mm_load_si128((__m128i*)this_copy) + v_M - _mm_load_si128((__m128i*)mat_copy );_mm_store_si128((__m128i*)this_copy,v_sub);uint dif[4];for(uint i = 0;i < 4;i++){dif[i] = this_copy[i] < MODULO?0:MODULO;}v_sub -= _mm_load_si128((__m128i*)dif);_mm_store_si128((__m128i*)this_copy,v_sub);for(uint i = 0;i < 4;i++){Mod::Normalise(TH_copy[i]);}m_M00.m_n = TH_copy[0];m_M01.m_n = TH_copy[1];m_M10.m_n = TH_copy[2];m_M11.m_n = TH_copy[3];RE *TH;} TE CL TTMA{PU:T m_M00;T m_M01;T m_M10;T m_M11;PU:CE TTMA(CO T& M00,CO T& M01,CO T& M10,CO T& M11)NE;CE TTMA(T&& M00,T&& M01,T&& M10,T&& M11)NE;CE TTMA(CO T& n = T())NE;TE CE TTMA(CO Arg& n)NE;CE TTMA(CO TTMA& mat)NE;CE TTMA(TTMA&& mat)NE;CE TTMA& OP=(CO TTMA& mat)NE;CE TTMA& OP=(TTMA&& mat)NE;IN TTMA& OP+=(CO TTMA& mat)NE;IN TTMA& OP-=(CO TTMA& mat)NE;IN TTMA& OP*=(CO TTMA& mat)NE;CE TTMA& OP*=(CO T& scalar)NE;TE CE TTMA& OP*=(CO Arg& scalar)NE;IN TTMA& OP/=(CO TTMA& mat);IN TTMA& OP/=(CO T& scalar);TE CE TTMA& OP/=(CO Arg& scalar);IN TTMA& OP%=(CO T& scalar);TE CE TTMA& OP%=(CO Arg& scalar);IN TTMA& Invert();IN TTMA OP*(CO TTMA& mat) CO NE;IN TOMA OP*(CO TOMA& mat) CO NE;IN TTMA OP/(CO TTMA& mat) CO;CE TTMA Square() CO NE;CE T& GetEntry(CRUI y,CRUI x) CO NE;CE T& RefEntry(CRUI y,CRUI x)NE;};TE CE TTMA OP*(CO Arg& scalar,CO TTMA& mat)NE;TE CE TTMA OP*(CO TTMA& mat,CO T& scalar)NE;TE IN TTMA OP/(CO TTMA& mat,CO Arg& scalar);TE IN TTMA OP%(CO TTMA& mat,CO Arg& scalar); TE CE TOMA::TOMA(CO T& M0,CO T& M1)NE:m_M0(M0),m_M1(M1){}TE CE TOMA::TOMA(T&& M0,T&& M1)NE:m_M0(MO(M0)),m_M1(MO(M1)){}TE CE TOMA::TOMA(CO TOMA& mat)NE:m_M0(mat.m_M0),m_M1(mat.m_M1){}TE CE TOMA::TOMA(TOMA&& mat)NE:m_M0(MO(mat.m_M0)),m_M1(MO(mat.m_M1)){}TE CE TOMA& TOMA::OP=(CO TOMA& mat)NE{if(&mat != TH){m_M0 = mat.m_M0;m_M1 = mat.m_M1;}RE *TH;}TE CE TOMA& TOMA::OP=(TOMA&& mat)NE{m_M0 = MO(mat.m_M0);m_M1 = MO(mat.m_M1);RE *TH;}TE CE TOMA& TOMA::OP+=(CO TOMA& mat)NE{m_M0 += mat.m_M0;m_M1 += mat.m_M1;RE *TH;}TE CE TOMA& TOMA::OP-=(CO TOMA& mat)NE{m_M0 -= mat.m_M0;m_M1 -= mat.m_M1;RE *TH;}TE IN TOMA& TOMA::OP*=(CO TTMA& mat)NE{RE OP=(mat * *TH);}TE CE TOMA& TOMA::OP*=(CO T& scalar)NE{m_M0 *= scalar;m_M1 *= scalar;RE *TH;}TE TE CE TOMA& TOMA::OP*=(CO Arg& scalar)NE{RE OP*=(T(scalar));}TE IN TOMA& TOMA::OP/=(CO T& scalar){m_M0 /= scalar;m_M1 /= scalar;RE *TH;}TE TE CE TOMA& TOMA::OP/=(CO Arg& scalar){RE OP/=(T(scalar));}TE IN TOMA& TOMA::OP%=(CO T& scalar){m_M0 %= scalar;m_M1 %= scalar;RE *TH;}TE TE CE TOMA& TOMA::OP%=(CO Arg& scalar){RE OP%=(T(scalar));}TE CE T& TOMA::GetEntry(CRUI y) CO NE{RE y == 0?m_M0:m_M1;}TE CE T& TOMA::RefEntry(CRUI y)NE{RE y == 0?m_M0:m_M1;} TE CE TTMA::TTMA(CO T& M00,CO T& M01,CO T& M10,CO T& M11) NE:m_M00(M00),m_M01(M01),m_M10(M10),m_M11(M11){}TE CE TTMA::TTMA(T&& M00,T&& M01,T&& M10,T&& M11) NE:m_M00(MO(M00)),m_M01(MO(M01)),m_M10(MO(M10)),m_M11(MO(M11)){}TE CE TTMA::TTMA(CO T& n) NE:m_M00(n),m_M01(),m_M10(),m_M11(n){}TE TE CE TTMA::TTMA(CO Arg& n) NE:TTMA(T(n)){} TE CE TTMA::TTMA(CO TTMA& mat) NE:m_M00(mat.m_M00),m_M01(mat.m_M01),m_M10(mat.m_M10),m_M11(mat.m_M11){}TE CE TTMA::TTMA(TTMA&& mat) NE:m_M00(MO(mat.m_M00)),m_M01(MO(mat.m_M01)),m_M10(MO(mat.m_M10)),m_M11(MO(mat.m_M11)){}TE CE TTMA& TTMA::OP=(CO TTMA& mat) NE{if(&mat != TH){m_M00 = mat.m_M00;m_M01 = mat.m_M01;m_M10 = mat.m_M10;m_M11 = mat.m_M11;}RE *TH;}TE CE TTMA& TTMA::OP=(TTMA&& mat) NE{m_M00 = MO(mat.m_M00);m_M01 = MO(mat.m_M01);m_M10 = MO(mat.m_M10);m_M11 = MO(mat.m_M11);RE *TH;}TE IN TTMA& TTMA::OP+=(CO TTMA& mat) NE{m_M00 += mat.m_M00;m_M01 += mat.m_M01;m_M10 += mat.m_M10;m_M11 += mat.m_M11;RE *TH;}TE IN TTMA& TTMA::OP-=(CO TTMA& mat) NE{m_M00 -= mat.m_M00;m_M01 -= mat.m_M01;m_M10 -= mat.m_M10;m_M11 -= mat.m_M11;RE *TH;}TE IN TTMA& TTMA::OP*=(CO TTMA& mat) NE{RE OP=(*TH * mat);}TE CE TTMA& TTMA::OP*=(CO T& scalar) NE{m_M00 *= scalar;m_M01 *= scalar;m_M10 *= scalar;m_M11 *= scalar;RE *TH;}TE TE CE TTMA& TTMA::OP*=(CO Arg& scalar) NE{RE OP*=(T(scalar));}TE IN TTMA& TTMA::OP/=(CO TTMA& mat){RE OP=(*TH / mat);}TE IN TTMA& TTMA::OP/=(CO T& scalar){RE OP*=(T(1) / scalar);}TE TE CE TTMA& TTMA::OP/=(CO Arg& scalar){RE OP/=(T(scalar));}TE IN TTMA& TTMA::OP%=(CO T& scalar){m_M00 %= scalar;m_M01 %= scalar;m_M10 %= scalar;m_M11 %= scalar;RE *TH;}TE TE CE TTMA& TTMA::OP%=(CO Arg& scalar){RE OP%=(T(scalar));}TE IN TTMA& TTMA::Invert(){CO T det_inv{T(1) / (m_M00 * m_M11 - m_M01 * m_M10)};swap(m_M00,m_M11);m_M01 = T() - m_M01;m_M11 = T() - m_M10;RE OP*=(det_inv);} TE IN TTMA TTMA::OP*(CO TTMA& mat) CO NE{RE TTMA(m_M00 * mat.m_M00 + m_M01 * mat.m_M10,m_M00 * mat.m_M01 + m_M01 * mat.m_M11,m_M10 * mat.m_M00 + m_M11 * mat.m_M10,m_M10 * mat.m_M01 + m_M11 * mat.m_M11);}TE IN TOMA TTMA::OP*(CO TOMA& mat) CO NE{RE TOMA(m_M00 * mat.m_M0 + m_M01 * mat.m_M1,m_M10 * mat.m_M0 + m_M11 * mat.m_M1);}TE IN TTMA TTMA::OP/(CO TTMA& mat) CO{CO T det_inv{T(1) / (mat.m_M00 * mat.m_M11 - mat.m_M01 * mat.m_M10)};RE TTMA((m_M00 * mat.m_M11 - m_M01 * mat.m_M10) * det_inv,(m_M01 * mat.m_M00 - m_M00 * mat.m_M01) * det_inv,(m_M10 * mat.m_M11 - m_M11 * mat.m_M10) * det_inv,(m_M11 * mat.m_M00 - m_M10 * mat.m_M01) * det_inv);}TE CE TTMA TTMA::Square() CO NE{RE TTMA(m_M00 * m_M00 + m_M01 * m_M10,(m_M00 + m_M11) * m_M01,m_M10 * (m_M00 + m_M11),m_M10 * m_M01 + m_M11 * m_M11);}TE CE T& TTMA::GetEntry(CRUI y,CRUI x) CO NE{RE y == 0?x == 0?m_M00:m_M01:x == 0?m_M10:m_M11;}TE CE T& TTMA::RefEntry(CRUI y,CRUI x) NE{RE y == 0?x == 0?m_M00:m_M01:x == 0?m_M10:m_M11;} TE IN TTMA OP+(CO TTMA& mat1,CO TTMA& mat2) NE{RE MO(TTMA(mat1) += mat2);}TE IN TTMA OP-(CO TTMA& mat1,CO TTMA& mat2) NE{RE MO(TTMA(mat1) -= mat2);}TE CE TTMA OP*(CO T& scalar,CO TTMA& mat) NE{RE MO(TTMA(mat) *= scalar);}TE CE TTMA OP*(CO Arg& scalar,CO TTMA& mat) NE{RE T(scalar) * mat;}TE CE TTMA OP*(CO TTMA& mat,CO T& scalar) NE{RE MO(TTMA(mat) *= scalar);}TE CE TTMA OP*(CO TTMA& mat,CO Arg& scalar) NE{RE mat * T(scalar);}TE IN TTMA OP/(CO TTMA& mat,CO T& scalar){RE MO(TTMA(mat) /= scalar);}TE IN TTMA OP/(CO TTMA& mat,CO Arg& scalar){RE mat / T(scalar);}TE IN TTMA OP%(CO TTMA& mat,CO T& scalar){RE MO(TTMA(mat) %= scalar);}TE IN TTMA OP%(CO TTMA& mat,CO Arg& scalar){RE mat % T(scalar);}TE CE TTMA Square(CO TTMA& mat) NE{RE mat.Square();}VEISATION_FOR_TTMA_FOR_MOD(P); #define SFINAE_FOR_PO(DEFAULT) TY Arg,enable_if_t >::value>* DEFAULT TE CL PO{PU:VE m_f;uint m_SZ;PU:IN PO();IN PO(CO T& t);IN PO(T&& t);TE IN PO(CO Arg& n);IN PO(CO PO& f);IN PO(PO&& f);IN PO(CRUI i,CO T& t);IN PO(CRUI i,T&& t);TE IN PO(CRUI i,CO Arg& n);IN PO(CO VE& f);IN PO(VE&& f);IN PO& OP=(CO T& t);IN PO& OP=(T&& t);TE IN PO& OP=(CO Arg& n);IN PO& OP=(CO PO& f);IN PO& OP=(PO&& f);IN PO& OP=(CO VE& f);IN PO& OP=(VE&& f);IN CO T& OP[](CRUI i) CO;IN T& OP[](CRUI i);IN T OP()(CO T& t) CO;PO& OP+=(CO PO& f);PO& OP-=(CO PO& f);PO& OP*=(CO PO& f);PO& OP*=(PO&& f);PO& OP/=(CO T& t);IN PO& OP/=(CO PO& f);PO& OP%=(CO T& t);PO& OP%=(CO PO& f);IN PO OP-() CO;PO& OP<<=(CO T& t);IN CO VE& GetCoefficient() CO NE;IN CRUI SZ() CO NE;IN VO swap(PO& f);IN VO swap(VE& f);VO ReMORedundantZero();IN string Display() CO NE;ST PO Quotient(CO PO& f0,CO PO& f1);ST PO TPQuotient(CO PO& f0,CRUI f0_TP_SZ,CO PO& f1_TP_inverse,CRUI f1_SZ);ST PO TP(CO PO& f,CRUI f_TP_SZ);ST IN CO PO& zero();ST IN CO T& CO_zero();ST IN CO T& CO_one();ST IN CO T& CO_minus_one();}; TE IN PO::PO():m_f(),m_SZ(0){}TE IN PO::PO(CO T& t):PO(){if(t != CO_zero()){OP[](0) = t;}}TE IN PO::PO(T&& t):PO(){if(t != CO_zero()){OP[](0) = MO(t);}}TE TE IN PO::PO(CO Arg& n):PO(T(n)){}TE IN PO::PO(CO PO& f):m_f(f.m_f),m_SZ(f.m_SZ){}TE IN PO::PO(PO&& f):m_f(MO(f.m_f)),m_SZ(MO(f.m_SZ)){}TE IN PO::PO(CRUI i,CO T& t):PO(){if(t != CO_zero()){OP[](i) = t;}}TE IN PO::PO(CRUI i,T&& t):PO(){if(t != CO_zero()){OP[](i) = MO(t);}}TE TE IN PO::PO(CRUI i,CO Arg& n):PO(i,T(n)){}TE IN PO::PO(CO VE& f):m_f(f),m_SZ(m_f.SZ()){}TE IN PO::PO(VE&& f):m_f(MO(f)),m_SZ(m_f.SZ()){}TE IN PO& PO::OP=(CO T& t){m_f.clear();m_SZ = 0;OP[](0) = t;RE *TH;}TE IN PO& PO::OP=(T&& t){m_f.clear();m_SZ = 0;OP[](0) = MO(t);RE *TH;}TE TE IN PO& PO::OP=(CO Arg& n){RE OP=(T(n));}TE IN PO& PO::OP=(CO PO& f){m_f = f.m_f;m_SZ = f.m_SZ;RE *TH;}TE IN PO& PO::OP=(PO&& f){m_f = MO(f.m_f);m_SZ = MO(f.m_SZ);RE *TH;}TE IN PO& PO::OP=(CO VE& f){m_f = f;m_SZ = f.m_SZ;RE *TH;}TE IN PO& PO::OP=(VE&& f){m_f = MO(f);m_SZ = m_f.SZ();RE *TH;}TE CO T& PO::OP[](CRUI i) CO{if(m_SZ <= i){RE CO_zero();}RE m_f[i];}TE IN T& PO::OP[](CRUI i){if(m_SZ <= i){CO T& z = CO_zero();WH(m_SZ <= i){m_f.push_back(z);m_SZ++;}}RE m_f[i];}TE IN T PO::OP()(CO T& t) CO{RE MO((*TH % (PO(1,CO_one()) - t))[0]);}TE PO& PO::OP+=(CO PO& f){if(m_SZ < f.m_SZ){m_f.reserve(f.m_SZ);for(uint i = 0;i < m_SZ;i++){m_f[i] += f.m_f[i];}for(uint i = m_SZ;i < f.m_SZ;i++){m_f.push_back(f.m_f[i]);}m_SZ = f.m_SZ;}else{for(uint i = 0;i < f.m_SZ;i++){m_f[i] += f.m_f[i];}}RE *TH;}TE PO& PO::OP-=(CO PO& f){if(m_SZ < f.m_SZ){m_f.reserve(f.m_SZ);for(uint i = 0;i < m_SZ;i++){m_f[i] -= f.m_f[i];}for(uint i = m_SZ;i < f.m_SZ;i++){m_f.push_back(- f.m_f[i]);}m_SZ = f.m_SZ;}else{for(uint i = 0;i < f.m_SZ;i++){m_f[i] -= f.m_f[i];}}RE *TH;}TE PO& PO::OP*=(CO PO& f){if(m_SZ == 0){RE *TH;}if(f.m_SZ == 0){m_f.clear();m_SZ = 0;RE *TH;}CO uint SZ = m_SZ + f.m_SZ - 1;PO product{};for(uint i = 0;i < SZ;i++){T& product_i = product[i];CO uint j_min = m_SZ > i?0:i - m_SZ + 1;CO uint j_lim = i < f.m_SZ?i + 1:f.m_SZ;for(uint j = j_min;j < j_lim;j++){product_i += m_f[i - j] * f.m_f[j];}}RE OP=(MO(product));}TE IN PO& PO::OP*=(PO&& f){RE OP*=(f);};TE PO& PO::OP/=(CO T& t){if(t == CO_one()){RE *TH;}CO T t_inv{CO_one() / t};for(uint i = 0;i < m_SZ;i++){OP[](i) *= t_inv;}RE *TH;}TE PO PO::TP(CO PO& f,CRUI f_TP_SZ){VE f_TP(f_TP_SZ);for(uint d = 0;d < f_TP_SZ;d++){f_TP[d] = f.m_f[f.m_SZ - 1 - d];}RE PO(MO(f_TP));}TE PO& PO::OP%=(CO T& t){if(t == CO_one()){RE OP=(zero());}for(uint i = 0;i < m_SZ;i++){m_f[i] %= t;}RE *TH;}TE IN PO PO::OP-() CO{RE MO(PO() -= *TH);}TE IN CO VE& PO::GetCoefficient() CO NE{RE m_f;}TE IN CRUI PO::SZ() CO NE{RE m_SZ;}TE IN VO PO::swap(PO& f){m_f.swap(f.m_f);swap(m_SZ,f.m_SZ);}TE IN VO PO::swap(VE& f){m_f.swap(f);m_SZ = m_f.SZ();}TE VO PO::ReMORedundantZero(){CO T& z = CO_zero();WH(m_SZ > 0?m_f[m_SZ - 1] == z:false){m_f.pop_back();m_SZ--;}RE;}TE string PO::Display() CO NE{string s = "(";if(m_SZ > 0){s += to_string(m_f[0]);for(uint i = 1;i < m_SZ;i++){s += "," + to_string(m_f[i]);}}s += ")";RE s;}TE IN CO PO& PO::zero(){ST CO PO z{};RE z;}TE IN CO T& PO::CO_zero(){ST CO T z{0};RE z;}TE IN CO T& PO::CO_one(){ST CO T o{1};RE o;}TE IN CO T& PO::CO_minus_one(){ST CO T m{-1};RE m;}TE bool OP==(CO PO& f0,CO T& t1){CRUI SZ = f0.SZ();CO T& zero = PO::CO_zero();for(uint i = 1;i < SZ;i++){if(f0[i] != zero){RE false;}}RE f0[0] == t1;}TE bool OP==(CO PO& f0,CO PO& f1){CRUI SZ0 = f0.SZ();CRUI SZ1 = f1.SZ();CRUI SZ = SZ0 < SZ1?SZ1:SZ0;for(uint i = 0;i < SZ;i++){if(f0[i] != f1[i]){RE false;}}RE true;}TE IN bool OP!=(CO PO& f0,CO P& f1){RE !(f0 == f1);}TE IN PO OP+(CO PO& f0,CO P& f1){RE MO(PO(f0) += f1);}TE IN PO OP-(CO PO& f){RE PO::zero() - f;}TE IN PO OP-(CO PO& f0,CO P& f1){RE MO(PO(f0) -= f1);}TE IN PO OP*(CO PO& f0,CO P& f1){RE MO(PO(f0) *= f1);}TE IN PO OP/(CO PO& f0,CO T& t1){RE MO(PO(f0) /= t1);}TE TY V>T& Prod(V& f){if(f.empty()){f.push_back(T(1));}if(f.SZ() == 1){RE f.front();}auto IT = f.BE(),EN = f.EN();WH(IT != EN){T& t = *IT;IT++;if(IT != EN){t *= *IT;IT = f.erase(IT);}}RE Prod(f);} TE CL PW_CE{PU:T m_val[EX_lim];IN CE PW_CE(CO T& t,CO T& init = T(1));};TE IN CE PW_CE::PW_CE(CO T& t,CO T& init):m_val() {T PW{init};for( uint EX = 0;EX < EX_lim;EX++){m_val[EX] = PW;PW *= t;}} #define SFINAE_FOR_STD_STREAM( TYPE , DEFAULT ) \ typename T , enable_if_t::value>* DEFAULT \ #define DECLARATION_OF_SCAN( TYPE ) \ template static inline void Scan( T& t ) \ #define DEFINITION_OF_SCAN_FOR_SIGNED_INT_TYPE( TYPE ) \ template inline void StdStream::Scan( T& t ) { if( g_head == g_length ){ Load(); } while( g_c == g_space || g_c == g_new_line ){ ShiftHead(); } bool negative = false; if( g_c == g_minus ){ negative = true; ShiftHead(); } while( !( g_c == g_space || g_c == g_new_line ) ){ ( t *= 10 ) += ( g_c - g_zero ); ShiftHead(); } if( negative ){ t *= -1; } } \ #define DEFINITION_OF_SCAN_FOR_UNSIGNED_INT_TYPE( TYPE ) \ template inline void StdStream::Scan( T& t ) { if( g_head == g_length ){ Load(); } while( g_c == g_space || g_c == g_new_line ){ ShiftHead(); } while( !( g_c == g_space || g_c == g_new_line ) ){ ( t *= 10 ) += ( g_c - g_zero ); ShiftHead(); } } \ #define DEFINITION_OF_SCAN_FOR_STRING_TYPE( TYPE ) \ template inline void StdStream::Scan( T& t ) { if( g_head == g_length ){ Load(); } while( g_c == g_space || g_c == g_new_line ){ ShiftHead(); } while( !( g_c == g_space || g_c == g_new_line ) ){ t += g_c; ShiftHead(); } } \ class StdStream { private: using CharT = char; using Traits = char_traits; static constexpr int g_length_lim = 100000; static constexpr int g_length_max = g_length_lim - 1; static constexpr CharT g_space = ' '; static constexpr CharT g_new_line = '\n'; static constexpr CharT g_minus = '-'; static constexpr CharT g_zero = '0'; static constexpr basic_streambuf::int_type g_EOF = Traits::eof(); static int g_length; static int g_head; static basic_streambuf::int_type g_code; static CharT g_c; static CharT g_buffer[g_length_lim]; public: StdStream() = delete; DECLARATION_OF_SCAN( int ); DECLARATION_OF_SCAN( uint ); DECLARATION_OF_SCAN( ll ); DECLARATION_OF_SCAN( ull ); DECLARATION_OF_SCAN( string ); private: // basic_istream::read()からtry/catchブロックやiostateの更新を削除したread関数 static inline void Load(); static inline void ShiftHead(); static inline void ReadHead(); }; int StdStream::g_length = 0; int StdStream::g_head = 0; basic_streambuf::int_type StdStream::g_code = 0; StdStream::CharT StdStream::g_c = StdStream::g_space; StdStream::CharT StdStream::g_buffer[StdStream::g_length_lim] = {}; inline void StdStream::Load() { g_length = read( 0 , g_buffer , g_length_max ); g_head = -1; g_c = g_space; } inline void StdStream::ShiftHead() { if( ++g_head == g_length ){ Load(); ++g_head; } ReadHead(); } inline void StdStream::ReadHead() { g_c = ( g_head == g_length ) ? g_new_line : g_buffer[g_head]; } DEFINITION_OF_SCAN_FOR_SIGNED_INT_TYPE( int ); DEFINITION_OF_SCAN_FOR_SIGNED_INT_TYPE( ll ); DEFINITION_OF_SCAN_FOR_UNSIGNED_INT_TYPE( uint ); DEFINITION_OF_SCAN_FOR_UNSIGNED_INT_TYPE( ull ); DEFINITION_OF_SCAN_FOR_STRING_TYPE( string ); US MNPN = PO;US MNPNK = PO;IN CEXPR(int,fold_digit,5);IN CEXPR(int,fold,1 << fold_digit);IN CEXPR(int,deg_max,fold + 1);IN CEXPR(int,deg_lim,deg_max + 1); #define SET_CEXPR(NUM) CE MNP c ## NUM{MNP::DeRP(NUM)}; #define HONTAI MNP Ntd[fold + 1] ={c1};MNP Nt1{Nt};MNP Nt_power{c1};FOREQ(d,1,fold){Ntd[d] =(Nt_power *= Nt1);}TTMA diff[deg_lim] ={};TTMA& MNk = diff[0];FOREQ(deg,0,deg_max){TTMA &diff_deg = diff[deg];MNP* p_diff_deg[4] ={&(diff_deg.m_M00),&(diff_deg.m_M01),&(diff_deg.m_M10),&(diff_deg.m_M11)};vector(&TheAtsu_coef_deg)[4] = TheAtsu_coef[deg];vector(&TheAtsu_degree_deg)[4] = TheAtsu_degree[deg];FOR(i,0,4){MNP& diff_deg_i = *p_diff_deg[i];vector& TheAtsu_coef_deg_i = TheAtsu_coef_deg[i];vector& TheAtsu_degree_deg_i = TheAtsu_degree_deg[i];uint TheAtsu_coef_deg_i_SZ = TheAtsu_coef_deg_i.SZ();FOR(d,0,TheAtsu_coef_deg_i_SZ){diff_deg_i += Ntd[TheAtsu_degree_deg_i[d]] * coef_array[TheAtsu_coef_deg_i[d]];}}}TOMA vt{v};uint Kt_div = Kt >> fold_digit;REPEAT(Kt_div){TTMA* p_M_diff = &MNk;TTMA* p_M = p_M_diff++;vt *= MNk;FOR(deg,0,deg_max){*(p_M++) += *(p_M_diff++);}}uint k_start = Kt_div << fold_digit;MNP k_start_1{MNP::DeRP(k_start)};MNP Nt1_minus_k_start_1{Nt1 - k_start_1};MNk.m_M00 = Twice(Nt1_minus_k_start_1);MNk.m_M01 =((k_start & 1) == 0?k_start == 0?MNP(c0):move(((Twice(Nt1) -= k_start_1) += c1) *= MNP::DeRP(k_start >> 1)):move(((c1 - k_start_1).Halve() += Nt1) *= k_start_1));MNk.m_M10 = c1;MNk.m_M11 = c0;MNP diff01{Nt1_minus_k_start_1};FOR(k,k_start,Kt){vt *= MNk;MNk.m_M00 -= c2;MNk.m_M01 += diff01--;}COUT(vt.m_M0); __attribute__( ( target( "fma" ) ) ) int main() { UNTIE; CEXPR( uint , bound_T , 100000 ); CIN_ASSERT( T , 1 , bound_T ); // 本体を定数倍改善するための前計算 SET_CEXPR( 0 ); SET_CEXPR( 1 ); SET_CEXPR( 2 ); constexpr MNP c2_neg{ MNP::DeRP( P - 2 ) }; constexpr MNP c2_inv{ MNP::DeRP( ( P + 1 ) / 2 ) }; constexpr MNP c2_inv_neg{ MNP::DeRP( ( P - 1 ) / 2 ) }; const MNPN& zero = MNPN::zero(); const MNPN one{ 0 , c1 }; const MNPN two{ 0 , c2 }; const MNPN two_inv{ 0 , c2_inv }; constexpr TOMA v{ MNP::DeRP( 1 ) , MNP::DeRP( 0 ) }; TTMA MNk_shift { move( MNPNK( 1 , MNPN( 0 , c2_neg ) ) += MNPNK( 0 , MNPN( 1 , c2 ) ) ) , move( MNPNK( 2 , MNPN( 0 , c2_inv_neg ) ) += MNPNK( 1 , move( MNPN( 1 , c1 ) += c2_inv ) ) ) , MNPNK( 0 , one ) , MNPNK( 0 , zero ) }; list > M = {}; CEXPR( int , fold_minus , fold - 1 ); REPEAT( fold_minus ){ M.push_front( MNk_shift ); MNk_shift.m_M00.m_f[0] -= two; MNk_shift.m_M01.m_f[0] += ( MNk_shift.m_M01.m_f[1] -= one ) + two_inv; } M.push_front( move( MNk_shift ) ); vector comb[deg_lim] = {}; comb[0].push_back( one ); FOREQ( deg , 1 , deg_max ){ MNPN* p_comb_deg_minus_right = &( comb[deg - 1][0] ); MNPN* p_comb_deg_minus_left = p_comb_deg_minus_right++; vector& comb_deg = comb[deg]; comb_deg = vector( deg + 1 , zero ); comb_deg[0] = comb_deg[deg] = one; uint deg_half = ( deg + 1 ) / 2; FOR( ddeg , 1 , deg_half ){ comb_deg[ddeg] = comb_deg[deg - ddeg] = *p_comb_deg_minus_left + *p_comb_deg_minus_right; p_comb_deg_minus_left++; p_comb_deg_minus_right++; } if( deg % 2 == 0 ){ comb_deg[deg_half] = *p_comb_deg_minus_left + *p_comb_deg_minus_left; } } constexpr PW_CE fp{ fold }; MNPN fp1[deg_lim]; FOREQ( deg , 0 , deg_max ){ fp1[deg] = MNPN( 0 , fp.m_val[deg] ); } vector > comb_fp{}; comb_fp.reserve( deg_lim ); comb_fp.push_back( vector() ); FOR( ddeg , 1 , deg_lim ){ vector& comb_ddeg = comb[ddeg]; comb_fp.push_back( vector() ); vector& comb_fp_ddeg = comb_fp[ddeg]; comb_fp_ddeg.reserve( ddeg ); comb_fp_ddeg.push_back( fp1[ddeg] ); FOR( dddeg , 1 , ddeg ){ comb_fp_ddeg.push_back( comb_ddeg[dddeg] * fp1[ddeg - dddeg] ); } } TTMA prod[deg_lim]; TTMA& prod_curr = prod[deg_max]; prod_curr = Prod( M ); MNPNK* p_prod_curr[4] = { &( prod_curr.m_M00 ) , &( prod_curr.m_M01 ) , &( prod_curr.m_M10 ) , &( prod_curr.m_M11 ) }; FOR( deg , 0 , deg_max ){ prod[deg] = prod_curr; FOR( i , 0 , 4 ){ MNPNK& prod_curr_i = *( p_prod_curr[i] ); const uint& size = prod_curr_i.size(); FOR( ddeg , 1 , size ){ vector& comb_fp_ddeg = comb_fp[ddeg]; MNPN& prod_curr_i_ddeg = prod_curr_i.m_f[ddeg]; FOR( dddeg , 0 , ddeg ){ prod_curr_i.m_f[dddeg] += prod_curr_i_ddeg * comb_fp_ddeg[dddeg]; } } } } // fold = 32の場合のcoefに必要のサイズの前計算値 CEXPR( uint , coef_size_max , 33 ); // ここはvectorよりuint[coef_size_max]の方が僅かに速い uint coef[deg_lim][4][coef_size_max]; uint coef_size[deg_lim][4] = {}; map coef_list{}; FOREQ( deg , 0 , deg_max ){ TTMA& diff_deg = prod[deg]; MNPN* p_diff_deg[4] = { &( diff_deg.m_M00[0] ) , &( diff_deg.m_M01[0] ) , &( diff_deg.m_M10[0] ) , &( diff_deg.m_M11[0] ) }; uint ( &coef_deg )[4][coef_size_max] = coef[deg]; uint ( &coef_size_deg )[4] = coef_size[deg]; FOR( i , 0 , 4 ){ MNPN& diff_deg_i = *( p_diff_deg[i] ); const uint& size = diff_deg_i.size(); uint ( &coef_deg_i )[coef_size_max] = coef_deg[i]; uint& coef_size_deg_i = coef_size_deg[i]; FOR( d , 0 , size ){ const uint& diff_deg_i_d = diff_deg_i[d].RP(); coef_deg_i[coef_size_deg_i++] = diff_deg_i_d; if( diff_deg_i_d > 0 ){ coef_list[diff_deg_i_d]; } } } TTMA* p_prod_curr = &( prod[deg_max] ); TTMA* p_prod_prev = p_prod_curr--; FOREQ( ddeg_trans , deg + 1 , deg_max ){ *p_prod_prev -= *p_prod_curr; p_prod_prev->m_M00.ReMORedundantZero(); p_prod_prev->m_M01.ReMORedundantZero(); p_prod_prev->m_M10.ReMORedundantZero(); p_prod_prev->m_M11.ReMORedundantZero(); p_prod_curr--; p_prod_prev--; } } // fold = 32の時のcoef_list.size()の前計算値 CEXPR( uint , coef_list_size , 2176 ); // ここはvectorよりuint[coef_list_size]の方が僅かに速い MNP coef_array[coef_list_size]; uint coef_array_size = 0; FOR_ITR( coef_list , itr , end ){ coef_array[itr->second = coef_array_size++] = MNP::DeRP( itr->first ); } // 係数0をスキップで1.1倍、座標圧縮で1.05倍くらい早くなる。 // ここをvectorでなくuint[coef_size_max]にすると逆に遅くなる。 vector TheAtsu_coef[deg_lim][4] = {}; vector TheAtsu_degree[deg_lim][4] = {}; FOREQ( deg , 0 , deg_max ){ uint ( &coef_deg )[4][coef_size_max] = coef[deg]; uint ( &coef_size_deg )[4] = coef_size[deg]; vector ( &TheAtsu_coef_deg )[4] = TheAtsu_coef[deg]; vector ( &TheAtsu_degree_deg )[4] = TheAtsu_degree[deg]; FOR( i , 0 , 4 ){ uint ( &coef_deg_i )[coef_size_max] = coef_deg[i]; uint& coef_size_deg_i = coef_size_deg[i]; vector& TheAtsu_coef_deg_i = TheAtsu_coef_deg[i]; vector& TheAtsu_degree_deg_i = TheAtsu_degree_deg[i]; TheAtsu_coef_deg_i.reserve( coef_size_deg_i ); TheAtsu_degree_deg_i.reserve( coef_size_deg_i ); FOR( d , 0 , coef_size_deg_i ){ uint& coef_deg_i_d = coef_deg_i[d]; if( coef_deg_i_d != 0 ){ TheAtsu_coef_deg_i.push_back( coef_list[coef_deg_i_d] ); TheAtsu_degree_deg_i.push_back( d ); } } } } // ここから本体 CEXPR( ull , bound_N , 1000000000000000000 ); if( T > 5 ){ CEXPR( uint , bound_K1 , bound_T ); REPEAT( T ){ CIN_ASSERT( Nt , 1 , bound_N ); CIN_ASSERT( Kt , 0 , bound_K1 ); HONTAI; } } else { CEXPR( ull , bound_K2 , bound_N ); REPEAT( T ){ CIN_ASSERT( Nt , 1 , bound_N ); CIN_ASSERT( Ktull , 0 , bound_K2 ); if( Ktull >= P ){ COUT( 0 ); } else { uint Kt = uint( Ktull ); HONTAI; } } } return 0; }