#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef __GNUC__ #include #endif #define rep(i,n) for(int (i)=0;(i)<(int)(n);++(i)) #define rer(i,l,u) for(int (i)=(int)(l);(i)<=(int)(u);++(i)) #define reu(i,l,u) for(int (i)=(int)(l);(i)<(int)(u);++(i)) #if defined(_MSC_VER) || __cplusplus > 199711L #define aut(r,v) auto r = (v) #else #define aut(r,v) __typeof(v) r = (v) #endif #define each(it,o) for(aut(it, (o).begin()); it != (o).end(); ++ it) #define all(o) (o).begin(), (o).end() #define pb(x) push_back(x) #define mp(x,y) make_pair((x),(y)) #define mset(m,v) memset(m,v,sizeof(m)) #define INF 0x3f3f3f3f #define INFL 0x3f3f3f3f3f3f3f3fLL using namespace std; typedef vector vi; typedef pair pii; typedef vector > vpii; typedef long long ll; template inline void amin(T &x, U y) { if(y < x) x = y; } template inline void amax(T &x, U y) { if(x < y) x = y; } #if defined(_M_X64) || defined(__amd64__) #define MP_64 #endif struct MontgomeryModInt { static const int Mod = 1000000007; static const int W = 32; static const int R = (1ULL << W) % Mod; //= 2^W static const int InvR = 518424770; //= R^{-1} (mod Mod) static const int InvNegMod = (int)2226617417U; //= (-Mod)^{-1} (mod R) #if defined(_MSC_VER) || __cplusplus > 199711L static_assert(InvR < Mod && ((unsigned long long)R * InvR) % Mod == 1, "InvR = R^{-1} (mod Mod)"); static_assert((((unsigned long long)((1ULL << W) - Mod) * (unsigned long long)(unsigned)InvNegMod) & 0xffffffffULL) == 1, "InvNegMod = (-Mod)^{-1} (mod R)"); #endif unsigned residue; //residue = value * R % Mod MontgomeryModInt(): residue(0) { } MontgomeryModInt(signed sig): residue(multR(sig)) { } MontgomeryModInt(signed long long sig): residue(multR((signed)(sig % Mod))) { } static unsigned multR(signed sig) { signed sigt = (signed long long)sig * R % Mod; if(sigt < 0) sigt += Mod; return (unsigned)sigt; } int get() const { return (int)reduce(residue); } static unsigned reduce(unsigned T) { unsigned m = T * (unsigned)InvNegMod; unsigned t = (unsigned)((T + (unsigned long long)m * Mod) >> W); if(t >= Mod) t -= Mod; return t; } static unsigned reduce(unsigned long long T) { unsigned m = (unsigned)T * (unsigned)InvNegMod; unsigned t = (unsigned)((T + (unsigned long long)m * Mod) >> W); if(t >= Mod) t -= Mod; return t; } MontgomeryModInt &operator+=(MontgomeryModInt that) { if((residue += that.residue) >= Mod) residue -= Mod; return *this; } MontgomeryModInt &operator-=(MontgomeryModInt that) { if((residue += Mod - that.residue) >= Mod) residue -= Mod; return *this; } MontgomeryModInt &operator*=(MontgomeryModInt that) { residue = reduce((unsigned long long)residue * that.residue); return *this; } MontgomeryModInt &operator/=(MontgomeryModInt that) { return *this *= that.inverse(); } MontgomeryModInt operator+(MontgomeryModInt that) const { return MontgomeryModInt(*this) += that; } MontgomeryModInt operator-(MontgomeryModInt that) const { return MontgomeryModInt(*this) -= that; } MontgomeryModInt operator*(MontgomeryModInt that) const { MontgomeryModInt res; res.residue = reduce((unsigned long long)residue * that.residue); return res; } MontgomeryModInt operator/(MontgomeryModInt that) const { return MontgomeryModInt(*this) /= that; } MontgomeryModInt inverse() const { //a^{-1} R = (a R)^{-1} * R^2 static const int SquaredR = (unsigned long long)R * R % Mod; signed a = residue, b = Mod, u = 1, v = 0; while(b) { signed t = a / b; a -= t * b; std::swap(a, b); u -= t * v; std::swap(u, v); } if(u < 0) u += Mod; MontgomeryModInt res; res.residue = (unsigned long long)(unsigned)u * SquaredR % Mod; return res; } MontgomeryModInt operator-() const { MontgomeryModInt t; t.residue = residue == 0 ? 0 : Mod - residue; return t; } bool operator==(const MontgomeryModInt &that) const { return residue == that.residue; } bool operator!=(const MontgomeryModInt &that) const { return residue != that.residue; } }; typedef MontgomeryModInt mint; namespace mod_polynomial { struct Polynomial { typedef mint R; static R ZeroR() { return R(); } static R OneR() { return R(1); } static bool IsZeroR(R r) { return r == ZeroR(); } std::vector coefs; Polynomial() {} explicit Polynomial(R c0) : coefs(1, c0) {} explicit Polynomial(R c0, R c1) : coefs(2) { coefs[0] = c0, coefs[1] = c1; } template Polynomial(It be, It en) : coefs(be, en) {} static Polynomial Zero() { return Polynomial(); } static Polynomial One() { return Polynomial(OneR()); } static Polynomial X() { return Polynomial(ZeroR(), OneR()); } void resize(int n) { coefs.resize(n); } void clear() { coefs.clear(); } R *data() { return coefs.empty() ? NULL : &coefs[0]; } const R *data() const { return coefs.empty() ? NULL : &coefs[0]; } int size() const { return static_cast(coefs.size()); } bool empty() const { return coefs.empty(); } int degree() const { return size() - 1; } bool normalized() const { return coefs.empty() || coefs.back() != ZeroR(); } bool monic() const { return !coefs.empty() && coefs.back() == OneR(); } R get(int i) const { return 0 <= i && i < size() ? coefs[i] : ZeroR(); } void set(int i, R x) { if(size() <= i) resize(i + 1); coefs[i] = x; } void normalize() { while(!empty() && IsZeroR(coefs.back())) coefs.pop_back(); } R evaluate(R x) const { if(empty()) return R(); R r = coefs.back(); for(int i = size() - 2; i >= 0; -- i) { r *= x; r += coefs[i]; } return r; } Polynomial &operator+=(const Polynomial &that) { int m = size(), n = that.size(); if(m < n) resize(n); _add(data(), that.data(), n); return *this; } Polynomial operator+(const Polynomial &that) const { return Polynomial(*this) += that; } Polynomial &operator-=(const Polynomial &that) { int m = size(), n = that.size(); if(m < n) resize(n); _subtract(data(), that.data(), n); return *this; } Polynomial operator-(const Polynomial &that) const { return Polynomial(*this) -= that; } Polynomial &operator*=(R r) { _multiply_1(data(), size(), r); return *this; } Polynomial operator*(R r) const { Polynomial res; res.resize(size()); _multiply_1(res.data(), data(), size(), r); return res; } Polynomial operator*(const Polynomial &that) const { Polynomial r; multiply(r, *this, that); return r; } Polynomial &operator*=(const Polynomial &that) { multiply(*this, *this, that); return *this; } static void multiply(Polynomial &res, const Polynomial &p, const Polynomial &q) { int pn = p.size(), qn = q.size(); if(pn < qn) return multiply(res, q, p); if(&res == &p || &res == &q) { Polynomial tmp; multiply(tmp, p, q); res = tmp; return; } if(qn == 0) { res.coefs.clear(); } else { res.resize(pn + qn - 1); _multiply_select_method(res.data(), p.data(), pn, q.data(), qn); } } Polynomial operator-() const { Polynomial res; res.resize(size()); _negate(res.data(), data(), size()); return res; } Polynomial precomputeInverse(int n) const { Polynomial res; res.resize(n); _precompute_inverse(res.data(), n, data(), size()); return res; } static void divideRemainderPrecomputedInverse(Polynomial ", Polynomial &rem, const Polynomial &p, const Polynomial &q, const Polynomial &inv) { assert(" != &p && " != &q && " != &inv); int pn = p.size(), qn = q.size(); assert(inv.size() >= pn - qn + 1); quot.resize(std::max(0, pn - qn + 1)); rem.resize(qn - 1); _divide_remainder_precomputed_inverse(quot.data(), rem.data(), p.data(), pn, q.data(), qn, inv.data()); quot.normalize(); rem.normalize(); } Polynomial computeRemainderPrecomputedInverse(const Polynomial &q, const Polynomial &inv) const { Polynomial quot, rem; divideRemainderPrecomputedInverse(quot, rem, *this, q, inv); return rem; } Polynomial powerMod(long long K, const Polynomial &q) const { int qn = q.size(); assert(K >= 0 && qn > 0); assert(q.monic()); if(qn == 1) return Polynomial(); if(K == 0) return One(); Polynomial inv = q.precomputeInverse(std::max(size() - qn + 1, qn)); Polynomial p = this->computeRemainderPrecomputedInverse(q, inv); int l = 0; while((K >> l) > 1) ++ l; Polynomial res = p; for(-- l; l >= 0; -- l) { res *= res; res = res.computeRemainderPrecomputedInverse(q, inv); if(K >> l & 1) { res *= p; res = res.computeRemainderPrecomputedInverse(q, inv); } } return res; } static int TOOM22_THRESHOLD; static int SCHONHAGE_STRASSEN_THRESHOLD; //private: class WorkSpaceStack; static void _fill_zero(R *res, int n); static void _copy(R *res, const R *p, int n); static void _negate(R *res, const R *p, int n); static void _add(R *p, const R *q, int n); static void _add(R *res, const R *p, int pn, const R *q, int qn); static void _subtract(R *p, const R *q, int n); static void _subtract(R *res, const R *p, int pn, const R *q, int qn); static void _multiply_select_method(R *res, const R *p, int pn, const R *q, int qn); static void _square_select_method(R *res, const R *p, int pn); public: static int _schonhage_strassen_determine_first_k(int n); static int _schonhage_strassen_determine_recurse_k(int n, int k); //private: static void _multiply_1(R *p, const R *q, int n, R c0); static void _multiply_1(R *p, int n, R c0); static void _multiply_power_of_two(R *res, const R *p, int n, int k); static void _divide_power_of_two(R *res, const R *p, int n, int k); static void _schoolbook_multiplication(R *res, const R *p, int pn, const R *q, int qn); static void _toom22(R *res, const R *p, int pn, const R *q, int qn, WorkSpaceStack &wss); static int _toom22_estimate_workspace(int pn, int qn); static void _schonhage_strassen(R *res, const R *p, int pn, const R *q, int qn); static void _schonhage_strassen_recurse(R *res, const R *X, const R *Y, int N, int k); static void _schonhage_strassen_decompose(R *x, const R *X, int k, int n, int m, R *tmp); static void _fft_make_omega_table(int n, int k, unsigned short *omega_table); static void _fft_rec(R *x, int n, int inc, int k, R *tmp, const unsigned short *omega_table); static void _fft_inverse_rec(R *x, int n, int k, R *tmp); static void _negacyclic_shift(R *res, const R *x, int n, int shift); static void _negacyclic_shift_add(R *p, int pn, const R *q, int qn, int shift); static void _reverse(R *res, const R *p, int pn); static void _inverse_power_series(R *res, int resn, const R *p, int pn); static void _precompute_inverse(R *res, int resn, const R *p, int pn); static void _divide_precomputed_inverse(R *res, int resn, const R *revp, int pn, const R *inv); static void _divide_remainder_precomputed_inverse(R *quot, R *rem, const R *p, int pn, const R *q, int qn, const R *inv); class WorkSpaceStack { public: explicit WorkSpaceStack(int n_) : n(n_), used(0), max_used(0) { p = static_cast(std::malloc(n * sizeof(R))); } ~WorkSpaceStack() { std::free(p); } R *allocate(int size) { if(size > n - used) { std::abort(); } R *t = p + used; used += size; if(used > max_used) max_used = used; return t; } void deallocate(R *t, int size) { if(size > used || p + (used - size) != t) { std::abort(); } used -= size; } private: R *p; int n, used, max_used; WorkSpaceStack(const WorkSpaceStack &); WorkSpaceStack &operator=(const WorkSpaceStack &); }; class WorkSpace { public: explicit WorkSpace(WorkSpaceStack &stk_, int n_, bool fill_zero = false) : stk(&stk_), n(n_) { p = stk->allocate(n_); if(fill_zero) _fill_zero(p, n); } explicit WorkSpace(int n_, bool fill_zero = false) : stk(NULL), n(n_) { p = static_cast(std::malloc(n * sizeof(R))); if(fill_zero) _fill_zero(p, n); } ~WorkSpace() { if(stk != NULL) { stk->deallocate(p, n); } else { std::free(p); } } R *get() { return p; } private: WorkSpaceStack *stk; R *p; int n; WorkSpace(const WorkSpace &); WorkSpace &operator=(const WorkSpace &); }; }; int Polynomial::TOOM22_THRESHOLD = 16; int Polynomial::SCHONHAGE_STRASSEN_THRESHOLD = 120; void Polynomial::_fill_zero(R *res, int n) { for(int i = 0; i < n; ++ i) res[i] = ZeroR(); } void Polynomial::_copy(R *res, const R *p, int n) { for(int i = 0; i < n; ++ i) res[i] = p[i]; } #ifndef __GNUC__ inline __m128i add_mod_128i(const __m128i &x, const __m128i &y, const __m128i &mod) { __m128i sum = _mm_add_epi32(x, y); __m128i lt_mod_mask = _mm_cmpgt_epi32(mod, sum); __m128i mod_sub = _mm_andnot_si128(lt_mod_mask, mod); __m128i result = _mm_sub_epi32(sum, mod_sub); return result; } inline __m128i sub_mod_128i(const __m128i &x, const __m128i &y, const __m128i &zero, const __m128i &mod) { __m128i diff = _mm_sub_epi32(x, y); __m128i lt_0_mask = _mm_cmpgt_epi32(zero, diff); __m128i mod_add = _mm_and_si128(lt_0_mask, mod); __m128i result = _mm_add_epi32(diff, mod_add); return result; } void Polynomial::_negate(R *res, const R *p, int n) { if(n >= 4) { __m128i zero = _mm_setzero_si128(); __m128i mod = _mm_set1_epi32(R::Mod); for(int i = 0; i + 3 < n; i += 4) { __m128i x = _mm_loadu_si128(reinterpret_cast(p + i)); __m128i result = sub_mod_128i(zero, x, zero, mod); _mm_storeu_si128(reinterpret_cast<__m128i*>(res + i), result); } } for(int i = n - (n & 3); i < n; ++ i) res[i] = -p[i]; } void Polynomial::_add(R *res, const R *p, int pn, const R *q, int qn) { assert(pn >= qn); if(qn >= 4) { __m128i mod = _mm_set1_epi32(R::Mod); for(int i = 0; i + 3 < qn; i += 4) { __m128i x = _mm_loadu_si128(reinterpret_cast(p + i)); __m128i y = _mm_loadu_si128(reinterpret_cast(q + i)); __m128i result = add_mod_128i(x, y, mod); _mm_storeu_si128(reinterpret_cast<__m128i*>(res + i), result); } } for(int i = qn - (qn & 3); i < qn; ++ i) res[i] = p[i] + q[i]; _copy(res + qn, p + qn, pn - qn); } void Polynomial::_subtract(R *res, const R *p, int pn, const R *q, int qn) { assert(pn >= qn); if(qn >= 4) { __m128i zero = _mm_setzero_si128(); __m128i mod = _mm_set1_epi32(R::Mod); for(int i = 0; i + 3 < qn; i += 4) { __m128i x = _mm_loadu_si128(reinterpret_cast(p + i)); __m128i y = _mm_loadu_si128(reinterpret_cast(q + i)); __m128i result = sub_mod_128i(x, y, zero, mod); _mm_storeu_si128(reinterpret_cast<__m128i*>(res + i), result); } } for(int i = qn - (qn & 3); i < qn; ++ i) res[i] = p[i] - q[i]; _copy(res + qn, p + qn, pn - qn); } #else //AVX未対応のCPUにも対応したバージョン (AVXバージョンの命令を使うと少し速くなるけど) void Polynomial::_negate(R *res, const R *p, int n) { if(n >= 4) { __attribute__((aligned(16))) const unsigned mods[4] = { R::Mod, R::Mod, R::Mod, R::Mod }; __asm( " pxor %%xmm2, %%xmm2;" " movdqa %0, %%xmm3;" ::"m"(mods)); for(int i = 0; i + 3 < n; i += 4) { __asm( " movdqu %1, %%xmm0;" " movdqa %%xmm2, %%xmm1;" " psubd %%xmm0, %%xmm1;" " movdqa %%xmm2, %%xmm0;" " pcmpgtd %%xmm1, %%xmm0;" " pand %%xmm3, %%xmm0;" " paddd %%xmm1, %%xmm0;" " movups %%xmm0, %0;" :"=m"(res[i]):"m"(p[i])); } } for(int i = n - (n & 3); i < n; ++ i) res[i] = -p[i]; } void Polynomial::_add(R *res, const R *p, int pn, const R *q, int qn) { assert(pn >= qn); if(qn >= 4) { __attribute__((aligned(16))) const unsigned mods[4] = { R::Mod, R::Mod, R::Mod, R::Mod }; __asm( " movdqa %0, %%xmm2;" ::"m"(mods)); for(int i = 0; i + 3 < qn; i += 4) { __asm( " movdqu %1, %%xmm0;" " movdqu %2, %%xmm1;" " paddd %%xmm1, %%xmm0;" " movdqa %%xmm2, %%xmm1;" " pcmpgtd %%xmm0, %%xmm1;" " pandn %%xmm2, %%xmm1;" " psubd %%xmm1, %%xmm0;" " movups %%xmm0, %0;" :"=m"(res[i]):"m"(p[i]),"m"(q[i])); } } for(int i = qn - (qn & 3); i < qn; ++ i) res[i] = p[i] + q[i]; _copy(res + qn, p + qn, pn - qn); } void Polynomial::_subtract(R *res, const R *p, int pn, const R *q, int qn) { assert(pn >= qn); if(qn >= 4) { __attribute__((aligned(16))) const unsigned mods[4] = { R::Mod, R::Mod, R::Mod, R::Mod }; __asm( " pxor %%xmm2, %%xmm2;" " movdqa %0, %%xmm3;" ::"m"(mods)); for(int i = 0; i + 3 < qn; i += 4) { __asm( " movdqu %1, %%xmm4;" " movdqu %2, %%xmm1;" " movdqa %%xmm2, %%xmm0;" " psubd %%xmm1, %%xmm4;" " pcmpgtd %%xmm4, %%xmm0;" " pand %%xmm3, %%xmm0;" " paddd %%xmm4, %%xmm0;" " movups %%xmm0, %0;" :"=m"(res[i]):"m"(p[i]),"m"(q[i])); } } for(int i = qn - (qn & 3); i < qn; ++ i) res[i] = p[i] - q[i]; _copy(res + qn, p + qn, pn - qn); } #endif void Polynomial::_add(R *p, const R *q, int n) { _add(p, p, n, q, n); } void Polynomial::_subtract(R *p, const R *q, int n) { _subtract(p, p, n, q, n); } void Polynomial::_multiply_1(R *res, const R *p, int n, R c0) { for(int i = 0; i < n; ++ i) res[i] = p[i] * c0; } void Polynomial::_multiply_1(R *p, int n, R c0) { _multiply_1(p, p, n, c0); } void Polynomial::_multiply_power_of_two(R *res, const R *p, int n, int k) { assert(0 < k && k < 31); R mul = R(1 << k); _multiply_1(res, p, n, mul); } void Polynomial::_divide_power_of_two(R *res, const R *p, int n, int k) { assert(0 < k && k < 31); static const R Inv2 = R(2).inverse(); R inv = k == 1 ? Inv2 : R(1 << k).inverse(); _multiply_1(res, p, n, inv); } void Polynomial::_multiply_select_method(R *res, const R *p, int pn, const R *q, int qn) { if(pn < qn) std::swap(p, q), std::swap(pn, qn); assert(res != p && res != q && pn >= qn && qn > 0); int rn = pn + qn - 1; if(qn == 1) { _multiply_1(res, p, pn, q[0]); } else if(qn < TOOM22_THRESHOLD) { _schoolbook_multiplication(res, p, pn, q, qn); } else if(rn < SCHONHAGE_STRASSEN_THRESHOLD) { int t = 2; int m = (pn + t - 1) / t * (t - 1) + 1; if(qn >= m) { WorkSpaceStack wss(_toom22_estimate_workspace(pn, qn)); _toom22(res, p, pn, q, qn, wss); } else { WorkSpaceStack wss((m + (m + pn - 1)) + _toom22_estimate_workspace(pn, m)); WorkSpace ws(wss, m + (m + pn - 1)); _copy(ws.get(), q, qn); _fill_zero(ws.get() + qn, m - qn); _toom22(ws.get() + m, p, pn, ws.get(), m, wss); _copy(res, ws.get() + m, pn + qn - 1); } } else { _schonhage_strassen(res, p, pn, q, qn); } } void Polynomial::_square_select_method(R *res, const R *p, int pn) { _multiply_select_method(res, p, pn, p, pn); } int Polynomial::_schonhage_strassen_determine_first_k(int n) { static const int thresholds[][2] = { {1, 1}, {5, 2}, {25, 3}, {33, 2}, {49, 3}, {65, 2}, {73, 3}, {97, 4}, {129, 3}, {193, 4}, {257, 3}, {289, 4}, {385, 5}, {513, 4}, {769, 5}, {1025, 4}, {1153, 5}, {2001, 6}, {2049, 5}, {3073, 6}, {4097, 5}, {4609, 6}, {6145, 7}, {8193, 6}, {12289, 7}, {24577, 8}, {32769, 7}, {40961, 8}, {65537, 9}, {131073, 8}, {229377, 9}, {393217, 10}, {524289, 9}, {655361, 10}, {1048577, 11}, {2097153, 10}, {3670017, 11}, }; static const int NumThreshold = sizeof(thresholds) / sizeof(*thresholds); if(n < SCHONHAGE_STRASSEN_THRESHOLD) { return 0; } else { int i = 0; while(i < NumThreshold - 1 && thresholds[i + 1][0] <= n) ++ i; return thresholds[i][1]; } } int Polynomial::_schonhage_strassen_determine_recurse_k(int n, int k) { return _schonhage_strassen_determine_first_k(n); } #if 0 void Polynomial::_schoolbook_multiplication(R *res, const R *p, int pn, const R *q, int qn) { if(qn == 1) { _multiply_1(res, p, pn, q[0]); return; } assert(res != p && res != q && pn >= qn && qn > 0); _fill_zero(res, pn + qn - 1); for(int i = 0; i < pn; ++ i) for(int j = 0; j < qn; ++ j) res[i + j] += p[i] * q[j]; } #else inline unsigned long long mint_max_prod() { return (unsigned long long)(mint::Mod - 1) * (mint::Mod - 1); } inline unsigned long long mint_max_value() { return ~0ULL - mint_max_prod(); } //mint_sub_value > ~0ULL / 2 inline unsigned long long mint_sub_value() { return ((~0ULL / 2) / mint::Mod + 1) * mint::Mod; } inline unsigned long long montgomery_max_value() { return ((unsigned long long)mint::Mod << mint::W) - 1; } inline unsigned long long montgomery_sub_value() { return (((montgomery_max_value() + mint_max_prod()) / 2) / mint::Mod + 1) * mint::Mod; } inline void schoolbook_sum_and_reduce(mint *res, const mint *p, int pn, const mint *q, int qn) { #ifdef MP_64 const int first_count = (int)min(999ULL, mint_max_value() / mint_max_prod() + 1); const int second_count = (int)min(999ULL, ((mint_max_value() - (mint_sub_value() - 1)) / mint_max_prod() + 1)); #endif for(int i = 0; i < pn + qn - 1; ++ i) { int jlower = i - qn + 1 >= 0 ? i - (qn - 1) : 0; int jupper = pn <= i ? pn - 1 : i; #ifdef MP_64 unsigned long long sum = 0; int j = jlower; int firstu = j + first_count; if(firstu >= jupper) goto rem_j; for(; j < firstu; ++ j) sum += (unsigned long long)p[j].residue * q[i - j].residue; if(sum >= mint_sub_value()) sum -= mint_sub_value(); while(1) { int u = j + second_count; if(u >= jupper) break; for(; j < u; ++ j) sum += (unsigned long long)p[j].residue * q[i - j].residue; if(sum >= mint_sub_value()) sum -= mint_sub_value(); } rem_j: for(; j <= jupper; ++ j) sum += (unsigned long long)p[j].residue * q[i - j].residue; res[i].residue = mint::reduce((unsigned)(sum % mint::Mod)); #else unsigned long long sum = 0; for(int j = jlower; j <= jupper; ++ j) { sum += (unsigned long long)p[j].residue * q[i-j].residue; if(sum >= montgomery_sub_value()) sum -= montgomery_sub_value(); } res[i].residue = mint::reduce(sum); #endif } } void Polynomial::_schoolbook_multiplication(R *res, const R *p, int pn, const R *q, int qn) { assert(res != p && res != q && pn >= qn && qn > 1); schoolbook_sum_and_reduce(res, p, pn, q, qn); } #endif void Polynomial::_toom22(R *res, const R *p, int pn, const R *q, int qn, WorkSpaceStack &wss) { assert(res != p && res != q && pn >= qn); assert(qn >= TOOM22_THRESHOLD); int lo = (pn + 1) / 2, hp = pn - lo, hq = qn - lo; assert(0 < lo && 0 < hq && hp <= lo && hq <= lo); //r0 = pq(0), r1 = pq(1), rinf = pq(inf) //w0 = r0, w1 = -r0 + r1 - rinf, w2 = rinf WorkSpace ws(wss, lo * 4 - 1); R *t0 = ws.get(), *t1 = t0 + lo; R *r0 = res, *r1 = t1 + lo, *rinf = res + lo * 2; _add(t0, p, lo, p + lo, hp); _add(t1, q, lo, q + lo, hq); if(lo < TOOM22_THRESHOLD) { _schoolbook_multiplication(r1, t0, lo, t1, lo); _schoolbook_multiplication(r0, p, lo, q, lo); } else { _toom22(r1, t0, lo, t1, lo, wss); _toom22(r0, p, lo, q, lo, wss); } res[lo + lo - 1] = ZeroR(); if(hq < TOOM22_THRESHOLD) { _schoolbook_multiplication(rinf, p + lo, hp, q + lo, hq); } else { _toom22(rinf, p + lo, hp, q + lo, hq, wss); } _subtract(r1, r0, lo + lo - 1); _subtract(r1, rinf, hp + hq - 1); _add(res + lo, r1, lo + lo - 1); } int Polynomial::_toom22_estimate_workspace(int pn, int qn) { assert(pn >= qn); if(qn < TOOM22_THRESHOLD) return 0; int lo = (pn + 1) / 2; return lo * 4 - 1 + _toom22_estimate_workspace(lo, lo); } void Polynomial::_schonhage_strassen(R *res, const R *p, int pn, const R *q, int qn) { int rn = pn + qn - 1; int k = _schonhage_strassen_determine_first_k(rn); assert(k > 0); int K = 1 << k; int N = (rn + K - 1) >> k << k; WorkSpace ws(N * 3); R *Z = ws.get(), *X = Z + N, *Y = X + N; _copy(X, p, pn); _fill_zero(X + pn, N - pn); _copy(Y, q, qn); _fill_zero(Y + qn, N - qn); _schonhage_strassen_recurse(Z, X, Y, N, k); _copy(res, Z, rn); } void Polynomial::_schonhage_strassen_recurse(R *res, const R *X, const R *Y, int N, int k) { assert(1 < k && k <= 15); assert(0 < N && (N & ((1 << k) - 1)) == 0); int K = 1 << k; int m = N >> k; int n = (m + m - 1 + K - 1) >> k << k, n2 = n * 2; int nK = n << k; assert(n < N); // cerr << "s.s. recurse N = " << N << ", k = " << k << ", n = " << n << endl; WorkSpace ws(nK * 2 + n * 2, true); R *x = ws.get(), *y = x + nK, *tmp = y + nK; _schonhage_strassen_decompose(x, X, k, n, m, tmp); _schonhage_strassen_decompose(y, Y, k, n, m, tmp); unsigned short *omega_table = new unsigned short[K - 1]; _fft_make_omega_table(n, k, omega_table); _fft_rec(x, n, n, k, tmp, omega_table); _fft_rec(y, n, n, k, tmp, omega_table); int nextk = _schonhage_strassen_determine_recurse_k(n, k); assert(nextk <= k); if(nextk == 0) { WorkSpaceStack wss(_toom22_estimate_workspace(n, n)); for(int i = 0; i < nK; i += n) { _toom22(tmp, x + i, n, y + i, n, wss); _copy(x + i, tmp, n); _subtract(x + i, tmp + n, n - 1); } } else { for(int i = 0; i < nK; i += n) { _schonhage_strassen_recurse(x + i, x + i, y + i, n, nextk); } } _fft_inverse_rec(x, n, k, tmp); _divide_power_of_two(x, x, nK, k); delete[] omega_table; int inv_omega_k = n2 - (n >> k); _fill_zero(res, N); for(int i = 0, j = 0, shift = 0; i < nK; i += n, j += m) { if(j >= 2 * N) j -= 2 * N; _negacyclic_shift(tmp, x + i, n, shift); _negacyclic_shift_add(res, N, tmp, n, j); shift += inv_omega_k; if(shift >= n2) shift -= n2; } } void Polynomial::_schonhage_strassen_decompose(R *x, const R *X, int k, int n, int m, R *tmp) { int N = m << k, n2 = n * 2, omega = n >> k; for(int i = 0, j = 0, shift = 0; j < N; i += n, j += m) { _copy(tmp, X + j, m); _fill_zero(tmp + m, n - m); _negacyclic_shift(x + i, tmp, n, shift); shift += omega; if(shift >= n2) shift -= n2; } } void Polynomial::_fft_make_omega_table(int n, int k, unsigned short *omega_table) { int n2 = n * 2; unsigned short *row = omega_table; for(int i = k; i > 0; -- i) { int hK = 1 << (i - 1), hhK = hK >> 1; int omega = n2 >> i; int r = 0; for(int j = 0; j < hK; ++ j) { //r = bitrev(j * 2, i) = bitrev(j, i-1) row[j] = r * omega % n2; for(int h = hhK; h > (r ^= h); h >>= 1); } row += hK; } } void Polynomial::_fft_rec(R *x, int n, int inc, int k, R *tmp, const unsigned short *omega_table) { if(k == 0) return; int hK = 1 << (k - 1); R *x0 = x, *x1 = x + inc; _fft_rec(x0, n, inc * 2, k - 1, tmp, omega_table + hK); _fft_rec(x1, n, inc * 2, k - 1, tmp, omega_table + hK); for(int i = 0; i < hK; ++ i, x0 += inc * 2, x1 += inc * 2) { int shift = omega_table[i]; _negacyclic_shift(tmp, x1, n, shift); _subtract(x1, x0, n, tmp, n); _add(x0, tmp, n); } } void Polynomial::_fft_inverse_rec(R *x, int n, int k, R *tmp) { if(k == 0) return; int hK = 1 << (k - 1); R *x0 = x, *x1 = x + (n << (k - 1)); _fft_inverse_rec(x0, n, k - 1, tmp); _fft_inverse_rec(x1, n, k - 1, tmp); int n2 = n * 2, omega = n2 - (n2 >> k); for(int i = 0, shift = 0; i < hK; ++ i, x0 += n, x1 += n) { _negacyclic_shift(tmp, x1, n, shift); _subtract(x1, x0, n, tmp, n); _add(x0, tmp, n); shift += omega; if(shift >= n2) shift -= n2; } } void Polynomial::_negacyclic_shift(R *res, const R *x, int n, int shift) { assert(0 <= shift && shift <= n * 2); if(shift < n) { _negate(res, x + (n - shift), shift); _copy(res + shift, x, n - shift); } else { int s = shift - n; _copy(res, x + (n - s), s); _negate(res + s, x, n - s); } } void Polynomial::_negacyclic_shift_add(R *p, int pn, const R *q, int qn, int shift) { assert(p != q && pn >= qn && 0 <= shift && shift < pn * 2); if(shift < pn) { if(shift + qn <= pn) { _add(p + shift, q, qn); } else { int t = pn - shift; _subtract(p, q + t, qn - t); _add(p + shift, q, t); } } else { int s = shift - pn; if(s + qn <= pn) { _subtract(p + s, q, qn); } else { int t = pn - s; _add(p, q + t, qn - t); _subtract(p + s, q, t); } } } void Polynomial::_reverse(R *res, const R *p, int pn) { if(res == p) { std::reverse(res, res + pn); } else { for(int i = 0; i < pn; ++ i) res[pn - 1 - i] = p[i]; } } void Polynomial::_inverse_power_series(R *res, int resn, const R *p, int pn) { if(resn == 0) return; assert(res != p); assert(p[0] == OneR()); WorkSpace ws(resn * 4); R *tmp1 = ws.get(), *tmp2 = tmp1 + resn * 2; _fill_zero(res, resn); res[0] = p[0]; int curn = 1; while(curn < resn) { int nextn = std::min(resn, curn * 2); _square_select_method(tmp1, res, curn); _multiply_select_method(tmp2, tmp1, std::min(nextn, curn * 2 - 1), p, std::min(nextn, pn)); _multiply_power_of_two(res, res, curn, 1); _subtract(res, tmp2, nextn); curn = nextn; } } void Polynomial::_precompute_inverse(R *res, int resn, const R *p, int pn) { WorkSpace ws(pn); R *tmp = ws.get(); _reverse(tmp, p, pn); _inverse_power_series(res, resn, tmp, pn); } void Polynomial::_divide_precomputed_inverse(R *res, int resn, const R *revp, int pn, const R *inv) { WorkSpace ws(pn + resn); R *tmp = ws.get(); _multiply_select_method(tmp, revp, pn, inv, resn); _reverse(res, tmp, resn); } void Polynomial::_divide_remainder_precomputed_inverse(R *quot, R *rem, const R *p, int pn, const R *q, int qn, const R *inv) { if(pn < qn) { _copy(rem, p, pn); _fill_zero(rem + pn, qn - 1 - pn); return; } assert(qn > 0); assert(q[qn - 1] == OneR()); if(qn == 1) return; int quotn = pn - qn + 1; int rn = qn - 1, tn = std::min(quotn, rn), un = tn + rn; WorkSpace ws(pn + un + (quot != NULL ? 0 : quotn)); R *revp = ws.get(), *quotmul = revp + pn; if(quot == NULL) quot = quotmul + un; _reverse(revp, p, pn); _divide_precomputed_inverse(quot, quotn, revp, pn, inv); _multiply_select_method(quotmul, q, rn, quot, tn); _subtract(rem, p, rn, quotmul, rn); } } //なんか前の提出ではここに嘘を書いていた気がする… vector doDP(const int a[6], int n) { int maxX = a[5] * n; vector > dp(n+1, vector(maxX+1)); dp[0][0] = 1; int z = a[0]; rep(k, 6) { int d = a[k]; for(int t = 0; t < n; ++ t) { for(int i = t * z; i <= t * d; ++ i) { mint x = dp[t][i]; if(x.residue != 0) dp[t + 1][i + d] += x; } } } return dp[n]; } //a_{i+n} = \sum_{j=0}^{n-1} c[j] a_{i+j} vector computeLinearRecurrentSequenceCoefficients(const vector &c, long long N) { using namespace mod_polynomial; int n = (int)c.size(); Polynomial poly; poly.resize(n+1); rep(i, n) poly.set(i, -c[i]); poly.set(n, 1); Polynomial resp = Polynomial::X().powerMod(N, poly); vector res(n); rep(i, n) res[i] = resp.get(i); return res; } int main() { long long N; int P; int C; while(cin >> N >> P >> C) { const int primes[6] = { 2, 3, 5, 7, 11, 13 }; const int composites[6] = { 4, 6, 8, 9, 10, 12 }; vector cntP, cntC; cntP = doDP(primes, P); cntC = doDP(composites, C); int X = P * primes[5] + C * composites[5]; vector ways(X+1); { using mod_polynomial::Polynomial; Polynomial p(cntP.begin(), cntP.end()); Polynomial c(cntC.begin(), cntC.end()); Polynomial pc = p * c; rer(i, 0, X) ways[i] = pc.get(i); } vector c(X); rep(i, X) c[i] = ways[X - i]; vector coeffs; coeffs = computeLinearRecurrentSequenceCoefficients(c, X + (N - 1)); mint ans = accumulate(all(coeffs), mint()); printf("%d\n", ans.get()); } return 0; }