#include #define debug(x) cerr << #x << ": " << x << endl #define debugArray(x,n) for(long long hoge = 0; (hoge) < (n); ++ (hoge)) cerr << #x << "[" << hoge << "]: " << x[hoge] << endl using namespace std; typedef long long ll; typedef unsigned long long ull; typedef vector vll; const ll INF = LLONG_MAX/2; const ll MOD = 1e9+7; using u32 = unsigned; using u64 = unsigned long long; namespace ntt { template class Mod64 { private: using u128 = __uint128_t; static constexpr u64 mul_inv(u64 n, int e=6, u64 x=1) { return e == 0 ? x : mul_inv(n, e-1, x*(2-x*n)); } public: static constexpr u64 inv = mul_inv(mod); static constexpr u64 r2 = -u128(mod) % mod; static constexpr int level = __builtin_ctzll(mod - 1); static_assert(inv * mod == 1, "invalid 1/M modulo 2^64."); Mod64() {} Mod64(u64 n) : x(init(n)) {}; static u64 modulo() { return mod; } static u64 init(u64 w) { return reduce(u128(w) * r2); } static u64 reduce(const u128 w) { return u64(w >> 64) + mod - ((u128(u64(w) * inv) * mod) >> 64); } static Mod64 omega() { return Mod64(prim_root).pow((mod - 1) >> level); } Mod64& operator += (Mod64 rhs) { this->x += rhs.x; return *this; } Mod64& operator -= (Mod64 rhs) { this->x += 2 * mod - rhs.x; return *this; } Mod64& operator *= (Mod64 rhs) { this->x = reduce(u128(this->x) * rhs.x); return *this; } Mod64 operator + (Mod64 rhs) const { return Mod64(*this) += rhs; } Mod64 operator - (Mod64 rhs) const { return Mod64(*this) -= rhs; } Mod64 operator * (Mod64 rhs) const { return Mod64(*this) *= rhs; } u64 get() const { return reduce(this->x) % mod; } void set(u64 n) const { this->x = n; } Mod64 pow(u64 exp) const { Mod64 ret = Mod64(1); for (Mod64 base = *this; exp; exp >>= 1, base *= base) if (exp & 1) ret *= base; return ret; } Mod64 inverse() const { return pow(mod - 2); } friend ostream& operator << (ostream& os, const Mod64& m) { return os << m.get(); } static void Debug() { printf("%llu %llu %llu %llu\n", mod, inv, r2, omega().get()); } u64 x; }; template void convolute(mod_t* A, int s1, mod_t* B, int s2, bool cyclic=false) { int s = (cyclic ? max(s1, s2) : s1 + s2 - 1); int size = 1; while (size < s) size <<= 1; mod_t roots[mod_t::level] = { mod_t::omega() }; for(int i=1;i void rev_permute(mod_t* A, int n) { int r = 0, nh = n >> 1; for(int i=1;i>= 1); if (r > i) swap(A[i], A[r]); } } template void ntt_dit4(mod_t* A, int n, int sign, mod_t* roots) { rev_permute(A, n); int logn = __builtin_ctz(n); if (logn & 1) for(int i=0;i> 2; mod_t dw = roots[mod_t::level - e]; if (sign < 0) dw = dw.inverse(); const int block_size = max(m, (1 << 15) / int(sizeof(A[0]))); for(int k=0;k; using m64_2 = ntt::Mod64<35012573396993, 3>; m64_1 f1[size], g1[size]; m64_2 f2[size], g2[size]; } // namespace ntt using R = u32; class poly { public: poly() {} poly(int n) : coefs(n) {} poly(int n, int c) : coefs(n, c % mod) {} poly(const vector& v) : coefs(v) {} poly(const poly& f, int beg, int end=-1) { if (end < 0) end = beg, beg = 0; resize(end - beg); for(int i=beg;i= mod) a -= mod; } static void sub(R& a, R b) { if (int(a -= b) < 0) a += mod; } poly operator - () { poly ret = *this; for(int i=0;i<(int)ret.size();i++) ret[i] = (ret[i] == 0 ? 0 : mod - ret[i]); return ret; } poly& operator += (const poly& rhs) { if (size() < rhs.size()) resize(rhs.size()); for(int i=0;i<(int)rhs.size();i++) add(coefs[i], rhs[i]); return *this; } poly& operator -= (const poly& rhs) { if (size() < rhs.size()) resize(rhs.size()); for(int i=0;i<(int)rhs.size();i++) sub(coefs[i], rhs[i]); return *this; } poly& operator *= (const poly& rhs) { return *this = *this * rhs; } poly operator + (const poly& rhs) const { return poly(*this) += rhs; } poly operator - (const poly& rhs) const { return poly(*this) -= rhs; } poly operator * (const poly& rhs) const { return this->mul(rhs); } struct fast_div { using u128 = __uint128_t; fast_div(u64 n) : m(n) { s = (n == 1) ? 0 : 127 - __builtin_clzll(n - 1); x = ((u128(1) << s) + n - 1) / n; } friend u64 operator / (u64 n, fast_div d) { return u128(n) * d.x >> d.s; } friend u64 operator % (u64 n, fast_div d) { return n - n / d * d.m; } u64 m, s, x; }; private: poly mul_crt(int beg, int end) const { using namespace ntt; auto inv = m64_2(m64_1::modulo()).inverse(); auto fast_mod = fast_div(mod); auto mod1 = m64_1::modulo() % fast_mod; poly ret(end - beg); for(int i=0;i<(int)ret.size();i++) { u64 r1 = f1[i + beg].get(), r2 = f2[i + beg].get(); ret[i] = (r1 + (m64_2(r2 + m64_2::modulo() - r1) * inv).get() % fast_mod * mod1) % fast_mod; } return ret; } static void mul2(const poly& f, const poly &g, bool cyclic=false) { using namespace ntt; if (&f == &g) { for(int i=0;i<(int)f.size();i++) f1[i] = f[i]; convolute(f1, f.size(), f1, f.size(), cyclic); for(int i=0;i<(int)f.size();i++) f2[i] = f[i]; convolute(f2, f.size(), f2, f.size(), cyclic); } else { for(int i=0;i<(int)f.size();i++) f1[i] = f[i]; for(int i=0;i<(int)g.size();i++) g1[i] = g[i]; convolute(f1, f.size(), g1, g.size(), cyclic); for(int i=0;i<(int)f.size();i++) f2[i] = f[i]; for(int i=0;i<(int)g.size();i++) g2[i] = g[i]; convolute(f2, f.size(), g2, g.size(), cyclic); } } public: // 1.0 * M(n) poly mul(const poly& g) const { const auto& f = *this; if (f.size() == 0 || g.size() == 0) return poly(); mul2(f, g, false); return mul_crt(0, f.size() + g.size() - 1); } // 0.5 * M(n) poly mul_cyclically(const poly& g) const { const auto& f = *this; if (f.size() == 0 || g.size() == 0) return poly(); mul2(f, g, true); int s = max(f.size(), g.size()), size = 1; while (size < s) size <<= 1; return mul_crt(0, size); } // 1.0 * M(n) poly middle_product(const poly& g) const { const poly& f = *this; if (f.size() == 0 || g.size() == 0) return poly(); mul2(f, g, true); return mul_crt(f.size(), g.size()); } // 2.0 * M(n) poly inverse(int prec=-1) const { if (prec < 0) prec = size(); poly ret(1, 1); for (int e = 1, ne; e < prec; e = ne) { ne = min(2 * e, prec); poly h = poly(ret, ne - e) * -ret.middle_product(poly(*this, ne)); for(int i=e;i divrem(const poly& b) const { if (size() < b.size()) return make_pair(poly(), poly(*this)); assert(size() < 2 * b.size()); int sq = size() - b.size() + 1; poly q = poly(*this, sq).quotient(poly(b, sq)); poly r = sub_mul(*this, q, b); return make_pair(q, r); } // 3.0 * M(n) poly rem(const poly& b) const { return divrem(b).second; } // 1.5 * M(n) pair divrem_pre(const poly& b, const poly& inv) const { if (size() < b.size()) return make_pair(poly(), poly(*this)); int sq = size() - b.size() + 1; assert(size() >= sq && inv.size() >= sq); poly q = poly(poly(*this, sq) * poly(inv, sq), sq); poly r = sub_mul(*this, q, b); return make_pair(q, r); } // 1.5 * M(n) poly rem_pre(const poly& f, const poly& inv) const { return divrem_pre(f, inv).second; } // 13/6 * M(n) * log2_(e) : x^e (mod f) static poly x_pow_mod(u64 e, const poly& f) { if (e == 0) return poly(1, 1); poly ret = poly(vector({1, 0})); poly inv = f.inverse(f.size()); ret = ret.rem_pre(f, inv); u64 mask = (u64(1) << ilog2(e)) >> 1; while (mask) { ret *= ret; if (e & mask) ret.push_back(0); ret = ret.rem_pre(f, inv); mask >>= 1; } return ret; } public: vector coefs; static R mod; }; R poly::mod; double tick() { static clock_t oldtick; clock_t newtick = clock(); double diff = 1.0*(newtick - oldtick) / CLOCKS_PER_SEC; oldtick = newtick; return diff; } signed main(){ cin.tie(0); ios::sync_with_stdio(false); //tick(); poly::mod=MOD; ll K;cin>>K; ll N;cin>>N; poly f(1<<17,0); for(ll i=0;i>x; f[x]=MOD-1; } f[0]=1; //cerr<<"???"<