// Sum of products fibs[i] * fibs[j] over all pairs i <= j of a Fibonacci-like
// sequence, modulo 1e9+7. The sum is obtained from a self-convolution computed
// with number-theoretic transforms (NTT) under four different primes and
// recombined with Garner's algorithm.
#include <cassert>
#include <cstddef>
#include <iostream>
#include <utility>
#include <vector>

using namespace std;

typedef long long ll;
typedef pair<int, int> Pii;  // (modulus, residue)

#define FOR(i,n) for(int i = 0; i < (n); i++)
#define ten(x) ((int)1e##x)

// Extended Euclidean algorithm.
// Writes x and y such that a * x + b * y == g, where g is the returned value
// (the last non-zero remainder of the Euclidean algorithm on a and b).
template<class T>
T extgcd(T a, T b, T& x, T& y) {
    // Loop invariant, in terms of the original arguments a0, b0:
    //   b == a0 * x + b0 * y   and   a == a0 * u + b0 * v
    for (T u = y = 1, v = x = 0; a;) {
        T q = b / a;
        swap(x -= q * u, u);
        swap(y -= q * v, v);
        swap(b -= q * a, a);
    }
    return b;
}

// Returns the coefficient x of extgcd(a, m, x, y) normalised into [0, m).
// When a and m are coprime (and m > 0) this is the inverse of a modulo m.
template<class T>
T mod_inv(T a, T m) {
    T x, y;
    extgcd(a, m, x, y);
    return (m + x % m) % m;
}

// Returns a^n modulo mod by binary exponentiation (n >= 0, mod > 0).
// Intermediate products are formed in long long, so mod * mod must fit in it.
ll mod_pow(ll a, ll n, ll mod) {
    ll ret = 1 % mod;  // stays correct for mod == 1
    ll p = a % mod;
    if (p < 0) p += mod;  // keep the base non-negative for negative a
    while (n) {
        if (n & 1) ret = ret * p % mod;
        p = p * p % mod;
        n >>= 1;
    }
    return ret;
}

// Number-theoretic transform over Z/modZ.
// primitive_root is assumed to be a primitive root of mod, and the transform
// length must be a power of two that divides mod - 1.
template<int mod, int primitive_root>
class NTT {
public:
    int get_mod() const { return mod; }

    // In-place transform of `a`; sign == 1 is the forward transform and
    // sign == -1 the inverse transform without the final 1/n scaling.
    // On return every element lies in [0, mod).
    void _ntt(vector<ll>& a, int sign) {
        const int n = (int)a.size();
        if (n == 0) return;                // nothing to do; also avoids dividing by zero below
        assert((n ^ (n & -n)) == 0);       // n = 2^k
        assert((mod - 1) % n == 0);        // an n-th root of unity must exist

        // Bring every input into [0, mod) so the butterflies below can rely on
        // single conditional corrections.
        for (auto& value : a) {
            value %= mod;
            if (value < 0) value += mod;
        }

        int h = (int)mod_pow(primitive_root, (mod - 1) / n, mod);  // h^n = 1
        if (sign == -1) h = (int)mod_inv(h, mod);                  // h = h^-1 % mod

        // Bit-reversal permutation.
        int i = 0;
        for (int j = 1; j < n - 1; ++j) {
            for (int k = n >> 1; k > (i ^= k); k >>= 1);
            if (j < i) swap(a[i], a[j]);
        }

        // Iterative butterflies; m is half of the current block length.
        for (int m = 1; m < n; m *= 2) {
            const int m2 = 2 * m;
            const ll base = mod_pow(h, n / m2, mod);
            ll w = 1;
            for (int x = 0; x < m; x++) {
                for (int s = x; s < n; s += m2) {
                    const ll u = a[s];
                    const ll d = a[s + m] * w % mod;
                    a[s] = u + d;
                    if (a[s] >= mod) a[s] -= mod;
                    a[s + m] = u - d;
                    if (a[s + m] < 0) a[s + m] += mod;
                }
                w = w * base % mod;
            }
        }
    }

    // Forward transform, in place.
    void ntt(vector<ll>& input) {
        _ntt(input, 1);
    }

    // Inverse transform, in place, including the 1/n scaling.
    void intt(vector<ll>& input) {
        _ntt(input, -1);
        const int n_inv = mod_inv((int)input.size(), mod);
        for (auto& x : input) x = x * n_inv % mod;
    }

    // Convolution of a and b modulo mod.
    // The result has the padded transform length (the smallest power of two
    // that is >= a.size() + b.size()); entries past the true product are zero.
    vector<ll> convolution(const vector<ll>& a, const vector<ll>& b) {
        size_t ntt_size = 1;
        while (ntt_size < a.size() + b.size()) ntt_size *= 2;

        vector<ll> fa = a, fb = b;
        fa.resize(ntt_size);
        fb.resize(ntt_size);

        ntt(fa);
        ntt(fb);
        for (size_t i = 0; i < ntt_size; i++) {
            fa[i] = fa[i] * fb[i] % mod;
        }
        intt(fa);
        return fa;
    }
};

// Garner's algorithm (incremental Chinese remainder theorem).
// mr holds (modulus, residue) pairs; the moduli are assumed to be pairwise
// coprime. Returns, modulo `mod`, the smallest non-negative integer that has
// the given residue under every modulus.
ll garner(vector<Pii> mr, int mod) {
    mr.emplace_back(mod, 0);  // the target modulus rides along as a final slot
    const size_t count = mr.size();

    vector<ll> coffs(count, 1);
    vector<ll> constants(count, 0);
    for (size_t i = 0; i + 1 < count; i++) {
        const ll m = mr[i].first;
        // Solve coffs[i] * v + constants[i] == mr[i].second (mod mr[i].first).
        ll v = (mr[i].second - constants[i]) * mod_inv<ll>(coffs[i], m) % m;
        if (v < 0) v += m;

        for (size_t j = i + 1; j < count; j++) {
            (constants[j] += coffs[j] * v) %= mr[j].first;
            (coffs[j] *= m) %= mr[j].first;
        }
    }

    return constants.back();
}

typedef NTT<998244353, 3> NTT_1;
typedef NTT<897581057, 3> NTT_2;
typedef NTT<645922817, 3> NTT_3;
typedef NTT<595591169, 3> NTT_4;

const int mod = 1000000007;

// Quadratic reference implementation: sum of fibs[i] * fibs[j] over i <= j,
// modulo `mod`. Elements are expected to lie in [0, mod).
ll solve_stupid(const vector<ll>& fibs) {
    ll res = 0;
    for (size_t i = 0; i < fibs.size(); i++) {
        for (size_t j = i; j < fibs.size(); j++) {
            (res += fibs[i] * fibs[j]) %= mod;
        }
    }
    return res;
}

// Convolves `values` with itself under Transform's prime and returns
// (prime, sum of all convolution entries modulo prime).
template<class Transform>
static Pii convolution_sum_residue(const vector<ll>& values) {
    Transform transform;
    const int prime = transform.get_mod();
    ll total = 0;
    for (const ll term : transform.convolution(values, values)) {
        (total += term) %= prime;
    }
    return Pii(prime, (int)total);
}

// Sum of fibs[i] * fibs[j] over i <= j, modulo `mod`, via NTT.
// Elements are expected to lie in [0, mod).
ll solve_ntt(const vector<ll>& fibs) {
    // Before reduction the total is on the order of (10^9)^2 * (10^6)^2 = 10^30,
    // so it is computed under four NTT primes and recovered with garner().
    vector<Pii> ntt_mods;
    ntt_mods.reserve(4);
    ntt_mods.push_back(convolution_sum_residue<NTT_1>(fibs));
    ntt_mods.push_back(convolution_sum_residue<NTT_2>(fibs));
    ntt_mods.push_back(convolution_sum_residue<NTT_3>(fibs));
    ntt_mods.push_back(convolution_sum_residue<NTT_4>(fibs));

    // The convolution total counts every pair i != j twice and every i == j
    // once; adding the squares and halving leaves each pair i <= j once.
    ll res = garner(ntt_mods, mod);
    for (size_t i = 0; i < fibs.size(); i++) {
        (res += fibs[i] * fibs[i]) %= mod;
    }
    (res *= mod_inv(2, mod)) %= mod;
    return res;
}

// Reads n and p, builds f[0] = 0, f[1] = 1, f[i] = p * f[i-1] + f[i-2] (mod 1e9+7)
// for i < n, and prints the pair sum computed by solve_ntt.
int main() {
    int n = 0, p = 0;
    cin >> n >> p;

    // With fewer than two terms the only term is f[0] = 0 (or there are none),
    // so the sum is 0. This also keeps fibs[1] below in bounds.
    if (n <= 1) {
        cout << 0 << '\n';
        return 0;
    }

    // Reduce the multiplier once so every term stays in [0, mod).
    const ll step = (((ll)p % mod) + mod) % mod;

    vector<ll> fibs(n);
    fibs[0] = 0;
    fibs[1] = 1;
    for (int i = 2; i < n; i++) {
        fibs[i] = (fibs[i - 1] * step + fibs[i - 2]) % mod;
    }

    cout << solve_ntt(fibs) << '\n';
}