// This solution uses Nyaan's library:
// https://nyaannyaan.github.io/library/modulo/arbitrary-mod-binomial.hpp
//
// Program: reads L, R, M from standard input and prints
//   sum_{x = L}^{R} (C(2x, x) - 2)  (mod M)
// where C is the binomial coefficient, evaluated modulo an arbitrary M via
// prime-power decomposition + CRT.
#include <algorithm>
#include <array>
#include <bitset>
#include <cassert>
#include <chrono>
#include <cmath>
#include <cstddef>
#include <cstdlib>
#include <deque>
#include <functional>
#include <initializer_list>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <limits>
#include <map>
#include <numeric>
#include <queue>
#include <set>
#include <stack>
#include <string>
#include <tuple>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

// ---------------------------------------------------------------------------
// Fundamental type aliases and literals
// ---------------------------------------------------------------------------
using i32 = int;
using u32 = unsigned int;
using i64 = long long;
using u64 = unsigned long long;
using i128 = __int128_t;
using u128 = __uint128_t;
using f64 = double;
using f80 = long double;
using f128 = __float128;

constexpr i32 operator""_i32(u64 v) { return static_cast<i32>(v); }
constexpr u32 operator""_u32(u64 v) { return static_cast<u32>(v); }
constexpr i64 operator""_i64(u64 v) { return static_cast<i64>(v); }
constexpr u64 operator""_u64(u64 v) { return v; }
constexpr f64 operator""_f64(f80 v) { return static_cast<f64>(v); }
constexpr f80 operator""_f80(f80 v) { return v; }

using Istream = std::istream;
using Ostream = std::ostream;
using Str = std::string;

template<typename T> using Lt = std::less<T>;
template<typename T> using Gt = std::greater<T>;
template<typename T> using IList = std::initializer_list<T>;
template<std::size_t n> using BSet = std::bitset<n>;
template<typename T1, typename T2> using Pair = std::pair<T1, T2>;
template<typename... Ts> using Tup = std::tuple<Ts...>;
template<typename T, std::size_t N> using Arr = std::array<T, N>;
template<typename... Ts> using Deq = std::deque<Ts...>;
template<typename... Ts> using Set = std::set<Ts...>;
template<typename... Ts> using MSet = std::multiset<Ts...>;
template<typename... Ts> using USet = std::unordered_set<Ts...>;
template<typename... Ts> using UMSet = std::unordered_multiset<Ts...>;
template<typename... Ts> using Map = std::map<Ts...>;
template<typename... Ts> using MMap = std::multimap<Ts...>;
template<typename... Ts> using UMap = std::unordered_map<Ts...>;
template<typename... Ts> using UMMap = std::unordered_multimap<Ts...>;
// Single-parameter alias so that `Vec<T>` stays deducible in function templates.
template<typename T> using Vec = std::vector<T>;
template<typename... Ts> using Stack = std::stack<Ts...>;
template<typename... Ts> using Queue = std::queue<Ts...>;
template<typename T> using MaxHeap = std::priority_queue<T>;
template<typename T> using MinHeap = std::priority_queue<T, Vec<T>, Gt<T>>;

using NSec = std::chrono::nanoseconds;
using USec = std::chrono::microseconds;
using MSec = std::chrono::milliseconds;
using Sec = std::chrono::seconds;

// ---------------------------------------------------------------------------
// Environment switches and numeric constants
// ---------------------------------------------------------------------------
constexpr bool LOCAL = false;
constexpr bool OJ = not LOCAL;

/// Returns `local` when built with LOCAL == true, otherwise `oj`.
template<typename T>
static constexpr T OjLocal(T oj, T local)
{
    return LOCAL ? local : oj;
}

template<typename T> constexpr T LIMMIN = std::numeric_limits<T>::min();
template<typename T> constexpr T LIMMAX = std::numeric_limits<T>::max();
/// "Infinity" that can be added to itself without overflowing T.
template<typename T> constexpr T INF = (LIMMAX<T> - 1) / 2;
template<typename T> constexpr T PI = static_cast<T>(3.141592653589793238462643383279502884L);

/// 10^n as a value of type T (n >= 0).
template<typename T = u64>
constexpr T TEN(int n)
{
    return n == 0 ? T{1} : TEN<T>(n - 1) * T{10};
}

// ---------------------------------------------------------------------------
// Small generic helpers
// ---------------------------------------------------------------------------

/// Appends vs2 to vs1 and returns vs1.
template<typename T>
constexpr Vec<T>& operator+=(Vec<T>& vs1, const Vec<T>& vs2)
{
    if (&vs1 == &vs2) {
        // Inserting a vector's own range into itself is undefined; go through a copy.
        const Vec<T> copy = vs2;
        vs1.insert(vs1.end(), copy.begin(), copy.end());
    } else {
        vs1.insert(vs1.end(), vs2.begin(), vs2.end());
    }
    return vs1;
}

/// Concatenation of two vectors.
template<typename T>
constexpr Vec<T> operator+(const Vec<T>& vs1, const Vec<T>& vs2)
{
    auto vs = vs1;
    vs += vs2;
    return vs;
}

/// a = min(a, b); returns true iff a changed.
template<typename T>
constexpr bool chmin(T& a, const T& b)
{
    return (a > b ? (a = b, true) : false);
}

/// a = max(a, b); returns true iff a changed.
template<typename T>
constexpr bool chmax(T& a, const T& b)
{
    return (a < b ? (a = b, true) : false);
}

/// floor(x / y) for signed integers (y != 0).
template<typename T>
constexpr T floorDiv(T x, T y)
{
    assert(y != 0);
    if (y < T{}) { x = -x, y = -y; }
    return x >= T{} ? x / y : (x - y + 1) / y;
}

/// ceil(x / y) for signed integers (y != 0).
template<typename T>
constexpr T ceilDiv(T x, T y)
{
    assert(y != 0);
    if (y < T{}) { x = -x, y = -y; }
    return x >= T{} ? (x + y - 1) / y : x / y;
}

/// v^n by binary exponentiation, with `e` as the multiplicative identity (n >= 0).
template<typename T, typename I>
constexpr T powerMonoid(T v, I n, const T& e)
{
    assert(n >= 0);
    T ans = e;
    while (n > 0) {
        if (n % 2 == 1) { ans *= v; }
        n >>= 1;
        // Only square while bits remain: the original squared once more after the
        // last bit, which overflows (UB for signed types) although the result fits.
        if (n > 0) { v *= v; }
    }
    return ans;
}

/// v^n with T{1} as identity.
template<typename T, typename I>
constexpr T powerInt(T v, I n)
{
    return powerMonoid(v, n, T{1});
}

/// Assigns v to every scalar element of a (possibly nested) container.
template<typename Vs, typename V>
constexpr void fillAll(Vs& arr, const V& v)
{
    if constexpr (std::is_convertible<V, Vs>::value) {
        arr = v;
    } else {
        for (auto& subarr : arr) { fillAll(subarr, v); }
    }
}

template<typename Vs>
constexpr void sortAll(Vs& vs)
{
    std::sort(std::begin(vs), std::end(vs));
}

template<typename Vs, typename C>
constexpr void sortAll(Vs& vs, C comp)
{
    std::sort(std::begin(vs), std::end(vs), comp);
}

template<typename Vs>
constexpr void reverseAll(Vs& vs)
{
    std::reverse(std::begin(vs), std::end(vs));
}

/// Sum of all scalar elements of a (possibly nested) container, accumulated in V.
template<typename V, typename Vs>
constexpr V sumAll(const Vs& vs)
{
    if constexpr (std::is_convertible<Vs, V>::value) {
        return static_cast<V>(vs);
    } else {
        V ans = 0;
        for (const auto& v : vs) { ans += sumAll<V>(v); }
        return ans;
    }
}

template<typename Vs>
constexpr int minInd(const Vs& vs)
{
    return std::min_element(std::begin(vs), std::end(vs)) - std::begin(vs);
}

template<typename Vs>
constexpr int maxInd(const Vs& vs)
{
    return std::max_element(std::begin(vs), std::end(vs)) - std::begin(vs);
}

template<typename Vs, typename V>
constexpr int lbInd(const Vs& vs, const V& v)
{
    return std::lower_bound(std::begin(vs), std::end(vs), v) - std::begin(vs);
}

template<typename Vs, typename V>
constexpr int ubInd(const Vs& vs, const V& v)
{
    return std::upper_bound(std::begin(vs), std::end(vs), v) - std::begin(vs);
}

/// Adds v to every element of vs.
template<typename Vs, typename V>
constexpr void plusAll(Vs& vs, const V& v)
{
    for (auto& v_ : vs) { v_ += v; }
}

/// Vector of n values produced by successive calls to gen().
template<typename T, typename F>
constexpr Vec<T> genVec(int n, F gen)
{
    Vec<T> ans;
    std::generate_n(std::back_inserter(ans), n, gen);
    return ans;
}

/// Vector {offset, offset + 1, ..., offset + n - 1}.
template<typename T = int>
constexpr Vec<T> iotaVec(int n, T offset = 0)
{
    Vec<T> ans(n);
    std::iota(ans.begin(), ans.end(), offset);
    return ans;
}

// ---------------------------------------------------------------------------
// 128-bit integer output
// ---------------------------------------------------------------------------

/// Decimal digits of an unsigned 128-bit value.
inline Str u128ToDecimal(u128 v)
{
    if (v == 0) { return "0"; }
    Str digits;
    while (v != 0) {
        digits.push_back(static_cast<char>('0' + static_cast<int>(v % 10)));
        v /= 10;
    }
    std::reverse(digits.begin(), digits.end());
    return digits;
}

inline Ostream& operator<<(Ostream& os, i128 v)
{
    const bool minus = (v < 0);
    // Negate in the unsigned domain so that the minimum value does not overflow.
    const u128 mag = minus ? (static_cast<u128>(0) - static_cast<u128>(v)) : static_cast<u128>(v);
    return os << (minus ? "-" : "") << u128ToDecimal(mag);
}

inline Ostream& operator<<(Ostream& os, u128 v)
{
    return os << u128ToDecimal(v);
}

// ---------------------------------------------------------------------------
// Bit utilities
// ---------------------------------------------------------------------------
constexpr int popcount(u64 v) { return v ? __builtin_popcountll(v) : 0; }
/// Number of bits needed to represent v (0 for v == 0).
constexpr int log2p1(u64 v) { return v ? 64 - __builtin_clzll(v) : 0; }
/// 1-based index of the least significant set bit (0 for v == 0).
constexpr int lsbp1(u64 v) { return __builtin_ffsll(v); }
/// ceil(log2(v)) for v >= 1 (0 for v == 0).
constexpr int ceillog(u64 v) { return v ? log2p1(v - 1) : 0; }
/// Smallest power of two >= v.
constexpr u64 ceil2(u64 v)
{
    assert(v <= (1_u64 << 63));
    return 1_u64 << ceillog(v);
}
/// Largest power of two <= v (0 for v == 0).
constexpr u64 floor2(u64 v) { return v ? (1_u64 << (log2p1(v) - 1)) : 0_u64; }
constexpr bool ispow2(u64 v) { return (v > 0) and ((v & (v - 1)) == 0); }
constexpr bool btest(u64 mask, int ind) { return (mask >> ind) & 1_u64; }

/// Fixed-point combinator: lets a lambda call itself through its first argument.
template<typename F>
struct Fix : F
{
    constexpr Fix(F&& f) : F{std::forward<F>(f)} {}
    template<typename... Args>
    constexpr auto operator()(Args&&... args) const
    {
        return F::operator()(*this, std::forward<Args>(args)...);
    }
};

/// Integer range usable in range-for: start, start + step, ... (end exclusive).
class irange
{
private:
    struct itr
    {
        constexpr itr(i64 start = 0, i64 step = 1) : m_cnt{start}, m_step{step} {}
        constexpr bool operator!=(const itr& it) const { return m_cnt != it.m_cnt; }
        constexpr i64 operator*() { return m_cnt; }
        constexpr itr& operator++() { return m_cnt += m_step, *this; }
        i64 m_cnt, m_step;
    };
    i64 m_start, m_end, m_step;

public:
    /// Number of values visited by (start, end, step); -1 when step == 0.
    static constexpr i64 cnt(i64 start, i64 end, i64 step)
    {
        if (step == 0) { return -1; }
        const i64 d = (step > 0 ? step : -step);
        const i64 l = (step > 0 ? start : end);
        const i64 r = (step > 0 ? end : start);
        i64 n = (r - l) / d + ((r - l) % d ? 1 : 0);
        if (l >= r) { n = 0; }
        return n;
    }
    // m_end is rounded to start + step * count so that `!=` termination is exact.
    constexpr irange(i64 start, i64 end, i64 step = 1)
        : m_start{start}, m_end{m_start + step * cnt(start, end, step)}, m_step{step}
    {
        assert(step != 0);
    }
    constexpr itr begin() const { return itr{m_start, m_step}; }
    constexpr itr end() const { return itr{m_end, m_step}; }
};

/// 0, 1, ..., end - 1
constexpr irange rep(i64 end) { return irange(0, end, 1); }
/// rend - 1, rend - 2, ..., 0
constexpr irange per(i64 rend) { return irange(rend - 1, -1, -1); }

// ---------------------------------------------------------------------------
// Input / output helpers
// ---------------------------------------------------------------------------

/// Thin typed reader over an input stream.
class Scanner
{
public:
    Scanner(Istream& is = std::cin) : m_is{is}
    {
        // Called separately: tie(nullptr) returns the previously tied stream, which
        // may be null, so chaining `->sync_with_stdio` on it is not safe.
        std::ios_base::sync_with_stdio(false);
        m_is.tie(nullptr);
    }
    /// Reads one value of type T.
    template<typename T>
    T val()
    {
        T v;
        return m_is >> v, v;
    }
    /// Reads one value and subtracts `offset` (e.g. 1-indexed -> 0-indexed).
    template<typename T>
    T val(T offset)
    {
        return val<T>() - offset;
    }
    template<typename T>
    Vec<T> vec(int n)
    {
        return genVec<T>(n, [&]() { return val<T>(); });
    }
    template<typename T>
    Vec<T> vec(int n, T offset)
    {
        return genVec<T>(n, [&]() { return val<T>(offset); });
    }
    template<typename T>
    Vec<Vec<T>> vvec(int n, int m)
    {
        return genVec<Vec<T>>(n, [&]() { return vec<T>(m); });
    }
    template<typename T>
    Vec<Vec<T>> vvec(int n, int m, const T offset)
    {
        return genVec<Vec<T>>(n, [&]() { return vec<T>(m, offset); });
    }
    /// Reads several values in order; braces guarantee left-to-right evaluation.
    template<typename... Args>
    auto tup()
    {
        return Tup<Args...>{val<Args>()...};
    }
    template<typename... Args>
    auto tup(const Args&... offsets)
    {
        return Tup<Args...>{val<Args>(offsets)...};
    }

private:
    Istream& m_is;
};
Scanner in;

/// Thin printer: space-separated arguments, vectors printed element-wise.
class Printer
{
public:
    Printer(Ostream& os = std::cout) : m_os{os} { m_os << std::fixed << std::setprecision(15); }
    template<typename... Args>
    int operator()(const Args&... args)
    {
        return dump(args...), 0;
    }
    /// Prints the arguments followed by '\n'.
    template<typename... Args>
    int ln(const Args&... args)
    {
        return dump(args...), m_os << '\n', 0;
    }
    /// Prints the arguments followed by std::endl (flushes).
    template<typename... Args>
    int el(const Args&... args)
    {
        return dump(args...), m_os << std::endl, 0;
    }
    int YES(bool b = true) { return ln(b ? "YES" : "NO"); }
    int NO(bool b = true) { return YES(not b); }
    int Yes(bool b = true) { return ln(b ? "Yes" : "No"); }
    int No(bool b = true) { return Yes(not b); }

private:
    // Zero-argument case so that `out.ln()` prints just a newline.
    void dump() {}
    template<typename T>
    void dump(const T& v)
    {
        m_os << v;
    }
    template<typename T>
    void dump(const Vec<T>& vs)
    {
        const i64 n = static_cast<i64>(vs.size());
        for (i64 i : rep(n)) { m_os << (i ? " " : ""), dump(vs[i]); }
    }
    template<typename T>
    void dump(const Vec<Vec<T>>& vss)
    {
        const i64 n = static_cast<i64>(vss.size());
        for (i64 i : rep(n)) { m_os << (i ? "\n" : ""), dump(vss[i]); }
    }
    template<typename T, typename... Ts>
    int dump(const T& v, const Ts&... args)
    {
        return dump(v), m_os << ' ', dump(args...), 0;
    }
    Ostream& m_os;
};
Printer out;

/// n-dimensional vector with extents szs[0..n), every element initialised to x.
template<typename T, int n, int i = 0>
auto ndVec(int const (&szs)[n], const T x = T{})
{
    if constexpr (i == n) {
        return x;
    } else {
        auto inner = ndVec<T, n, i + 1>(szs, x);
        return Vec<decltype(inner)>(szs[i], inner);
    }
}

/// Binary search on a monotone predicate: check(ok) holds, check(ng) does not.
/// Returns the boundary value on the `ok` side.
template<typename T, typename F>
T binSearch(T ng, T ok, F check)
{
    while (std::abs(ok - ng) > 1) {
        const T mid = (ok + ng) / 2;
        (check(mid) ? ok : ng) = mid;
    }
    return ok;
}

// ---------------------------------------------------------------------------
// Modular integer
// ---------------------------------------------------------------------------

/// Integer modulo `mod_`. With mod_ == 0 the modulus is set at run time via setMod().
/// inv()/sinv()/ifact()/comb() assume the modulus is prime.
template<u32 mod_, u32 root_, u32 max2p_>
class modint
{
    template<typename U = u32&>
    static U modRef()
    {
        static u32 s_mod = 0;
        return s_mod;
    }
    template<typename U = u32&>
    static U rootRef()
    {
        static u32 s_root = 0;
        return s_root;
    }
    template<typename U = u32&>
    static U max2pRef()
    {
        static u32 s_max2p = 0;
        return s_max2p;
    }

public:
    static constexpr bool isDynamic() { return (mod_ == 0); }
    template<typename U = const u32>
    static constexpr std::enable_if_t<mod_ != 0, U> mod()
    {
        return mod_;
    }
    template<typename U = const u32>
    static std::enable_if_t<mod_ == 0, U> mod()
    {
        return modRef();
    }
    template<typename U = const u32>
    static constexpr std::enable_if_t<mod_ != 0, U> root()
    {
        return root_;
    }
    template<typename U = const u32>
    static std::enable_if_t<mod_ == 0, U> root()
    {
        return rootRef();
    }
    template<typename U = const u32>
    static constexpr std::enable_if_t<mod_ != 0, U> max2p()
    {
        return max2p_;
    }
    template<typename U = const u32>
    static std::enable_if_t<mod_ == 0, U> max2p()
    {
        return max2pRef();
    }
    template<typename U = u32>
    static void setMod(std::enable_if_t<mod_ == 0, U> m)
    {
        modRef() = m;
    }
    template<typename U = u32>
    static void setRoot(std::enable_if_t<mod_ == 0, U> r)
    {
        rootRef() = r;
    }
    template<typename U = u32>
    static void setMax2p(std::enable_if_t<mod_ == 0, U> m)
    {
        max2pRef() = m;
    }

    constexpr modint() : m_val{0} {}
    constexpr modint(i64 v) : m_val{normll(v)} {}
    /// Stores v without reduction; caller guarantees 0 <= v < mod().
    constexpr void setRaw(u32 v) { m_val = v; }
    constexpr modint operator-() const { return modint{0} - (*this); }
    constexpr modint& operator+=(const modint& m)
    {
        m_val = norm(m_val + m.val());
        return *this;
    }
    constexpr modint& operator-=(const modint& m)
    {
        m_val = norm(m_val + mod() - m.val());
        return *this;
    }
    constexpr modint& operator*=(const modint& m)
    {
        m_val = normll(static_cast<i64>(m_val) * static_cast<i64>(m.val()) % static_cast<i64>(mod()));
        return *this;
    }
    constexpr modint& operator/=(const modint& m) { return *this *= m.inv(); }
    constexpr modint operator+(const modint& m) const
    {
        auto v = *this;
        return v += m;
    }
    constexpr modint operator-(const modint& m) const
    {
        auto v = *this;
        return v -= m;
    }
    constexpr modint operator*(const modint& m) const
    {
        auto v = *this;
        return v *= m;
    }
    constexpr modint operator/(const modint& m) const
    {
        auto v = *this;
        return v /= m;
    }
    constexpr bool operator==(const modint& m) const { return m_val == m.val(); }
    constexpr bool operator!=(const modint& m) const { return not(*this == m); }
    friend Istream& operator>>(Istream& is, modint& m)
    {
        i64 v;
        return is >> v, m = v, is;
    }
    friend Ostream& operator<<(Ostream& os, const modint& m) { return os << m.val(); }
    constexpr u32 val() const { return m_val; }
    template<typename I>
    constexpr modint pow(I n) const
    {
        return powerInt(*this, n);
    }
    /// Multiplicative inverse via Fermat's little theorem.
    constexpr modint inv() const { return pow(mod() - 2); }
    /// Inverse of n, memoised across calls.
    static modint sinv(u32 n)
    {
        static Vec<modint> is{1, 1};
        for (u32 i = static_cast<u32>(is.size()); i <= n; i++) { is.push_back(-is[mod() % i] * (mod() / i)); }
        return is[n];
    }
    /// n!, memoised across calls.
    static modint fact(u32 n)
    {
        static Vec<modint> fs{1, 1};
        for (u32 i = static_cast<u32>(fs.size()); i <= n; i++) { fs.push_back(fs.back() * i); }
        return fs[n];
    }
    /// 1 / n!, memoised across calls.
    static modint ifact(u32 n)
    {
        static Vec<modint> ifs{1, 1};
        for (u32 i = static_cast<u32>(ifs.size()); i <= n; i++) { ifs.push_back(ifs.back() * sinv(i)); }
        return ifs[n];
    }
    static modint comb(int n, int k) { return k > n or k < 0 ? modint{0} : fact(n) * ifact(n - k) * ifact(k); }

private:
    // x in [0, 2 * mod) -> [0, mod)
    static constexpr u32 norm(u32 x) { return x < mod() ? x : x - mod(); }
    // any i64 -> [0, mod)
    static constexpr u32 normll(i64 x)
    {
        return norm(static_cast<u32>(x % static_cast<i64>(mod()) + static_cast<i64>(mod())));
    }
    u32 m_val;
};
using modint_1000000007 = modint<1000000007, 5, 1>;
using modint_998244353 = modint<998244353, 3, 23>;
template<int id> using modint_dynamic = modint<0, 0, id>;

// ---------------------------------------------------------------------------
// Adjacency-list graph
// ---------------------------------------------------------------------------

/// Graph with edge costs of type T. sizes()/depths()/parents() assume a tree.
template<typename T = int>
class Graph
{
    struct Edge
    {
        Edge() = default;
        Edge(int i, int t, T c) : id{i}, to{t}, cost{c} {}
        int id;
        int to;
        T cost;
        operator int() const { return to; }
    };

public:
    Graph(int n) : m_v{n}, m_edges(n) {}
    /// Adds edge u -> v with cost 1 (and v -> u when bi is true).
    void addEdge(int u, int v, bool bi = false)
    {
        assert(0 <= u and u < m_v);
        assert(0 <= v and v < m_v);
        m_edges[u].emplace_back(m_e, v, 1);
        if (bi) { m_edges[v].emplace_back(m_e, u, 1); }
        m_e++;
    }
    /// Adds edge u -> v with cost c (and v -> u when bi is true).
    void addEdge(int u, int v, const T& c, bool bi = false)
    {
        assert(0 <= u and u < m_v);
        assert(0 <= v and v < m_v);
        m_edges[u].emplace_back(m_e, v, c);
        if (bi) { m_edges[v].emplace_back(m_e, u, c); }
        m_e++;
    }
    const Vec<Edge>& operator[](const int u) const
    {
        assert(0 <= u and u < m_v);
        return m_edges[u];
    }
    Vec<Edge>& operator[](const int u)
    {
        assert(0 <= u and u < m_v);
        return m_edges[u];
    }
    int v() const { return m_v; }
    int e() const { return m_e; }
    friend Ostream& operator<<(Ostream& os, const Graph& g)
    {
        for (int u : rep(g.v())) {
            for (const auto& [id, v, c] : g[u]) {
                os << "[" << id << "]: ";
                os << u << "->" << v << "(" << c << ")\n";
            }
        }
        return os;
    }
    /// Subtree sizes when rooted at `root`.
    Vec<int> sizes(int root = 0) const
    {
        const int N = v();
        assert(0 <= root and root < N);
        Vec<int> ss(N, 1);
        Fix([&](auto dfs, int u, int p) -> void {
            for ([[maybe_unused]] const auto& [_temp_name_0, v, c] : m_edges[u]) {
                if (v == p) { continue; }
                dfs(v, u);
                ss[u] += ss[v];
            }
        })(root, -1);
        return ss;
    }
    /// Cost-weighted depth of every vertex when rooted at `root`.
    Vec<T> depths(int root = 0) const
    {
        const int N = v();
        assert(0 <= root and root < N);
        Vec<T> ds(N, T{});
        Fix([&](auto dfs, int u, int p) -> void {
            for ([[maybe_unused]] const auto& [_temp_name_1, v, c] : m_edges[u]) {
                if (v == p) { continue; }
                ds[v] = ds[u] + c;
                dfs(v, u);
            }
        })(root, -1);
        return ds;
    }
    /// Parent of every vertex when rooted at `root` (-1 for the root).
    Vec<int> parents(int root = 0) const
    {
        const int N = v();
        assert(0 <= root and root < N);
        Vec<int> ps(N, -1);
        Fix([&](auto dfs, int u, int p) -> void {
            for ([[maybe_unused]] const auto& [_temp_name_2, v, c] : m_edges[u]) {
                if (v == p) { continue; }
                ps[v] = u;
                dfs(v, u);
            }
        })(root, -1);
        return ps;
    }

private:
    int m_v;
    int m_e = 0;
    Vec<Vec<Edge>> m_edges;
};

// ---------------------------------------------------------------------------
// AtCoder Library (math / internal_math), bundled
// ---------------------------------------------------------------------------
namespace atcoder {
namespace internal {

// @param m `1 <= m`
// @return x mod m
constexpr long long safe_mod(long long x, long long m)
{
    x %= m;
    if (x < 0) x += m;
    return x;
}

// Fast modular multiplication by barrett reduction
// Reference: https://en.wikipedia.org/wiki/Barrett_reduction
// NOTE: reconsider after Ice Lake
struct barrett
{
    unsigned int _m;
    unsigned long long im;

    // @param m `1 <= m < 2^31`
    barrett(unsigned int m) : _m(m), im((unsigned long long)(-1) / m + 1) {}

    // @return m
    unsigned int umod() const { return _m; }

    // @param a `0 <= a < m`
    // @param b `0 <= b < m`
    // @return `a * b % m`
    unsigned int mul(unsigned int a, unsigned int b) const
    {
        // [1] m = 1
        // a = b = im = 0, so okay

        // [2] m >= 2
        // im = ceil(2^64 / m)
        // -> im * m = 2^64 + r (0 <= r < m)
        // let z = a*b = c*m + d (0 <= c, d < m)
        // a*b * im = (c*m + d) * im = c*(im*m) + d*im = c*2^64 + c*r + d*im
        // c*r + d*im < m * m + m * im < m * m + 2^64 + m <= 2^64 + m * (m + 1) < 2^64 * 2
        // ((ab * im) >> 64) == c or c + 1
        unsigned long long z = a;
        z *= b;
        unsigned long long x = (unsigned long long)(((unsigned __int128)(z)*im) >> 64);
        unsigned int v = (unsigned int)(z - x * _m);
        if (_m <= v) v += _m;
        return v;
    }
};

// @param n `0 <= n`
// @param m `1 <= m`
// @return `(x ** n) % m`
constexpr long long pow_mod_constexpr(long long x, long long n, int m)
{
    if (m == 1) return 0;
    unsigned int _m = (unsigned int)(m);
    unsigned long long r = 1;
    unsigned long long y = safe_mod(x, m);
    while (n) {
        if (n & 1) r = (r * y) % _m;
        y = (y * y) % _m;
        n >>= 1;
    }
    return r;
}

// Reference:
// M. Forisek and J. Jancina,
// Fast Primality Testing for Integers That Fit into a Machine Word
// @param n `0 <= n`
constexpr bool is_prime_constexpr(int n)
{
    if (n <= 1) return false;
    if (n == 2 || n == 7 || n == 61) return true;
    if (n % 2 == 0) return false;
    long long d = n - 1;
    while (d % 2 == 0) d /= 2;
    constexpr long long bases[3] = {2, 7, 61};
    for (long long a : bases) {
        long long t = d;
        long long y = pow_mod_constexpr(a, t, n);
        while (t != n - 1 && y != 1 && y != n - 1) {
            y = y * y % n;
            t <<= 1;
        }
        if (y != n - 1 && t % 2 == 0) { return false; }
    }
    return true;
}
template<int n> constexpr bool is_prime = is_prime_constexpr(n);

// @param b `1 <= b`
// @return pair(g, x) s.t. g = gcd(a, b), xa = g (mod b), 0 <= x < b/g
constexpr std::pair<long long, long long> inv_gcd(long long a, long long b)
{
    a = safe_mod(a, b);
    if (a == 0) return {b, 0};

    // Contracts:
    // [1] s - m0 * a = 0 (mod b)
    // [2] t - m1 * a = 0 (mod b)
    // [3] s * |m1| + t * |m0| <= b
    long long s = b, t = a;
    long long m0 = 0, m1 = 1;

    while (t) {
        long long u = s / t;
        s -= t * u;
        m0 -= m1 * u;  // |m1 * u| <= |m1| * s <= b

        // [3]:
        // (s - t * u) * |m1| + t * |m0 - m1 * u|
        // <= s * |m1| - t * u * |m1| + t * (|m0| + |m1| * u)
        // = s * |m1| + t * |m0| <= b

        auto tmp = s;
        s = t;
        t = tmp;
        tmp = m0;
        m0 = m1;
        m1 = tmp;
    }
    // by [3]: |m0| <= b/g
    // by g != b: |m0| < b/g
    if (m0 < 0) m0 += b / s;
    return {s, m0};
}

// Compile time primitive root
// @param m must be prime
// @return primitive root (and minimum in now)
constexpr int primitive_root_constexpr(int m)
{
    if (m == 2) return 1;
    if (m == 167772161) return 3;
    if (m == 469762049) return 3;
    if (m == 754974721) return 11;
    if (m == 998244353) return 3;
    int divs[20] = {};
    divs[0] = 2;
    int cnt = 1;
    int x = (m - 1) / 2;
    while (x % 2 == 0) x /= 2;
    for (int i = 3; (long long)(i)*i <= x; i += 2) {
        if (x % i == 0) {
            divs[cnt++] = i;
            while (x % i == 0) { x /= i; }
        }
    }
    if (x > 1) { divs[cnt++] = x; }
    for (int g = 2;; g++) {
        bool ok = true;
        for (int i = 0; i < cnt; i++) {
            if (pow_mod_constexpr(g, (m - 1) / divs[i], m) == 1) {
                ok = false;
                break;
            }
        }
        if (ok) return g;
    }
}
template<int m> constexpr int primitive_root = primitive_root_constexpr(m);

}  // namespace internal
}  // namespace atcoder

namespace atcoder {

inline long long pow_mod(long long x, long long n, int m)
{
    assert(0 <= n && 1 <= m);
    if (m == 1) return 0;
    internal::barrett bt((unsigned int)(m));
    unsigned int r = 1, y = (unsigned int)(internal::safe_mod(x, m));
    while (n) {
        if (n & 1) r = bt.mul(r, y);
        y = bt.mul(y, y);
        n >>= 1;
    }
    return r;
}

inline long long inv_mod(long long x, long long m)
{
    assert(1 <= m);
    auto z = internal::inv_gcd(x, m);
    assert(z.first == 1);
    return z.second;
}

// (rem, mod)
inline std::pair<long long, long long> crt(const std::vector<long long>& r, const std::vector<long long>& m)
{
    assert(r.size() == m.size());
    int n = int(r.size());
    // Contracts: 0 <= r0 < m0
    long long r0 = 0, m0 = 1;
    for (int i = 0; i < n; i++) {
        assert(1 <= m[i]);
        long long r1 = internal::safe_mod(r[i], m[i]), m1 = m[i];
        if (m0 < m1) {
            std::swap(r0, r1);
            std::swap(m0, m1);
        }
        if (m0 % m1 == 0) {
            if (r0 % m1 != r1) return {0, 0};
            continue;
        }
        // assume: m0 > m1, lcm(m0, m1) >= 2 * max(m0, m1)

        // (r0, m0), (r1, m1) -> (r2, m2 = lcm(m0, m1));
        // r2 % m0 = r0
        // r2 % m1 = r1
        // -> (r0 + x*m0) % m1 = r1
        // -> x*u0*g % (u1*g) = (r1 - r0) (u0*g = m0, u1*g = m1)
        // -> x = (r1 - r0) / g * inv(u0) (mod u1)

        // im = inv(u0) (mod u1) (0 <= im < u1)
        long long g, im;
        std::tie(g, im) = internal::inv_gcd(m0, m1);

        long long u1 = (m1 / g);
        // |r1 - r0| < (m0 + m1) <= lcm(m0, m1)
        if ((r1 - r0) % g) return {0, 0};

        // u1 * u1 <= m1 * m1 / g / g <= m0 * m1 / g = lcm(m0, m1)
        long long x = (r1 - r0) / g % u1 * im % u1;

        // |r0| + |m0 * x|
        // < m0 + m0 * (u1 - 1)
        // = m0 + m0 * m1 / g - m0
        // = lcm(m0, m1)
        r0 += x * m0;
        m0 *= u1;  // -> lcm(m0, m1)
        if (r0 < 0) r0 += m0;
    }
    return {r0, m0};
}

inline long long floor_sum(long long n, long long m, long long a, long long b)
{
    long long ans = 0;
    if (a >= m) {
        ans += (n - 1) * n * (a / m) / 2;
        a %= m;
    }
    if (b >= m) {
        ans += n * (b / m);
        b %= m;
    }

    long long y_max = (a * n + b) / m, x_max = (y_max * m - b);
    if (y_max == 0) return ans;
    ans += (n - (x_max + a - 1) / a) * y_max;
    ans += floor_sum(y_max, a, m, (a - x_max % a) % a);
    return ans;
}

}  // namespace atcoder

// ---------------------------------------------------------------------------
// Nyaan's library: binomial coefficients modulo an arbitrary modulus
// ---------------------------------------------------------------------------

/// Barrett reduction for a fixed 32-bit modulus m: quotient / remainder / power
/// of 64-bit operands without a hardware division.
struct Barrett
{
    using u32 = unsigned int;
    using i64 = long long;
    using u64 = unsigned long long;
    u32 m;
    u64 im;  // ceil(2^64 / m)
    constexpr Barrett() : m(), im() {}
    constexpr Barrett(int n) : m(n), im(u64(-1) / m + 1) {}
    /// floor(n / m)
    constexpr i64 quo(u64 n) const
    {
        const u64 x = u64((__uint128_t(n) * im) >> 64);
        const u32 r = static_cast<u32>(n - x * m);
        // x may overshoot the true quotient by one; then r has wrapped past m.
        return static_cast<i64>(m <= r ? x - 1 : x);
    }
    /// n mod m
    constexpr i64 rem(u64 n) const
    {
        const u64 x = u64((__uint128_t(n) * im) >> 64);
        const u32 r = static_cast<u32>(n - x * m);
        return m <= r ? r + m : r;
    }
    /// (floor(n / m), n mod m)
    constexpr std::pair<i64, int> quorem(u64 n) const
    {
        const u64 x = u64((__uint128_t(n) * im) >> 64);
        const u32 r = static_cast<u32>(n - x * m);
        if (m <= r) { return {static_cast<i64>(x - 1), static_cast<int>(r + m)}; }
        return {static_cast<i64>(x), static_cast<int>(r)};
    }
    /// n^p mod m (p >= 0)
    constexpr i64 pow(u64 n, i64 p) const
    {
        u32 a = static_cast<u32>(rem(n));
        u32 r = (m == 1 ? 0 : 1);
        while (p) {
            if (p & 1) { r = static_cast<u32>(rem(u64(r) * a)); }
            a = static_cast<u32>(rem(u64(a) * a));
            p >>= 1;
        }
        return r;
    }
};

/// Binomial coefficients modulo a prime power M = p^q (M < 2^30).
/// Tables hold min(M, 2e7 + 10) entries, so either M fits in the table or every
/// queried n must be smaller than the table size.
struct prime_power_binomial
{
    int p, q, M;
    // fac[i]  = product of all k in [1, i] not divisible by p, mod M
    // ifac[i] = modular inverse of fac[i]
    // inv     = allocated for interface compatibility; only inv[0], inv[1] are set
    std::vector<int> fac, ifac, inv;
    int delta;  // sign correction of the generalised Wilson theorem (+1 or -1 mod M)
    Barrett bm, bp;

    prime_power_binomial(int _p, int _q) : p(_p), q(_q)
    {
        assert(1 < p && p <= ((1LL << 30) - 1));
        assert(_q > 0);
        long long m = 1;
        while (_q--) {
            m *= p;
            assert(m <= ((1LL << 30) - 1));
        }
        M = static_cast<int>(m);
        bm = Barrett(M), bp = Barrett(p);
        enumerate();
        delta = (p == 2 && q >= 3) ? 1 : M - 1;
    }

    /// Builds the p-free factorial table and its inverse table.
    void enumerate()
    {
        const int MX = std::min(M, 20000000 + 10);
        fac.resize(MX);
        ifac.resize(MX);
        inv.resize(MX);
        fac[0] = ifac[0] = inv[0] = 1;
        fac[1] = ifac[1] = inv[1] = 1;
        // Each index is tested on its own. The original handled i and i + 1 in one
        // step, which writes fac[MX] when MX - 1 is a multiple of p.
        for (int i = 2; i < MX; i++) {
            fac[i] = (i % p == 0) ? fac[i - 1] : static_cast<int>(bm.rem(1LL * fac[i - 1] * i));
        }
        // fac[MX - 1] is coprime to p, so its inverse is fac^(phi(M) - 1).
        ifac[MX - 1] = static_cast<int>(bm.pow(fac[MX - 1], M / p * (p - 1) - 1));
        // ifac[i - 1] = ifac[i] * (i, or 1 when p | i); ifac[0], ifac[1] stay 1.
        for (int i = MX - 1; i > 2; --i) {
            ifac[i - 1] = (i % p == 0) ? ifac[i] : static_cast<int>(bm.rem(1LL * ifac[i] * i));
        }
    }

    /// C(n, m) mod p by Lucas' theorem (used when q == 1).
    long long Lucas(long long n, long long m)
    {
        int res = 1;
        while (n) {
            int n0, m0;
            std::tie(n, n0) = bp.quorem(n);
            std::tie(m, m0) = bp.quorem(m);
            if (n0 < m0) return 0;
            res = static_cast<int>(bm.rem(1LL * res * fac[n0]));
            const int buf = static_cast<int>(bm.rem(1LL * ifac[n0 - m0] * ifac[m0]));
            res = static_cast<int>(bm.rem(1LL * res * buf));
        }
        return res;
    }

    /// C(n, m) mod p^q (generalised Lucas theorem for prime powers).
    long long C(long long n, long long m)
    {
        if (n < m || n < 0 || m < 0) return 0;
        // Table lookups are indexed by residues mod M (or mod p); when the table was
        // capped below M the argument itself must fit in it.
        assert(static_cast<long long>(fac.size()) == M || n < static_cast<long long>(fac.size()));
        if (q == 1) return Lucas(n, m);
        long long r = n - m;
        int e0 = 0, eq = 0, i = 0;
        int res = 1;
        while (n) {
            res = static_cast<int>(bm.rem(1LL * res * fac[bm.rem(n)]));
            res = static_cast<int>(bm.rem(1LL * res * ifac[bm.rem(m)]));
            res = static_cast<int>(bm.rem(1LL * res * ifac[bm.rem(r)]));
            n = bp.quo(n);
            m = bp.quo(m);
            r = bp.quo(r);
            const int eps = static_cast<int>(n - m - r);
            e0 += eps;
            if (e0 >= q) return 0;  // p^q divides the binomial
            if (++i >= q) eq += eps;
        }
        if (eq & 1) res = static_cast<int>(bm.rem(1LL * res * delta));
        res = static_cast<int>(bm.rem(1LL * res * bm.pow(p, e0)));
        return res;
    }
};

// constraints:
// (M <= 1e7 and max(N) <= 1e18) or (M < 2^30 and max(N) <= 2e7)
/// Binomial coefficients modulo an arbitrary modulus: one prime_power_binomial
/// per prime power of the modulus, combined with CRT.
struct arbitrary_mod_binomial
{
    int mod;
    std::vector<int> M;                    // prime-power factors of mod
    std::vector<prime_power_binomial> cs;  // solver for each factor, same order as M

    arbitrary_mod_binomial(long long md) : mod(static_cast<int>(md))
    {
        assert(1 <= md);
        assert(md <= ((1LL << 30) - 1));
        for (int i = 2; i * i <= md; i++) {
            if (md % i == 0) {
                int j = 0, k = 1;
                while (md % i == 0) md /= i, j++, k *= i;
                M.push_back(k);
                cs.emplace_back(i, j);
                assert(M.back() == cs.back().M);
            }
        }
        if (md != 1) {
            M.push_back(static_cast<int>(md));
            cs.emplace_back(static_cast<int>(md), 1);
        }
        assert(M.size() == cs.size());
    }

    /// C(n, m) mod `mod`.
    long long C(long long n, long long m)
    {
        if (mod == 1) return 0;
        std::vector<long long> rem, d;
        rem.reserve(cs.size());
        d.reserve(cs.size());
        for (int i = 0; i < static_cast<int>(cs.size()); i++) {
            rem.push_back(cs[i].C(n, m));
            d.push_back(M[i]);
        }
        return atcoder::crt(rem, d).first;
    }
};

/**
 * @brief Binomial coefficients modulo an arbitrary modulus
 * @docs docs/modulo/arbitrary-mod-binomial.md
 */

int main()
{
    const auto [L, R, M] = in.tup<i64, i64, i64>();
    auto mod = arbitrary_mod_binomial(M);
    i64 ans = 0;
    for (i64 x : irange(L, R + 1)) {
        // (C(2x, x) - 2) mod M, kept non-negative.
        const i64 term = (mod.C(2 * x, x) + M - 2) % M;
        ans = (ans + term) % M;
    }
    out.ln(ans);
    return 0;
}