// Bundled prelude of a single-file contest submission. The `#line` markers in
// this span name the headers that were inlined here, in order:
//   cpplib/util/ioutil.hpp          - operator<< for std::pair
//   cpplib/util/template.hpp        - typedefs, rep/rrep/all macros, chmin/chmax,
//                                     pair arithmetic, stack-extension macros
//   cpplib/math/ACL_modint.hpp      - atcoder::internal helpers (safe_mod, barrett,
//                                     pow_mod_constexpr, inv_gcd, primitive root),
//                                     static_modint / dynamic_modint, `using mint`
//   cpplib/math/ACL_modint_base.hpp - stream operators for mint, lazily doubled
//                                     factorial tables (update/fact/fact_inv/comb/
//                                     perm/multi_comb)
//   cpplib/math/ACL_convolution.hpp - atcoder butterfly / butterfly_inv and the
//                                     convolution overloads, convolution_ll
//   cpplib/math/mod_pow.hpp         - mod_pow(x, y, mod)
//   cpplib/math/garner.hpp          - garner(a, mods)
//
// NOTE(review): as stored, this text does not look compilable. Many `template`,
// `vector`, `std::pair`, `std::array` and `#include` tokens are followed by
// nothing, so the angle-bracket argument lists and include targets appear to
// have been stripped -- TODO restore from the upstream headers.
// NOTE(review): several definitions share each physical line, so every `//`
// comment below runs to the end of its physical line and the `#define`, `#if`
// and `#line` directives are not at the start of a line -- TODO re-split.
// NOTE(review): `#define endl '\n';` carries a trailing semicolon inside the
// macro body; confirm that is intended.
// author: hotman78 // date: 2023/10/12-13:23:01 // --- begin raw code ----------------- // #include"cpplib/util/template.hpp" // #include"cpplib/math/ACL_modint.hpp" // #include"cpplib/math/ACL_convolution.hpp" // #include"cpplib/math/poly.hpp" // // int main(){ // lint n,p; // cin>>n>>p; // mint::set_mod(p); // vectora(n+1),b(n+1); // rep(i,1,n+1)a[i]=(i==1?mint(1):mint(i).pow(i-2))*fact_inv(i); // rep(i,1,n+1)b[i]=(i==1?mint(1):mint(i).pow(i-1))*fact_inv(i); // auto ans=composition(a,b); // cout< using namespace std; #line 1 "cpplib/util/ioutil.hpp" // template // std::ostream& output(std::ostream& out,const Head& head,const Args&... args){ // out>>head; // return output(head,args...); // } // template // std::ostream& output(std::ostream& out,const Head& head){ // out>>head; // return out; // } template std::ostream &operator<<(std::ostream &out, std::pair v) { out << "(" << v.first << "," << v.second << ")"; return out; } // template // ostream& operator<<(ostream& out,std::tuplev){ // std::apply(output,v); // return out; // } #line 11 "cpplib/util/template.hpp" struct __INIT__ { __INIT__() { cin.tie(0); ios::sync_with_stdio(false); cout << fixed << setprecision(15); } } __INIT__; typedef long long lint; constexpr long long INF = 1LL << 60; constexpr int IINF = 1 << 30; constexpr double EPS = 1e-10; #ifndef REACTIVE #define endl '\n'; #endif typedef vector vec; typedef vector> mat; typedef vector>> mat3; typedef vector svec; typedef vector> smat; template using V = vector; template using VV = V>; template inline void output(T t) { bool f = 0; for (auto i : t) { cout << (f ? " " : "") << i; f = 1; } cout << endl; } template inline void output2(T t) { for (auto i : t) output(i); } template inline void debug(T t) { bool f = 0; for (auto i : t) { cerr << (f ? 
" " : "") << i; f = 1; } cerr << endl; } template inline void debug2(T t) { for (auto i : t) debug(i); } #define loop(n) for (long long _ = 0; _ < (long long)(n); ++_) #define _overload4(_1, _2, _3, _4, name, ...) name #define __rep(i, a) repi(i, 0, a, 1) #define _rep(i, a, b) repi(i, a, b, 1) #define repi(i, a, b, c) \ for (long long i = (long long)(a); i < (long long)(b); i += c) #define rep(...) _overload4(__VA_ARGS__, repi, _rep, __rep)(__VA_ARGS__) #define _overload3_rev(_1, _2, _3, name, ...) name #define _rep_rev(i, a) repi_rev(i, 0, a) #define repi_rev(i, a, b) \ for (long long i = (long long)(b)-1; i >= (long long)(a); --i) #define rrep(...) _overload3_rev(__VA_ARGS__, repi_rev, _rep_rev)(__VA_ARGS__) #define all(n) begin(n), end(n) template bool chmin(T &s, const E &t) { bool res = s > t; s = min(s, t); return res; } template bool chmax(T &s, const E &t) { bool res = s < t; s = max(s, t); return res; } const vector dx = {1, 0, -1, 0, 1, 1, -1, -1}; const vector dy = {0, 1, 0, -1, 1, -1, 1, -1}; #define SUM(v) accumulate(all(v), 0LL) #if __cplusplus >= 201703L template auto make_vector(T x, int arg, Args... args) { if constexpr (sizeof...(args) == 0) return vector(arg, x); else return vector(arg, make_vector(x, args...)); } #endif #define extrep(v, ...) 
for (auto v : __MAKE_MAT__({__VA_ARGS__})) #define bit(n, a) ((n >> a) & 1) vector> __MAKE_MAT__(vector v) { if (v.empty()) return vector>(1, vector()); long long n = v.back(); v.pop_back(); vector> ret; vector> tmp = __MAKE_MAT__(v); for (auto e : tmp) for (long long i = 0; i < n; ++i) { ret.push_back(e); ret.back().push_back(i); } return ret; } using graph = vector>; template using graph_w = vector>>; #if __cplusplus >= 201703L constexpr inline long long powll(long long a, long long b) { long long res = 1; while (b--) res *= a; return res; } #endif template pair &operator+=(pair &s, const pair &t) { s.first += t.first; s.second += t.second; return s; } template pair &operator-=(pair &s, const pair &t) { s.first -= t.first; s.second -= t.second; return s; } template pair operator+(const pair &s, const pair &t) { auto res = s; return res += t; } template pair operator-(const pair &s, const pair &t) { auto res = s; return res -= t; } #define BEGIN_STACK_EXTEND(size) \ void *stack_extend_memory_ = malloc(size); \ void *stack_extend_origin_memory_; \ char *stack_extend_dummy_memory_ = (char *)alloca( \ (1 + (int)(((long long)stack_extend_memory_) & 127)) * 16); \ *stack_extend_dummy_memory_ = 0; \ asm volatile("mov %%rsp, %%rbx\nmov %%rax, %%rsp" \ : "=b"(stack_extend_origin_memory_) \ : "a"((char *)stack_extend_memory_ + (size)-1024)); #define END_STACK_EXTEND \ asm volatile("mov %%rax, %%rsp" ::"a"(stack_extend_origin_memory_)); \ free(stack_extend_memory_); int floor_pow(int n) { return n ? 
31 - __builtin_clz(n) : 0; } #line 2 "cpplib/math/ACL_modint.hpp" #include #include #include #ifdef _MSC_VER #include #endif #include #ifdef _MSC_VER #include #endif namespace atcoder { namespace internal { constexpr long long safe_mod(long long x, long long m) { x %= m; if (x < 0) x += m; return x; } struct barrett { unsigned int _m; unsigned long long im; explicit barrett(unsigned int m) : _m(m), im((unsigned long long)(-1) / m + 1) {} unsigned int umod() const { return _m; } unsigned int mul(unsigned int a, unsigned int b) const { unsigned long long z = a; z *= b; #ifdef _MSC_VER unsigned long long x; _umul128(z, im, &x); #else unsigned long long x = (unsigned long long)(((unsigned __int128)(z)*im) >> 64); #endif unsigned long long y = x * _m; return (unsigned int)(z - y + (z < y ? _m : 0)); } }; constexpr long long pow_mod_constexpr(long long x, long long n, int m) { if (m == 1) return 0; unsigned int _m = (unsigned int)(m); unsigned long long r = 1; unsigned long long y = safe_mod(x, m); while (n) { if (n & 1) r = (r * y) % _m; y = (y * y) % _m; n >>= 1; } return r; } constexpr bool is_prime_constexpr(int n) { if (n <= 1) return false; if (n == 2 || n == 7 || n == 61) return true; if (n % 2 == 0) return false; long long d = n - 1; while (d % 2 == 0) d /= 2; constexpr long long bases[3] = {2, 7, 61}; for (long long a : bases) { long long t = d; long long y = pow_mod_constexpr(a, t, n); while (t != n - 1 && y != 1 && y != n - 1) { y = y * y % n; t <<= 1; } if (y != n - 1 && t % 2 == 0) { return false; } } return true; } template constexpr bool is_prime = is_prime_constexpr(n); constexpr std::pair inv_gcd(long long a, long long b) { a = safe_mod(a, b); if (a == 0) return {b, 0}; long long s = b, t = a; long long m0 = 0, m1 = 1; while (t) { long long u = s / t; s -= t * u; m0 -= m1 * u; // |m1 * u| <= |m1| * s <= b auto tmp = s; s = t; t = tmp; tmp = m0; m0 = m1; m1 = tmp; } if (m0 < 0) m0 += b / s; return {s, m0}; } constexpr int primitive_root_constexpr(int m) { 
if (m == 2) return 1; if (m == 167772161) return 3; if (m == 469762049) return 3; if (m == 754974721) return 11; if (m == 998244353) return 3; int divs[20] = {}; divs[0] = 2; int cnt = 1; int x = (m - 1) / 2; while (x % 2 == 0) x /= 2; for (int i = 3; (long long)(i)*i <= x; i += 2) { if (x % i == 0) { divs[cnt++] = i; while (x % i == 0) { x /= i; } } } if (x > 1) { divs[cnt++] = x; } for (int g = 2;; g++) { bool ok = true; for (int i = 0; i < cnt; i++) { if (pow_mod_constexpr(g, (m - 1) / divs[i], m) == 1) { ok = false; break; } } if (ok) return g; } } template constexpr int primitive_root = primitive_root_constexpr(m); unsigned long long floor_sum_unsigned(unsigned long long n, unsigned long long m, unsigned long long a, unsigned long long b) { unsigned long long ans = 0; while (true) { if (a >= m) { ans += n * (n - 1) / 2 * (a / m); a %= m; } if (b >= m) { ans += n * (b / m); b %= m; } unsigned long long y_max = a * n + b; if (y_max < m) break; n = (unsigned long long)(y_max / m); b = (unsigned long long)(y_max % m); std::swap(m, a); } return ans; } } // namespace internal } // namespace atcoder #include #include #include namespace atcoder { namespace internal { #ifndef _MSC_VER template using is_signed_int128 = typename std::conditional::value || std::is_same::value, std::true_type, std::false_type>::type; template using is_unsigned_int128 = typename std::conditional::value || std::is_same::value, std::true_type, std::false_type>::type; template using make_unsigned_int128 = typename std::conditional::value, __uint128_t, unsigned __int128>; template using is_integral = typename std::conditional::value || is_signed_int128::value || is_unsigned_int128::value, std::true_type, std::false_type>::type; template using is_signed_int = typename std::conditional<(is_integral::value && std::is_signed::value) || is_signed_int128::value, std::true_type, std::false_type>::type; template using is_unsigned_int = typename std::conditional<(is_integral::value && 
std::is_unsigned::value) || is_unsigned_int128::value, std::true_type, std::false_type>::type; template using to_unsigned = typename std::conditional< is_signed_int128::value, make_unsigned_int128, typename std::conditional::value, std::make_unsigned, std::common_type>::type>::type; #else template using is_integral = typename std::is_integral; template using is_signed_int = typename std::conditional::value && std::is_signed::value, std::true_type, std::false_type>::type; template using is_unsigned_int = typename std::conditional::value && std::is_unsigned::value, std::true_type, std::false_type>::type; template using to_unsigned = typename std::conditional::value, std::make_unsigned, std::common_type>::type; #endif template using is_signed_int_t = std::enable_if_t::value>; template using is_unsigned_int_t = std::enable_if_t::value>; template using to_unsigned_t = typename to_unsigned::type; } // namespace internal } // namespace atcoder namespace atcoder { namespace internal { struct modint_base {}; struct static_modint_base : modint_base {}; template using is_modint = std::is_base_of; template using is_modint_t = std::enable_if_t::value>; } // namespace internal template * = nullptr> struct static_modint : internal::static_modint_base { using mint = static_modint; public: static constexpr int mod() { return m; } static mint raw(int v) { mint x; x._v = v; return x; } static_modint() : _v(0) {} template * = nullptr> static_modint(T v) { long long x = (long long)(v % (long long)(umod())); if (x < 0) x += umod(); _v = (unsigned int)(x); } template * = nullptr> static_modint(T v) { _v = (unsigned int)(v % umod()); } unsigned int val() const { return _v; } mint &operator++() { _v++; if (_v == umod()) _v = 0; return *this; } mint &operator--() { if (_v == 0) _v = umod(); _v--; return *this; } mint operator++(int) { mint result = *this; ++*this; return result; } mint operator--(int) { mint result = *this; --*this; return result; } mint &operator+=(const mint &rhs) { _v += 
rhs._v; if (_v >= umod()) _v -= umod(); return *this; } mint &operator-=(const mint &rhs) { _v -= rhs._v; if (_v >= umod()) _v += umod(); return *this; } mint &operator*=(const mint &rhs) { unsigned long long z = _v; z *= rhs._v; _v = (unsigned int)(z % umod()); return *this; } mint &operator/=(const mint &rhs) { return *this = *this * rhs.inv(); } mint operator+() const { return *this; } mint operator-() const { return mint() - *this; } mint pow(long long n) const { assert(0 <= n); mint x = *this, r = 1; while (n) { if (n & 1) r *= x; x *= x; n >>= 1; } return r; } mint inv() const { if (prime) { assert(_v); return pow(umod() - 2); } else { auto eg = internal::inv_gcd(_v, m); assert(eg.first == 1); return eg.second; } } friend mint operator+(const mint &lhs, const mint &rhs) { return mint(lhs) += rhs; } friend mint operator-(const mint &lhs, const mint &rhs) { return mint(lhs) -= rhs; } friend mint operator*(const mint &lhs, const mint &rhs) { return mint(lhs) *= rhs; } friend mint operator/(const mint &lhs, const mint &rhs) { return mint(lhs) /= rhs; } friend bool operator==(const mint &lhs, const mint &rhs) { return lhs._v == rhs._v; } friend bool operator!=(const mint &lhs, const mint &rhs) { return lhs._v != rhs._v; } private: unsigned int _v; static constexpr unsigned int umod() { return m; } static constexpr bool prime = internal::is_prime; }; template struct dynamic_modint : internal::modint_base { using mint = dynamic_modint; public: static int mod() { return (int)(bt.umod()); } static void set_mod(int m) { assert(1 <= m); bt = internal::barrett(m); } static mint raw(int v) { mint x; x._v = v; return x; } dynamic_modint() : _v(0) {} template * = nullptr> dynamic_modint(T v) { long long x = (long long)(v % (long long)(mod())); if (x < 0) x += mod(); _v = (unsigned int)(x); } template * = nullptr> dynamic_modint(T v) { _v = (unsigned int)(v % mod()); } unsigned int val() const { return _v; } mint &operator++() { _v++; if (_v == umod()) _v = 0; return *this; 
} mint &operator--() { if (_v == 0) _v = umod(); _v--; return *this; } mint operator++(int) { mint result = *this; ++*this; return result; } mint operator--(int) { mint result = *this; --*this; return result; } mint &operator+=(const mint &rhs) { _v += rhs._v; if (_v >= umod()) _v -= umod(); return *this; } mint &operator-=(const mint &rhs) { _v += mod() - rhs._v; if (_v >= umod()) _v -= umod(); return *this; } mint &operator*=(const mint &rhs) { _v = bt.mul(_v, rhs._v); return *this; } mint &operator/=(const mint &rhs) { return *this = *this * rhs.inv(); } mint operator+() const { return *this; } mint operator-() const { return mint() - *this; } mint pow(long long n) const { assert(0 <= n); mint x = *this, r = 1; while (n) { if (n & 1) r *= x; x *= x; n >>= 1; } return r; } mint inv() const { auto eg = internal::inv_gcd(_v, mod()); assert(eg.first == 1); return eg.second; } friend mint operator+(const mint &lhs, const mint &rhs) { return mint(lhs) += rhs; } friend mint operator-(const mint &lhs, const mint &rhs) { return mint(lhs) -= rhs; } friend mint operator*(const mint &lhs, const mint &rhs) { return mint(lhs) *= rhs; } friend mint operator/(const mint &lhs, const mint &rhs) { return mint(lhs) /= rhs; } friend bool operator==(const mint &lhs, const mint &rhs) { return lhs._v == rhs._v; } friend bool operator!=(const mint &lhs, const mint &rhs) { return lhs._v != rhs._v; } private: unsigned int _v; static internal::barrett bt; static unsigned int umod() { return bt.umod(); } }; template internal::barrett dynamic_modint::bt(998244353); using modint998244353 = static_modint<998244353>; using modint1000000007 = static_modint<1000000007>; using modint = dynamic_modint<-1>; namespace internal { template using is_static_modint = std::is_base_of; template using is_static_modint_t = std::enable_if_t::value>; template struct is_dynamic_modint : public std::false_type {}; template struct is_dynamic_modint> : public std::true_type {}; template using is_dynamic_modint_t = 
std::enable_if_t::value>; } // namespace internal } // namespace atcoder using mint = atcoder::modint; #line 4 "cpplib/math/ACL_modint_base.hpp" std::ostream &operator<<(std::ostream &lhs, const mint &rhs) noexcept { lhs << rhs.val(); return lhs; } std::istream &operator>>(std::istream &lhs, mint &rhs) noexcept { long long x; lhs >> x; rhs = x; return lhs; } int MOD_NOW = -1; int FACT_TABLE_SIZE = 0; std::vector fact_table, fact_inv_table; void update(int x) { if (MOD_NOW != mint::mod() || FACT_TABLE_SIZE == 0) { fact_table.assign(1, 1); fact_inv_table.assign(1, 1); FACT_TABLE_SIZE = 1; MOD_NOW = mint::mod(); } while (FACT_TABLE_SIZE <= x) { fact_table.resize(FACT_TABLE_SIZE * 2); fact_inv_table.resize(FACT_TABLE_SIZE * 2); for (int i = FACT_TABLE_SIZE; i < FACT_TABLE_SIZE * 2; ++i) { fact_table[i] = fact_table[i - 1] * i; } fact_inv_table[FACT_TABLE_SIZE * 2 - 1] = fact_table[FACT_TABLE_SIZE * 2 - 1].inv(); for (int i = FACT_TABLE_SIZE * 2 - 2; i >= FACT_TABLE_SIZE; --i) { fact_inv_table[i] = fact_inv_table[i + 1] * (i + 1); } FACT_TABLE_SIZE *= 2; } } inline mint fact(int x) { assert(x >= 0); update(x); return fact_table[x]; } inline mint fact_inv(int x) { assert(x >= 0); update(x); return fact_inv_table[x]; } inline mint comb(int x, int y) { if (x < 0 || x < y || y < 0) return 0; return fact(x) * fact_inv(y) * fact_inv(x - y); } inline mint perm(int x, int y) { return fact(x) * fact_inv(x - y); } // number of ways to distribute y items into x groups inline mint multi_comb(int x, int y) { if (y == 0 && x >= 0) return 1; if (y < 0 || x <= 0) return 0; return comb(x + y - 1, y); } #line 2 "cpplib/math/ACL_convolution.hpp" #include #include #include #include #include #ifdef _MSC_VER #include #endif #if __cplusplus >= 202002L #include #endif namespace atcoder { namespace internal { #if __cplusplus >= 202002L using std::bit_ceil; #else unsigned int bit_ceil(unsigned int n) { unsigned int x = 1; while (x < (unsigned int)(n)) x *= 2; return x; } #endif int countr_zero(unsigned int n) { #ifdef 
_MSC_VER unsigned long index; _BitScanForward(&index, n); return index; #else return __builtin_ctz(n); #endif } constexpr int countr_zero_constexpr(unsigned int n) { int x = 0; while (!(n & (1 << x))) x++; return x; } } // namespace internal } // namespace atcoder namespace atcoder { namespace internal { template , internal::is_static_modint_t * = nullptr> struct fft_info { static constexpr int rank2 = countr_zero_constexpr(mint::mod() - 1); std::array root; // root[i]^(2^i) == 1 std::array iroot; // root[i] * iroot[i] == 1 std::array rate2; std::array irate2; std::array rate3; std::array irate3; fft_info() { root[rank2] = mint(g).pow((mint::mod() - 1) >> rank2); iroot[rank2] = root[rank2].inv(); for (int i = rank2 - 1; i >= 0; i--) { root[i] = root[i + 1] * root[i + 1]; iroot[i] = iroot[i + 1] * iroot[i + 1]; } { mint prod = 1, iprod = 1; for (int i = 0; i <= rank2 - 2; i++) { rate2[i] = root[i + 2] * prod; irate2[i] = iroot[i + 2] * iprod; prod *= iroot[i + 2]; iprod *= root[i + 2]; } } { mint prod = 1, iprod = 1; for (int i = 0; i <= rank2 - 3; i++) { rate3[i] = root[i + 3] * prod; irate3[i] = iroot[i + 3] * iprod; prod *= iroot[i + 3]; iprod *= root[i + 3]; } } } }; template * = nullptr> void butterfly(std::vector &a) { int n = int(a.size()); int h = internal::countr_zero((unsigned int)n); static const fft_info info; int len = 0; // a[i, i+(n>>len), i+2*(n>>len), ..] 
is transformed while (len < h) { if (h - len == 1) { int p = 1 << (h - len - 1); mint rot = 1; for (int s = 0; s < (1 << len); s++) { int offset = s << (h - len); for (int i = 0; i < p; i++) { auto l = a[i + offset]; auto r = a[i + offset + p] * rot; a[i + offset] = l + r; a[i + offset + p] = l - r; } if (s + 1 != (1 << len)) rot *= info.rate2[countr_zero(~(unsigned int)(s))]; } len++; } else { int p = 1 << (h - len - 2); mint rot = 1, imag = info.root[2]; for (int s = 0; s < (1 << len); s++) { mint rot2 = rot * rot; mint rot3 = rot2 * rot; int offset = s << (h - len); for (int i = 0; i < p; i++) { auto mod2 = 1ULL * mint::mod() * mint::mod(); auto a0 = 1ULL * a[i + offset].val(); auto a1 = 1ULL * a[i + offset + p].val() * rot.val(); auto a2 = 1ULL * a[i + offset + 2 * p].val() * rot2.val(); auto a3 = 1ULL * a[i + offset + 3 * p].val() * rot3.val(); auto a1na3imag = 1ULL * mint(a1 + mod2 - a3).val() * imag.val(); auto na2 = mod2 - a2; a[i + offset] = a0 + a2 + a1 + a3; a[i + offset + 1 * p] = a0 + a2 + (2 * mod2 - (a1 + a3)); a[i + offset + 2 * p] = a0 + na2 + a1na3imag; a[i + offset + 3 * p] = a0 + na2 + (mod2 - a1na3imag); } if (s + 1 != (1 << len)) rot *= info.rate3[countr_zero(~(unsigned int)(s))]; } len += 2; } } } template * = nullptr> void butterfly_inv(std::vector &a) { int n = int(a.size()); int h = internal::countr_zero((unsigned int)n); static const fft_info info; int len = h; // a[i, i+(n>>len), i+2*(n>>len), ..] 
is transformed while (len) { if (len == 1) { int p = 1 << (h - len); mint irot = 1; for (int s = 0; s < (1 << (len - 1)); s++) { int offset = s << (h - len + 1); for (int i = 0; i < p; i++) { auto l = a[i + offset]; auto r = a[i + offset + p]; a[i + offset] = l + r; a[i + offset + p] = (unsigned long long)(mint::mod() + l.val() - r.val()) * irot.val(); ; } if (s + 1 != (1 << (len - 1))) irot *= info.irate2[countr_zero(~(unsigned int)(s))]; } len--; } else { int p = 1 << (h - len); mint irot = 1, iimag = info.iroot[2]; for (int s = 0; s < (1 << (len - 2)); s++) { mint irot2 = irot * irot; mint irot3 = irot2 * irot; int offset = s << (h - len + 2); for (int i = 0; i < p; i++) { auto a0 = 1ULL * a[i + offset + 0 * p].val(); auto a1 = 1ULL * a[i + offset + 1 * p].val(); auto a2 = 1ULL * a[i + offset + 2 * p].val(); auto a3 = 1ULL * a[i + offset + 3 * p].val(); auto a2na3iimag = 1ULL * mint((mint::mod() + a2 - a3) * iimag.val()).val(); a[i + offset] = a0 + a1 + a2 + a3; a[i + offset + 1 * p] = (a0 + (mint::mod() - a1) + a2na3iimag) * irot.val(); a[i + offset + 2 * p] = (a0 + a1 + (mint::mod() - a2) + (mint::mod() - a3)) * irot2.val(); a[i + offset + 3 * p] = (a0 + (mint::mod() - a1) + (mint::mod() - a2na3iimag)) * irot3.val(); } if (s + 1 != (1 << (len - 2))) irot *= info.irate3[countr_zero(~(unsigned int)(s))]; } len -= 2; } } } template * = nullptr> std::vector convolution_naive(const std::vector &a, const std::vector &b) { int n = int(a.size()), m = int(b.size()); std::vector ans(n + m - 1); if (n < m) { for (int j = 0; j < m; j++) { for (int i = 0; i < n; i++) { ans[i + j] += a[i] * b[j]; } } } else { for (int i = 0; i < n; i++) { for (int j = 0; j < m; j++) { ans[i + j] += a[i] * b[j]; } } } return ans; } template * = nullptr> std::vector convolution_fft(std::vector a, std::vector b) { int n = int(a.size()), m = int(b.size()); int z = (int)internal::bit_ceil((unsigned int)(n + m - 1)); a.resize(z); internal::butterfly(a); b.resize(z); internal::butterfly(b); for 
(int i = 0; i < z; i++) { a[i] *= b[i]; } internal::butterfly_inv(a); a.resize(n + m - 1); mint iz = mint(z).inv(); for (int i = 0; i < n + m - 1; i++) a[i] *= iz; return a; } } // namespace internal template * = nullptr> std::vector convolution(std::vector &&a, std::vector &&b) { int n = int(a.size()), m = int(b.size()); if (!n || !m) return {}; int z = (int)internal::bit_ceil((unsigned int)(n + m - 1)); assert((mint::mod() - 1) % z == 0); if (std::min(n, m) <= 60) return convolution_naive(a, b); return internal::convolution_fft(a, b); } template * = nullptr> std::vector convolution(const std::vector &a, const std::vector &b) { int n = int(a.size()), m = int(b.size()); if (!n || !m) return {}; int z = (int)internal::bit_ceil((unsigned int)(n + m - 1)); assert((mint::mod() - 1) % z == 0); if (std::min(n, m) <= 60) return convolution_naive(a, b); return internal::convolution_fft(a, b); } template ::value> * = nullptr> std::vector convolution(const std::vector &a, const std::vector &b) { int n = int(a.size()), m = int(b.size()); if (!n || !m) return {}; using mint = static_modint; int z = (int)internal::bit_ceil((unsigned int)(n + m - 1)); assert((mint::mod() - 1) % z == 0); std::vector a2(n), b2(m); for (int i = 0; i < n; i++) { a2[i] = mint(a[i]); } for (int i = 0; i < m; i++) { b2[i] = mint(b[i]); } auto c2 = convolution(std::move(a2), std::move(b2)); std::vector c(n + m - 1); for (int i = 0; i < n + m - 1; i++) { c[i] = c2[i].val(); } return c; } std::vector convolution_ll(const std::vector &a, const std::vector &b) { int n = int(a.size()), m = int(b.size()); if (!n || !m) return {}; static constexpr unsigned long long MOD1 = 754974721; // 2^24 static constexpr unsigned long long MOD2 = 167772161; // 2^25 static constexpr unsigned long long MOD3 = 469762049; // 2^26 static constexpr unsigned long long M2M3 = MOD2 * MOD3; static constexpr unsigned long long M1M3 = MOD1 * MOD3; static constexpr unsigned long long M1M2 = MOD1 * MOD2; static constexpr unsigned long 
long M1M2M3 = MOD1 * MOD2 * MOD3; static constexpr unsigned long long i1 = internal::inv_gcd(MOD2 * MOD3, MOD1).second; static constexpr unsigned long long i2 = internal::inv_gcd(MOD1 * MOD3, MOD2).second; static constexpr unsigned long long i3 = internal::inv_gcd(MOD1 * MOD2, MOD3).second; static constexpr int MAX_AB_BIT = 24; static_assert(MOD1 % (1ull << MAX_AB_BIT) == 1, "MOD1 isn't enough to support an array length of 2^24."); static_assert(MOD2 % (1ull << MAX_AB_BIT) == 1, "MOD2 isn't enough to support an array length of 2^24."); static_assert(MOD3 % (1ull << MAX_AB_BIT) == 1, "MOD3 isn't enough to support an array length of 2^24."); assert(n + m - 1 <= (1 << MAX_AB_BIT)); auto c1 = convolution(a, b); auto c2 = convolution(a, b); auto c3 = convolution(a, b); std::vector c(n + m - 1); for (int i = 0; i < n + m - 1; i++) { unsigned long long x = 0; x += (c1[i] * i1) % MOD1 * M2M3; x += (c2[i] * i2) % MOD2 * M1M3; x += (c3[i] * i3) % MOD3 * M1M2; long long diff = c1[i] - internal::safe_mod((long long)(x), (long long)(MOD1)); if (diff < 0) diff += MOD1; static constexpr unsigned long long offset[5] = { 0, 0, M1M2M3, 2 * M1M2M3, 3 * M1M2M3}; x -= offset[diff % 5]; c[i] = x; } return c; } } // namespace atcoder #line 2 "cpplib/math/mod_pow.hpp" /** * @brief (x^y)%mod */ long long mod_pow(long long x, long long y, long long mod) { long long ret = 1; while (y > 0) { if (y & 1) (ret *= x) %= mod; (x *= x) %= mod; y >>= 1; } return ret; } #line 4 "cpplib/math/garner.hpp" /** * * @brief Garner's algorithm * */ long long garner(const std::vector &a, const std::vector &mods) { const int sz = a.size(); long long coeffs[sz + 1] = {1, 1, 1, 1}; long long constants[sz + 1] = {}; for (int i = 0; i < sz; i++) { long long v = (mods[i] + a[i] - constants[i]) % mods[i] * mod_pow(coeffs[i], mods[i] - 2, mods[i]) % mods[i]; for (int j = i + 1; j < sz + 1; j++) { constants[j] = (constants[j] + coeffs[j] * v) % mods[j]; coeffs[j] = (coeffs[j] * mods[i]) % mods[j]; } } return constants[sz]; } 
#line 1 "cpplib/math/ceil_pow2.hpp" int ceil_pow2(int n) { int x = 0; while ((1U << x) < (unsigned int)(n)) x++; return x; } #line 6 "cpplib/math/ACL_convolution.hpp" #line 8 "cpplib/math/ACL_convolution.hpp" template std::vector convolution(const std::vector &_s, const std::vector &_t) { using T = std::vector; if (_s.size() == 0 || _t.size() == 0) return T(); const size_t sz = _s.size() + _t.size() - 1; std::vector> s1(_s.size()), t1(_t.size()); std::vector> s2(_s.size()), t2(_t.size()); std::vector> s3(_s.size()), t3(_t.size()); for (size_t i = 0; i < _s.size(); ++i) { s1[i] = _s[i].val(); s2[i] = _s[i].val(); s3[i] = _s[i].val(); } for (size_t i = 0; i < _t.size(); ++i) { t1[i] = _t[i].val(); t2[i] = _t[i].val(); t3[i] = _t[i].val(); } auto v1 = atcoder::convolution(s1, t1); auto v2 = atcoder::convolution(s2, t2); auto v3 = atcoder::convolution(s3, t3); T v(sz); for (size_t i = 0; i < sz; ++i) { v[i] = garner( std::vector{v1[i].val(), v2[i].val(), v3[i].val()}, std::vector{1224736769, 1045430273, 1007681537, (long long)Mint::mod()}); } return v; } #line 2 "cpplib/math/poly.hpp" using poly = vector; int size(const poly &x) { return x.size(); } poly shrink(poly x) { while (size(x) >= 1 && x.back().val() == 0) x.pop_back(); return x; } poly pre(const poly &x, int n) { auto res = x; res.resize(n); return res; } poly operator+(const poly &x, const poly &y) { poly res(max(x.size(), y.size())); rep(i, 0, x.size()) res[i] += x[i]; rep(i, 0, y.size()) res[i] += y[i]; return res; } poly &operator*=(poly &x, const mint &y) { rep(i, 0, x.size()) x[i] *= y; return x; } poly operator*(poly x, const mint &y) { return x *= y; } poly operator-(const poly &x) { poly res(x.size()); rep(i, 0, x.size()) res[i] = -x[i]; return res; } poly operator-(const poly &x, const poly &y) { return x + (-y); } // poly operator*(const poly&x,const poly&y){ // return atcoder::convolution(x,y); // } poly operator*(const poly &x, const poly &y) { return convolution(x, y); } poly &operator+=(poly &x, 
const poly &y) { return x = (x + y); } poly &operator-=(poly &x, const poly &y) { return x = (x - y); } poly &operator*=(poly &x, const poly &y) { return x = (x * y); } istream &operator>>(istream &in, poly &y) { int n = size(y); rep(i, 0, n) in >> y[i]; return in; } ostream &operator<<(ostream &out, const poly &y) { int n = size(y); rep(i, 0, n) { if (i) out << ' '; out << y[i].val(); } return out; } poly diff(const poly &x) { int n = size(x); poly res(n - 1); rep(i, 0, n - 1) res[i] = x[i + 1] * (i + 1); return res; } poly integrate(const poly &x) { int n = size(x); poly res(n + 1); rep(i, 1, n + 1) res[i] = x[i - 1] / i; return res; } poly inv(const poly &x) { int n = size(x); if (n == 1) return poly{x[0].inv()}; auto c = inv(pre(x, (n + 1) / 2)); return pre(c * (poly{2} - c * x), n); } poly log(const poly &x) { int n = size(x); assert(x[0].val() == 1); return pre(integrate(diff(x) * inv(x)), n); } pair divmod(const poly &a, const poly &b) { assert(!b.empty()); if (b.back().val() == 0) return divmod(a, shrink(b)); if (a.empty()) return make_pair(poly{}, poly{}); if (a.back().val() == 0) return divmod(shrink(a), b); int n = max(0, size(a) - size(b) + 1); if (n == 0) return make_pair(poly{}, a); auto c = a; auto d = b; reverse(c.begin(), c.end()); reverse(d.begin(), d.end()); d.resize(n); c *= inv(d); c.resize(n); reverse(c.begin(), c.end()); return make_pair(c, pre(a - c * b, (int)b.size() - 1)); } poly multipoint_evalution(const poly &a, const poly &b) { int n = b.size(); vector v(n * 2); rep(i, 0, n) { v[i + n] = poly{-mint(b[i]), mint(1)}; } for (int i = n - 1; i >= 1; --i) { v[i] = v[i * 2] * v[i * 2 + 1]; } poly ans(n); v[0] = a; rep(i, 1, n * 2) { v[i] = divmod(v[i / 2], v[i]).second; if (i >= n) ans[i - n] = v[i][0]; } return ans; } vector composition(vector f, vector g) { int n = f.size(), m = g.size(); assert(n == m); vector res(n); int b = ceil(sqrt(n)); vector> g_pow(b + 1); g_pow[0] = vector{1}; for (int i = 0; i < b; ++i) { g_pow[i + 1] = g_pow[i] * 
g; g_pow[i + 1].resize(n); } vector g_pow2 = vector{1}; for (int i = 0; i < n; i += b) { vector tmp; for (int j = i; j < std::min(i + b, n); ++j) { tmp += g_pow[j - i] * f[j]; } res += tmp * g_pow2; res.resize(n); g_pow2 *= g_pow[b]; g_pow2.resize(n); } return res; } #line 5 "main.cpp" int main() { lint n, p; cin >> n >> p; mint::set_mod(p); vector a(n + 1), b(n + 1); rep(i, 1, n + 1) a[i] = (i == 1 ? mint(1) : mint(i).pow(i - 2)) * fact_inv(i); rep(i, 1, n + 1) b[i] = (i == 1 ? mint(1) : mint(i).pow(i - 1)) * fact_inv(i); auto ans = composition(a, b); cout << ans[n] * fact(n) << endl; }