//#pragma GCC optimize("Ofast")
//#pragma GCC optimize("unroll-loops")
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <iomanip>
#include <iostream>
#include <numeric>
#include <utility>
#include <vector>
using namespace std;

using ll = long long;
using ull = unsigned long long;
using pii = pair<int, int>;
template <class T>
using V = vector<T>;
template <class T>
using VV = V<V<T>>;

// Builds a value-initialised vector of length `a`.
template <class T>
V<T> make_vec(size_t a) {
    return V<T>(a);
}

// Builds a nested vector whose dimensions are `a, ts...` (outermost first).
template <class T, class... Ts>
auto make_vec(size_t a, Ts... ts) {
    return V<decltype(make_vec<T>(ts...))>(a, make_vec<T>(ts...));
}

#define pb push_back
#define eb emplace_back
#define mp make_pair
#define fi first
#define se second
#define rep(i, n) rep2(i, 0, n)
#define rep2(i, m, n) for (int i = m; i < (n); i++)
#define per(i, b) per2(i, 0, b)
#define per2(i, a, b) for (int i = int(b) - 1; i >= int(a); i--)
#define ALL(c) (c).begin(), (c).end()
#define SZ(x) ((int)(x).size())

// Returns 10^n for n >= 0.
constexpr ll TEN(int n) { return (n == 0) ? 1 : 10 * TEN(n - 1); }

// t = min(t, u)
template <class T, class U>
void chmin(T& t, const U& u) {
    if (t > u) t = u;
}

// t = max(t, u)
template <class T, class U>
void chmax(T& t, const U& u) {
    if (t < u) t = u;
}

// Sorts `v` and removes duplicate elements.
template <class T>
void mkuni(vector<T>& v) {
    sort(ALL(v));
    v.erase(unique(ALL(v)), end(v));
}

// Returns the indices 0..v.size()-1 ordered by ascending v[index].
template <class T>
vector<int> sort_by(const vector<T>& v) {
    vector<int> res(v.size());
    iota(res.begin(), res.end(), 0);
    sort(res.begin(), res.end(), [&](int i, int j) { return v[i] < v[j]; });
    return res;
}

// Streams a pair as "(first,second)".
template <class T, class U>
ostream& operator<<(ostream& os, const pair<T, U>& p) {
    os << "(" << p.first << "," << p.second << ")";
    return os;
}

// Streams a vector as "{a,b,c}".
template <class T>
ostream& operator<<(ostream& os, const vector<T>& v) {
    os << "{";
    // SZ() casts to int so the loop index and the bound are both signed.
    rep(i, SZ(v)) {
        if (i) os << ",";
        os << v[i];
    }
    os << "}";
    return os;
}

#ifdef LOCAL
void debug_out() { cerr << endl; }
template <class Head, class... Tail>
void debug_out(Head H, Tail... T) {
    cerr << " " << H;
    debug_out(T...);
}
#define debug(...) \
    cerr << __LINE__ << " [" << #__VA_ARGS__ << "]:", debug_out(__VA_ARGS__)
#define dump(x) cerr << __LINE__ << " " << #x << " = " << (x) << endl
#else
#define debug(...) (void(0))
#define dump(x) (void(0))
#endif

// Reads every element of `v` from stdin and adds `offset` to each.
template <class T>
void scan(vector<T>& v, T offset = T(0)) {
    for (auto& x : v) {
        cin >> x;
        x += offset;
    }
}

// Writes `x` followed by a newline (suc == 1), a space (suc == 2),
// or nothing (any other value).
template <class T>
void print(T x, int suc = 1) {
    cout << x;
    if (suc == 1)
        cout << "\n";
    else if (suc == 2)
        cout << " ";
}

// Writes the elements separated by spaces; the last element is followed by
// the terminator selected by `suc`. An empty vector prints nothing.
template <class T>
void print(const vector<T>& v, int suc = 1) {
    const int n = int(v.size());
    for (int i = 0; i < n; ++i) print(v[i], i == n - 1 ? suc : 2);
}

// Configures the standard streams once, during static initialisation.
struct prepare_io {
    prepare_io() {
        cin.tie(nullptr);
        ios::sync_with_stdio(false);
        cout << fixed << setprecision(10);
    }
} prep_io;

// Integer modulo the compile-time constant MOD; the stored value is kept in
// [0, MOD).
template <unsigned int MOD>
struct ModInt {
    using uint = unsigned int;
    using ull = unsigned long long;
    using M = ModInt;

    uint v;

    // Accepts any signed 64-bit value: _v % MOD lies in (-MOD, MOD), so
    // adding MOD gives a value in (0, 2 * MOD) that set_norm can reduce.
    ModInt(ll _v = 0) { set_norm(_v % MOD + MOD); }

    // Reduces a value from [0, MOD * 2) to [0, MOD).
    M& set_norm(uint _v) {
        v = (_v < MOD) ? _v : _v - MOD;
        return *this;
    }

    explicit operator bool() const { return v != 0; }
    explicit operator int() const { return v; }

    M operator+(const M& a) const { return M().set_norm(v + a.v); }
    M operator-(const M& a) const { return M().set_norm(v + MOD - a.v); }
    M operator*(const M& a) const { return M().set_norm(ull(v) * a.v % MOD); }
    M operator/(const M& a) const { return *this * a.inv(); }
    M& operator+=(const M& a) { return *this = *this + a; }
    M& operator-=(const M& a) { return *this = *this - a; }
    M& operator*=(const M& a) { return *this = *this * a; }
    M& operator/=(const M& a) { return *this = *this / a; }
    M operator-() const { return M() - *this; }

    // NOTE: spelled as postfix operators, but they return the updated value
    // by reference (i.e. they behave like prefix ++/--).
    M& operator++(int) { return *this = *this + 1; }
    M& operator--(int) { return *this = *this - 1; }

    // Binary exponentiation; a negative exponent is applied to the inverse.
    M pow(ll n) const {
        if (n < 0) return inv().pow(-n);
        M x = *this, res = 1;
        while (n) {
            if (n & 1) res *= x;
            x *= x;
            n >>= 1;
        }
        return res;
    }

    // Modular inverse via the extended Euclidean algorithm.
    // Meaningful only when gcd(v, MOD) == 1.
    M inv() const {
        ll a = v, b = MOD, p = 1, q = 0, t;
        while (b != 0) {
            t = a / b;
            swap(a -= t * b, b);
            swap(p -= t * q, q);
        }
        return M(p);
    }

    friend ostream& operator<<(ostream& os, const M& a) { return os << a.v; }
    friend istream& operator>>(istream& in, M& x) {
        ll v_;
        in >> v_;
        x = M(v_);
        return in;
    }

    // Ordering compares the stored representatives in [0, MOD).
    bool operator<(const M& r) const { return v < r.v; }
    // Fixed: previously `v < *this`, which compared a raw uint against a
    // ModInt (no such overload) and therefore failed to compile when used.
    bool operator>(const M& r) const { return r < *this; }
    bool operator<=(const M& r) const { return !(r < *this); }
    bool operator>=(const M& r) const { return !(*this < r); }
    bool operator==(const M& a) const { return v == a.v; }
    bool operator!=(const M& a) const { return v != a.v; }

    static uint get_mod() { return MOD; }
};

// using Mint = ModInt<1000000007>;
using Mint = ModInt<998244353>;

// fact[i] = i!, ifact[i] = 1 / i!, inv[i] = 1 / i (inv[0] is left as 0).
// All three tables are filled by init().
V<Mint> fact, ifact, inv;

// Fills fact / ifact / inv for every index below maxv.
void init() {
    const int maxv = 1000010;
    fact.resize(maxv);
    ifact.resize(maxv);
    inv.resize(maxv);

    fact[0] = 1;
    for (int i = 1; i < maxv; ++i) {
        fact[i] = fact[i - 1] * i;
    }

    // One modular inverse for the largest factorial, then walk downwards
    // using 1 / i! = (1 / (i + 1)!) * (i + 1).
    ifact[maxv - 1] = fact[maxv - 1].inv();
    for (int i = maxv - 2; i >= 0; --i) {
        ifact[i] = ifact[i + 1] * (i + 1);
    }

    for (int i = 1; i < maxv; ++i) {
        inv[i] = ifact[i] * fact[i - 1];
    }
}

// Binomial coefficient C(n, r); 0 when the arguments are out of range.
// Requires init() to have run and n to be below the table size.
Mint comb(int n, int r) {
    if (n < 0 || r < 0 || r > n) return Mint(0);
    return fact[n] * ifact[r] * ifact[n - r];
}

// O(k)
// Computes n * (n - 1) * ... * (n - k + 1) / k! term by term, so n may exceed
// the table size; k itself must stay below it because inv[i + 1] is read.
Mint comb_slow(ll n, ll k) {
    Mint res = 1;
    for (int i = 0; i < k; ++i) {
        res = res * (n - i) * inv[i + 1];
    }
    return res;
}

// line up
// a 'o' + b 'x'
Mint comb2(int a, int b) {
    if (a < 0 || b < 0) return 0;
    return comb(a + b, a);
}

// divide a into b groups
Mint nhr(int a, int b) {
    if (b == 0) return Mint(a == 0);
    return comb(a + b - 1, a);
}

// O(p + log_p n)
// Multiplies comb(n_i, k_i) over the base-p digits of n and k.
// NOTE(review): comb() works modulo Mint's modulus, so this presumably
// expects p to equal that modulus and p to be below the table size - confirm.
Mint lucas(ll n, ll k, int p) {
    if (n < 0 || k < 0 || k > n) return Mint(0);
    Mint res = 1;
    while (n > 0) {
        res *= comb(n % p, k % p);
        n /= p;
        k /= p;
    }
    return res;
}

/**
 * @docs docs/ntt.md
 */
// Number-theoretic transform over the modular type D.
template <class D>
struct NumberTheoreticTransform {
    D root;
    V<D> roots = {0, 1};
    V<int> rev = {0, 1};
    int base = 1, max_base = -1;

    // Determines max_base (the exponent of 2 in mod - 1) and searches upwards
    // from 2 for an element whose multiplicative order is exactly 2^max_base.
    void init() {
        int mod = D::get_mod();
        int tmp = mod - 1;
        max_base = 0;
        while (tmp % 2 == 0) {
            tmp /= 2;
            max_base++;
        }

        root = 2;
        while (true) {
            if (root.pow(1 << max_base).v == 1) {
                if (root.pow(1 << (max_base - 1)).v != 1) {
                    break;
                }
            }
            root++;
        }
    }

    // Grows the bit-reversal table and the root table so that transforms of
    // length up to 2^nbase are supported. Initialises lazily if needed.
    void ensure_base(int nbase) {
        if (max_base == -1) init();
        if (nbase <= base) return;
        assert(nbase <= max_base);

        rev.resize(1 << nbase);
        for (int i = 0; i < (1 << nbase); ++i) {
            rev[i] = (rev[i >> 1] >> 1) + ((i & 1) << (nbase - 1));
        }

        roots.resize(1 << nbase);
        while (base < nbase) {
            D z = root.pow(1 << (max_base - 1 - base));
            for (int i = 1 << (base - 1); i < (1 << base); ++i) {
                roots[i << 1] = roots[i];
                roots[(i << 1) + 1] = roots[i] * z;
            }
            ++base;
        }
    }

    // In-place transform of `a`; a.size() must be a power of two (and
    // non-zero, since __builtin_ctz(0) is undefined). With inv == true the
    // inverse transform is applied, including the division by n.
    void ntt(V<D>& a, bool inv = false) {
        int n = a.size();
        // assert((n & (n - 1)) == 0);
        int zeros = __builtin_ctz(n);
        ensure_base(zeros);
        int shift = base - zeros;

        // Bit-reversal permutation.
        for (int i = 0; i < n; i++) {
            if (i < (rev[i] >> shift)) {
                swap(a[i], a[rev[i] >> shift]);
            }
        }

        // Butterflies over blocks of size 2k.
        for (int k = 1; k < n; k <<= 1) {
            for (int i = 0; i < n; i += 2 * k) {
                for (int j = 0; j < k; j++) {
                    D x = a[i + j];
                    D y = a[i + j + k] * roots[j + k];
                    a[i + j] = x + y;
                    a[i + j + k] = x - y;
                }
            }
        }

        if (inv) {
            // The modular inverse of n is only needed here, so it is no
            // longer computed on forward transforms.
            const D n_inv = D(n).inv();
            reverse(a.begin() + 1, a.end());
            for (int i = 0; i < n; i++) {
                a[i] *= n_inv;
            }
        }
    }

    // Polynomial product of `a` and `b`; the result has
    // a.size() + b.size() - 1 coefficients. Returns an empty vector only
    // when both inputs are empty.
    V<D> mul(V<D> a, V<D> b) {
        if (a.size() == 0 && b.size() == 0) return {};
        int s = a.size() + b.size() - 1;
        int nbase = 1;
        while ((1 << nbase) < s) nbase++;
        int sz = 1 << nbase;

        a.resize(sz);
        b.resize(sz);
        ntt(a);
        ntt(b);
        for (int i = 0; i < sz; i++) {
            a[i] *= b[i];
        }
        ntt(a, true);

        a.resize(s);
        return a;
    }
};

NumberTheoreticTransform<Mint> ntt;

// Returns c[0..n] where c[k] = sum_{i + j = k} ((-1)^i / i!) * (j^n / j!),
// i.e. the Stirling numbers of the second kind S(n, k), computed with a
// single convolution. Requires init() to have run and n below the table size.
V<Mint> stirling_second(int n) {
    V<Mint> a(n + 1), b(n + 1);
    for (int i = 0; i <= n; ++i) {
        a[i] = ifact[i] * (i % 2 ? -1 : 1);
        b[i] = Mint(i).pow(n) * ifact[i];
    }
    auto c = ntt.mul(a, b);
    c.resize(n + 1);
    return c;
}

// Reads N and M from stdin and prints one value modulo 998244353.
// NOTE(review): the problem statement is not visible from this file; the
// comments below only restate the arithmetic.
int main() {
    init();
    ntt.init();

    int N, M;
    cin >> N >> M;

    Mint ans;

    // Subtracted part: one term per k in [2, M].
    for (int k = 2; k <= M; ++k) {
        Mint tr = Mint(k).pow(k - 2) * comb(M, k) * fact[k - 1] *
                  comb(N, k - 1) * Mint(2).pow(k - 1);
        // The guard keeps the exponent non-negative; when it fails,
        // comb(N, k - 1) is already zero.
        if (N >= k - 1) {
            ans -= tr * Mint(M - k).pow((N - (k - 1)) * 2);
        }
    }

    // Added part: sum over k of S(2N, k) * C(M, k) * k! * k.
    auto vec = stirling_second(N * 2);
    debug(vec);
    for (int k = 1; k <= N * 2; ++k) {
        ans += vec[k] * comb(M, k) * fact[k] * k;
    }

    print(ans);
    return 0;
}