/* * C[i] = C[A[i]] であるような i について、 * C[i] = K が成り立つとしたときの答えに K をかけたものを出力すれば良いです。 * * C[i] = K であるような i の個数が s 個だとします。 * そのような i に対する A[i] の場合の数は (N - s)^{s - 1}s^{2} です。 * * C[i] = a (!= K) であるような i の個数が t 個だとします。 * そのような i に対する A[i] の場合の数は (N - t)^t です。 * * よって、 * f(x) = (N - t)^{t} * x^{t} / t! * g(x) = (N - s)^{s - 1} * s^{2} * x^{s} / s! * であるとすると、答えは * N![x^{N}]f^{K - 1}g * と表せます。 * * これは exp, log を用いて時間計算量 O(N log(N)) で求められます。 * */ #include using namespace std; using ll=long long; #define rep(i,a,b) for (int i=(int)(a);i<(int)(b);i++) #include namespace po167{ // |f| = |g| = 2 ^ n template std::vector FPS_cyclic_convolution(std::vector f, std::vector g){ atcoder::internal::butterfly(f); atcoder::internal::butterfly(g); for (int i = 0; i < (int)f.size(); i++) f[i] *= g[i]; atcoder::internal::butterfly_inv(f); T iz = (T)(1) / (T)(f.size()); for (int i = 0; i < (int)f.size(); i++) f[i] *= iz; return f; } } namespace po167{ // return f' template std::vector FPS_differential(std::vector f){ if (f.empty()) return f; for (int i = 0; i < (int)f.size() - 1; i++){ f[i] = f[i + 1] * (T)(i + 1); } f.pop_back(); return f; } } namespace po167{ template std::vector FPS_integral(std::vector f){ if (f.empty()) return f; std::vector num_inv((int)f.size() + 1); num_inv[0] = 1; num_inv[1] = 1; auto m = T::mod(); for (int i = 2; i <= (int)f.size(); i++){ num_inv[i] = (0 - num_inv[m % i]) * (T)(m / i); } f.reserve((int)f.size() + 1); f.push_back(0); for (int i = (int)f.size() - 1; i > 0; i--){ f[i] = f[i - 1] * num_inv[i]; } f[0] = 0; return f; } } namespace po167{ // return 1 / f template std::vector FPS_inv(std::vector f, int len = -1){ if (len == -1) len = f.size(); assert(f[0] != 0); std::vector g = {1 / f[0]}; int s = 1; while(s < len){ // g = 2g_s - f(g_s)^2 (mod x ^ (2 * s)) // g = g - (fg - 1)g // (fg - 1) = 0 (mod x ^ (s)) std::vector n_g(s * 2, 0); std::vector f_s(s * 2, 0); g.resize(s * 2); for (int i = 0; i < s * 2; i++){ if (int(f.size()) > i) f_s[i] = f[i]; n_g[i] = g[i]; } atcoder::internal::butterfly(g); atcoder::internal::butterfly(f_s); for (int i = 0; i < s * 2; i++){ f_s[i] *= g[i]; } atcoder::internal::butterfly_inv(f_s); T iz = 1 / (T)(s * 2); for (int i = s; i < s * 2; i++){ f_s[i] *= iz; } for (int i = 0; i < s; i++){ f_s[i] = 0; } atcoder::internal::butterfly(f_s); for (int i = 0; i < s * 2; i++){ f_s[i] *= g[i]; } atcoder::internal::butterfly_inv(f_s); for (int i = s; i < s * 2; i++){ n_g[i] -= f_s[i] * iz; } std::swap(n_g, g); s *= 2; } g.resize(len); return g; } } namespace po167{ template std::vector FPS_exp(std::vector f, int len = -1){ if (len == -1) len = f.size(); if (len == 0) return {}; if (len == 1) return {T(1)}; assert(!f.empty() && f[0] == 0); int s = 1; // simple std::vector g = {T(1)}; while (s < len){ // g' / g // A * B std::vector A = g, B = g; A = FPS_differential(A); B = FPS_inv(B, 2 * s); A.resize(2 * s); A = FPS_cyclic_convolution(A, B); A.pop_back(); A = FPS_integral(A); for (int i = 0; i < s; i++) A[i] = 0; for (int i = s; i < s * 2; i++) A[i] = (i < (int)f.size() ? f[i] : 0) - A[i]; // g_hat = g (1 - g + f) // g += B = g * A g.resize(2 * s); B = FPS_cyclic_convolution(A, g); for (int i = s; i < s * 2; i++) g[i] = B[i]; s *= 2; } g.resize(len); return g; } } namespace po167{ template std::vector FPS_log(std::vector f, int len = -1){ if (len == -1) len = f.size(); if (len == 0) return {}; if (len == 1) return {T(0)}; assert(!f.empty() && f[0] == 1); std::vector res = atcoder::convolution(FPS_differential(f), FPS_inv(f, len)); res.resize(len - 1); return FPS_integral(res); } } namespace po167{ template std::vector FPS_pow(std::vector f,long long M, int len = -1){ if (len == -1) len = f.size(); std::vector res(len, 0); if (M == 0){ res[0] = 1; return res; } for (int i = 0; i < (int)f.size(); i++){ if (f[i] == 0) continue; if (i > (len - 1) / M) break; std::vector g((int)f.size() - i); T v = (T)(1) / (T)(f[i]); for (int j = i; j < (int)f.size(); j++){ g[j - i] = f[j] * v; } long long zero = i * M; if (i) len -= i * M; g = FPS_log(g, len); for (T &x : g) x *= M; g = FPS_exp(g, len); v = (T)(1) / v; T c = 1; while (M){ if (M & 1) c = c * v; v = v * v; M >>= 1; } for (int j = 0; j < len; j++) res[j + zero] = g[j] * c; return res; } return res; } } using mint = atcoder::modint998244353; int main() { int N; ll K; cin >> N >> K; vector fact(N + 1, 1), invfact(N + 1); rep(i, 0, N) fact[i + 1] = fact[i] * (i + 1); invfact[N] = fact[N].inv(); for (int i = N; i > 0; i--){ invfact[i - 1] = invfact[i] * i; } vector f(N + 1); rep(i, 0, N + 1){ f[i] = invfact[i]; f[i] *= ((mint)(N - i)).pow(i); } f = po167::FPS_pow(f, K - 1); mint ans = 0; rep(i, 1, N + 1){ mint tmp = f[N - i]; tmp *= invfact[i]; tmp *= ((mint)(N - i)).pow(i - 1); tmp *= i; tmp *= i; ans += tmp; } ans *= fact[N]; ans *= K; cout << ans.val() << "\n"; }