"""Number-theoretic transform / formal power series toolkit and a solver.

Everything works modulo ``mod`` (998244353).  The module offers:

* modular inverses, factorials and binomial coefficients (tables grown
  on demand),
* an in-place radix-4 NTT (``ntt`` / ``intt``) and ``convolute``,
* formal power series helpers (``fps_inv``, ``fps_div``, ``fps_log``,
  ``fps_exp``, ``fps_pow`` ...),
* multipoint evaluation, interpolation and ``shift_of_sampling_points``,
* ``get_fact``: n! mod p for arbitrary n via a block-factorial table.

Run as a script it reads ``N K`` from stdin and prints
``2 * (N - 1) * (N * S_K(N) - S_{K+1}(N)) * (N - 2)!  (mod p)``
where ``S_e(N) = 1**e + 2**e + ... + N**e``.
"""

mod = 998244353
n = 2000000  # largest argument that inv() answers from the lookup table

# Lookup tables, grown lazily so that importing the module stays cheap.
# Inv[a] is the inverse of a (Inv[0] is a placeholder 1, as in the
# original eager table); fact[i] = i!; fact_inv[i] = 1 / i!.
Inv = [1, 1]
fact = [1]
fact_inv = [1]


def _ensure_inverses(limit):
    """Extend ``Inv`` so that indices ``0..limit`` are valid."""
    # a*x + p*y = 1  <=>  r*x + p*(-x - q*y) = -q
    #   =>  x = -(inv[r]) * (p // a)   with r = p % a
    for a in range(len(Inv), limit + 1):
        Inv.append((mod - Inv[mod % a]) * (mod // a) % mod)


def _ensure_factorials(limit):
    """Extend ``fact`` / ``fact_inv`` so that indices ``0..limit`` are valid.

    ``limit`` must stay below ``mod``; beyond that ``limit!`` is 0 and has
    no inverse.
    """
    size = len(fact)
    if limit < size:
        return
    for i in range(size, limit + 1):
        fact.append(fact[-1] * i % mod)
    fact_inv.extend([0] * (limit + 1 - size))
    # One modular exponentiation for the top entry, then walk downwards
    # using 1/(i-1)! = (1/i!) * i.
    running = pow(fact[limit], mod - 2, mod)
    for i in range(limit, size - 1, -1):
        fact_inv[i] = running
        running = running * i % mod


def inv(x):
    """Return the modular inverse of ``x`` modulo ``mod``.

    Arguments up to ``n`` (after reduction) are served from the ``Inv``
    table, larger ones via Fermat's little theorem.  ``inv(0)`` returns
    the table placeholder 1.
    """
    x %= mod
    if x <= n:
        _ensure_inverses(x)
        return Inv[x]
    return pow(x, mod - 2, mod)


def binom(n, r):
    """Return C(n, r) modulo ``mod`` (0 when the arguments are out of range)."""
    if n < r or n < 0 or r < 0:
        return 0
    _ensure_factorials(n)
    res = fact_inv[n - r] * fact_inv[r] % mod
    res *= fact[n]
    res %= mod
    return res


# Supported NTT primes; NTT_info[i] = [rank2, element used as the
# 2**rank2-th root of unity] for NTT_friend[i].
NTT_friend = [120586241, 167772161, 469762049, 754974721, 880803841,
              924844033, 943718401, 998244353, 1045430273, 1051721729,
              1053818881]
NTT_dict = {}
for i in range(len(NTT_friend)):
    NTT_dict[NTT_friend[i]] = i
NTT_info = [[20, 74066978], [25, 17], [26, 30], [24, 362], [23, 211],
            [21, 44009197], [22, 663003469], [23, 31], [20, 363],
            [20, 330], [20, 2789]]


def popcount(n):
    """Count the set bits of ``n``; only the low 64 bits are considered."""
    c = (n & 0x5555555555555555) + ((n >> 1) & 0x5555555555555555)
    c = (c & 0x3333333333333333) + ((c >> 2) & 0x3333333333333333)
    c = (c & 0x0f0f0f0f0f0f0f0f) + ((c >> 4) & 0x0f0f0f0f0f0f0f0f)
    c = (c & 0x00ff00ff00ff00ff) + ((c >> 8) & 0x00ff00ff00ff00ff)
    c = (c & 0x0000ffff0000ffff) + ((c >> 16) & 0x0000ffff0000ffff)
    c = (c & 0x00000000ffffffff) + ((c >> 32) & 0x00000000ffffffff)
    return c


def topbit(n):
    """Return the index of the highest set bit of ``n`` (-1 for 0)."""
    h = n.bit_length()
    h -= 1
    return h


def prepared_fft(mod=998244353):
    """Build the twiddle tables used by ``ntt`` / ``intt``.

    Returns ``(root, iroot, rate2, irate2, rate3, irate3)`` where
    ``root[k]`` is obtained by repeated squaring from the table entry for
    ``mod`` (so ``root[k] ** 2 == root[k - 1]``), ``iroot`` holds the
    inverses, and ``rate2`` / ``rate3`` are the incremental rotation
    factors consumed by the radix-2 / radix-4 butterflies.
    """
    rank2 = NTT_info[NTT_dict[mod]][0]
    root, iroot = [0] * 30, [0] * 30
    rate2, irate2 = [0] * 30, [0] * 30
    rate3, irate3 = [0] * 30, [0] * 30

    root[rank2] = NTT_info[NTT_dict[mod]][1]
    iroot[rank2] = pow(root[rank2], mod - 2, mod)
    for i in range(rank2 - 1, -1, -1):
        root[i] = root[i + 1] * root[i + 1] % mod
        iroot[i] = iroot[i + 1] * iroot[i + 1] % mod

    prod, iprod = 1, 1
    for i in range(rank2 - 1):
        rate2[i] = root[i + 2] * prod % mod
        irate2[i] = iroot[i + 2] * iprod % mod
        prod = prod * iroot[i + 2] % mod
        iprod = iprod * root[i + 2] % mod

    prod, iprod = 1, 1
    for i in range(rank2 - 2):
        rate3[i] = root[i + 3] * prod % mod
        irate3[i] = iroot[i + 3] * iprod % mod
        prod = prod * iroot[i + 3] % mod
        iprod = iprod * root[i + 3] % mod

    return root, iroot, rate2, irate2, rate3, irate3


root, iroot, rate2, irate2, rate3, irate3 = prepared_fft()


def ntt(a):
    """Transform ``a`` in place; ``len(a)`` must be a power of two.

    Uses radix-4 passes and a single radix-2 pass when the number of
    levels is odd.  The output is in the order that ``intt`` expects.
    """
    n = len(a)
    h = topbit(n)
    assert (n == 1 << h)
    le = 0
    while le < h:
        if h - le == 1:
            # One level left: plain radix-2 butterfly.
            p = 1 << (h - le - 1)
            rot = 1
            for s in range(1 << le):
                offset = s << (h - le)
                for i in range(p):
                    l = a[i + offset]
                    r = a[i + offset + p] * rot % mod
                    a[i + offset] = (l + r) % mod
                    a[i + offset + p] = (l - r) % mod
                # ~s & -~s isolates the lowest zero bit of s.
                rot = rot * rate2[topbit(~s & -~s)] % mod
            le += 1
        else:
            # Two levels at once: radix-4 butterfly.
            p = 1 << (h - le - 2)
            rot, imag = 1, root[2]
            for s in range(1 << le):
                rot2 = rot * rot % mod
                rot3 = rot2 * rot % mod
                offset = s << (h - le)
                for i in range(p):
                    a0 = a[i + offset]
                    a1 = a[i + offset + p] * rot
                    a2 = a[i + offset + p * 2] * rot2
                    a3 = a[i + offset + p * 3] * rot3
                    a1na3imag = (a1 - a3) % mod * imag
                    a[i + offset] = (a0 + a2 + a1 + a3) % mod
                    a[i + offset + p] = (a0 + a2 - a1 - a3) % mod
                    a[i + offset + p * 2] = (a0 - a2 + a1na3imag) % mod
                    a[i + offset + p * 3] = (a0 - a2 - a1na3imag) % mod
                rot = rot * rate3[topbit(~s & -~s)] % mod
            le += 2


def intt(a):
    """Inverse of ``ntt``: transform ``a`` in place, including the 1/len scaling."""
    n = len(a)
    h = topbit(n)
    assert (n == 1 << h)
    coef = pow(n, mod - 2, mod)
    for i in range(n):
        a[i] = a[i] * coef % mod
    le = h
    while le:
        if le == 1:
            p = 1 << (h - le)
            irot = 1
            for s in range(1 << (le - 1)):
                offset = s << (h - le + 1)
                for i in range(p):
                    l = a[i + offset]
                    r = a[i + offset + p]
                    a[i + offset] = (l + r) % mod
                    a[i + offset + p] = (l - r) * irot % mod
                irot = irot * irate2[topbit(~s & -~s)] % mod
            le -= 1
        else:
            p = 1 << (h - le)
            irot, iimag = 1, iroot[2]
            for s in range(1 << (le - 2)):
                irot2 = irot * irot % mod
                irot3 = irot2 * irot % mod
                offset = s << (h - le + 2)
                for i in range(p):
                    a0 = a[i + offset]
                    a1 = a[i + offset + p]
                    a2 = a[i + offset + p * 2]
                    a3 = a[i + offset + p * 3]
                    a2na3iimag = (a2 - a3) * iimag % mod
                    a[i + offset] = (a0 + a1 + a2 + a3) % mod
                    a[i + offset + p] = (a0 - a1 + a2na3iimag) * irot % mod
                    a[i + offset + p * 2] = (a0 + a1 - a2 - a3) * irot2 % mod
                    a[i + offset + p * 3] = (a0 - a1 - a2na3iimag) * irot3 % mod
                irot *= irate3[topbit(~s & -~s)]
                irot %= mod
            le -= 2


def convolute_naive(a, b):
    """Return the product of polynomials ``a`` and ``b`` in O(len(a) * len(b))."""
    res = [0] * (len(a) + len(b) - 1)
    for i in range(len(a)):
        for j in range(len(b)):
            res[i + j] = (res[i + j] + a[i] * b[j] % mod) % mod
    return res


def convolute(a, b):
    """Return the product of polynomials ``a`` and ``b`` modulo ``mod``.

    Falls back to the quadratic method when the shorter operand has at
    most 60 coefficients.  The inputs are not modified.
    """
    s = a[:]
    t = b[:]
    n = len(s)
    m = len(t)
    if min(n, m) <= 60:
        return convolute_naive(s, t)
    le = 1
    while le < n + m - 1:
        le *= 2
    s += [0] * (le - n)
    t += [0] * (le - m)
    ntt(s)
    ntt(t)
    for i in range(le):
        s[i] = s[i] * t[i] % mod
    intt(s)
    return s[:n + m - 1]


def fps_inv(f, deg=-1):
    """Return the first ``deg`` coefficients of 1 / f (``f[0]`` must be non-zero).

    ``deg`` defaults to ``len(f)``.  Precision is doubled each round.
    """
    assert (f[0] != 0)
    if deg == -1:
        deg = len(f)
    res = [0] * deg
    res[0] = pow(f[0], mod - 2, mod)
    d = 1
    while d < deg:
        a = [0] * (d << 1)
        tmp = min(len(f), d << 1)
        a[:tmp] = f[:tmp]
        b = [0] * (d << 1)
        b[:d] = res[:d]
        ntt(a)
        ntt(b)
        for i in range(d << 1):
            a[i] = a[i] * b[i] % mod
        intt(a)
        # Only the upper half carries new information.
        a[:d] = [0] * d
        ntt(a)
        for i in range(d << 1):
            a[i] = a[i] * b[i] % mod
        intt(a)
        for j in range(d, min(d << 1, deg)):
            if a[j]:
                res[j] = mod - a[j]
            else:
                res[j] = 0
        d <<= 1
    return res


def fps_div(f, g):
    """Polynomial division: return ``(q, r)`` with ``f = g * q + r``.

    Trailing zero coefficients are stripped from ``r``.  When ``f`` is
    shorter than ``g`` the result is ``([], f)`` (``f`` itself, not a copy).
    """
    n, m = len(f), len(g)
    if n < m:
        return [], f
    rev_f = f[::-1]
    rev_g = g[::-1]
    rev_q = convolute(rev_f, fps_inv(rev_g, n - m + 1))[:n - m + 1]
    q = rev_q[::-1]
    p = convolute(g, q)
    r = f[:]
    for i in range(min(len(p), len(r))):
        r[i] -= p[i]
        r[i] %= mod
    while len(r):
        if r[-1] != 0:
            break
        r.pop()
    return q, r


def fps_add(f, g):
    """Return the coefficient-wise sum of ``f`` and ``g``."""
    n = max(len(f), len(g))
    res = [0] * n
    for i in range(len(f)):
        res[i] = f[i]
    for i in range(len(g)):
        res[i] = (res[i] + g[i]) % mod
    return res


def fps_diff(f):
    """Return the formal derivative of ``f`` (``[0]`` for a constant)."""
    if len(f) <= 1:
        return [0]
    res = []
    for i in range(1, len(f)):
        res.append(i * f[i] % mod)
    return res


def fps_integrate(f):
    """Return the formal antiderivative of ``f`` with constant term 0."""
    n = len(f)
    res = [0] * (n + 1)
    for i in range(n):
        res[i + 1] = pow(i + 1, mod - 2, mod) * f[i] % mod
    return res


def fps_log(f, deg=-1):
    """Return the first ``deg`` coefficients of log(f); requires ``f[0] == 1``."""
    assert (f[0] == 1)
    if deg == -1:
        deg = len(f)
    res = convolute(fps_diff(f), fps_inv(f, deg))
    res = fps_integrate(res)
    return res[:deg]


def fps_exp(f, deg=-1):
    """Return the first ``deg`` coefficients of exp(f); requires ``f[0] == 0``.

    Newton iteration that doubles the precision ``m`` each round while
    maintaining ``res`` (exp so far), ``g`` (its inverse) and ``q`` (the
    transform of ``g``).
    """
    assert (f[0] == 0)
    if deg == -1:
        deg = len(f)
    res = [1, 0]
    if len(f) > 1:
        res[1] = f[1]
    g = [1]
    q = [1, 1]
    m = 2
    while m < deg:
        y = res + [0] * m
        ntt(y)
        p = q[:]
        z = [y[i] * p[i] for i in range(len(p))]
        intt(z)
        z[:m >> 1] = [0] * (m >> 1)
        ntt(z)
        for i in range(len(p)):
            z[i] = z[i] * (-p[i]) % mod
        intt(z)
        g[m >> 1:] = z[m >> 1:]
        q = g + [0] * m
        ntt(q)

        tmp = min(len(f), m)
        x = f[:tmp] + [0] * (m - tmp)
        x = fps_diff(x)
        x.append(0)
        ntt(x)
        for i in range(len(x)):
            x[i] = x[i] * y[i] % mod
        intt(x)
        for i in range(len(res)):
            if i == 0:
                continue
            x[i - 1] -= res[i] * i % mod
        x += [0] * m
        # Move the wrapped-around low part up by m positions.
        for i in range(m - 1):
            x[m + i], x[i] = x[i], 0
        ntt(x)
        for i in range(len(q)):
            x[i] = x[i] * q[i] % mod
        intt(x)
        x.pop()
        x = fps_integrate(x)
        x[:m] = [0] * m
        for i in range(m, min(len(f), m << 1)):
            x[i] += f[i]
        ntt(x)
        for i in range(len(y)):
            x[i] = x[i] * y[i] % mod
        intt(x)
        res[m:] = x[m:]
        m <<= 1
    return res[:deg]


def fps_pow(f, k, deg=-1):
    """Return the first ``deg`` coefficients of ``f ** k``.

    ``deg`` defaults to ``len(f)``.  The input list is left untouched
    (a zero-padded copy is used when ``f`` is shorter than ``deg``).
    """
    if deg == -1:
        deg = len(f)
    if k == 0:
        return [1] + [0] * (deg - 1)
    if len(f) < deg:
        f = f + [0] * (deg - len(f))
    # p = index of the first non-zero coefficient.
    p = 0
    while p < deg:
        if f[p]:
            break
        p += 1
    if p * k >= deg:
        return [0] * deg
    a = f[p]
    a_inv = pow(a, mod - 2, mod)
    # Normalise to constant term 1, then f**k = exp(k * log f).
    g = [f[i + p] * a_inv % mod for i in range(deg - p)]
    g = fps_log(g)
    for i in range(deg - p):
        g[i] = g[i] * k % mod
    g = fps_exp(g)
    a = pow(a, k, mod)
    res = [0] * deg
    for i in range(deg):
        j = i + p * k
        if j >= deg:
            break
        res[j] = g[i] * a % mod
    return res


def transposed_ntt(a):
    """Overwrite ``a`` with ``len(a) * intt(a)`` with entries 1.. reversed; return ``a``."""
    b = a[:]
    intt(b)
    b = [b[0]] + b[1:][::-1]
    for i in range(len(a)):
        a[i] = b[i] * len(a) % mod
    return a


def transposed_ntt_inv(a):
    """Overwrite ``a`` with ``ntt`` of (``a`` with entries 1.. reversed) divided by ``len(a)``."""
    b = [a[0]] + a[1:][::-1]
    ntt(b)
    n = len(b)
    n_inv = pow(n, mod - 2, mod)
    for i in range(len(b)):
        a[i] = b[i] * n_inv % mod
    return


def ntt_doubling(a, flag=1):
    """Double (``flag=1``) or transposed-halve (``flag=0``) a transform in place.

    With the default flag, ``a`` holding the length-M transform of a
    sequence is extended to length 2M by appending the transform of the
    sequence scaled by powers of ``root[log2(2M)]``.  With ``flag=0`` the
    list is shrunk to half its length using the transposed operations.
    """
    if flag == 0:
        M = len(a) // 2
        tmp = a[:M]
        aa = a[M:]
        transposed_ntt(aa)
        r = 1
        zeta = root[topbit(2 * M)]
        for i in range(M):
            aa[i] = aa[i] * r % mod
            r = r * zeta % mod
        transposed_ntt_inv(aa)
        for i in range(M):
            aa[i] = (aa[i] + tmp[i]) % mod
        while len(a) > M:
            a.pop()
        for i in range(M):
            a[i] = aa[i]
        return

    M = len(a)
    b = a[:]
    intt(b)
    r = 1
    zeta = root[topbit(2 * M)]
    for i in range(M):
        b[i] = b[i] * r % mod
        r = r * zeta % mod
    ntt(b)
    a += b
    return


def middle_product(a, b):
    """Return ``[sum_j b[j] * a[i + j]]`` for ``i = 0 .. len(a) - len(b)``.

    Requires ``len(a) >= len(b)``.
    """
    assert (len(a) >= len(b))
    # naive
    if min(len(b), len(a) - len(b) + 1) <= 60:
        res = [0] * (len(a) - len(b) + 1)
        for i in range(len(res)):
            for j in range(len(b)):
                res[i] = (res[i] + b[j] * a[i + j] % mod) % mod
        return res
    n = 1 << (len(a) - 1).bit_length()
    fa = [0] * n
    fb = [0] * n
    for i in range(len(a)):
        fa[i] = a[i]
    for i in range(len(b)):
        fb[i] = b[~i]  # b reversed
    ntt(fa)
    ntt(fb)
    for i in range(n):
        fa[i] = fa[i] * fb[i] % mod
    intt(fa)
    fa = fa[len(b) - 1:len(a)]
    return fa


def multipoint_evaluation(f, point):
    """Return ``[f(x) for x in point]`` modulo ``mod``.

    NOTE(review): this looks like the transposed subproduct-tree method
    (products kept in the transform domain, descent via ``ntt_doubling``
    with ``flag=0``) -- confirm against the library it was ported from.
    """
    n = 1
    while n < len(point):
        n <<= 1
    k = topbit(n)
    F = [[0 for _ in range(n)] for _ in range(k + 1)]
    F2 = [[0 for _ in range(n)] for _ in range(k + 1)]
    G = [[0 for _ in range(n)] for _ in range(k + 1)]
    for i in range(len(point)):
        F[0][i] = (-point[i]) % mod

    # Upward pass: build the products level by level.
    for d in range(k):
        b = 1 << d
        L = 0
        while L < n:
            f1 = F[d][L:L + b]
            f2 = F[d][L + b:L + 2 * b]
            ntt_doubling(f1)
            ntt_doubling(f2)
            for i in range(b):
                f1[i] = (f1[i] + 1) % mod
                f2[i] = (f2[i] + 1) % mod
            for i in range(b, 2 * b):
                f1[i] = (f1[i] - 1) % mod
                f2[i] = (f2[i] - 1) % mod
            for i in range(2 * b):
                F[d][L + i] = f1[i]
                F2[d][L + i] = f2[i]
                F[d + 1][L + i] = (f1[i] * f2[i] % mod - 1) % mod
            L += 2 * b

    P = F[k][:]
    intt(P)
    P.append(1)
    P = P[::-1]
    P = P[:len(f)]
    while len(P) < len(f):
        P.append(0)
    P = fps_inv(P)
    f = f[:n + len(P) - 1]
    while len(f) < n + len(P) - 1:
        f.append(0)
    f = middle_product(f, P)
    f = f[::-1]
    transposed_ntt_inv(f)
    G[k] = f

    # Downward pass.
    for d in range(k - 1, -1, -1):
        b = 1 << d
        L = 0
        while L < n:
            g1 = [0] * (2 * b)
            g2 = [0] * (2 * b)
            for i in range(2 * b):
                g1[i] = G[d + 1][L + i] * F2[d][L + i] % mod
                g2[i] = G[d + 1][L + i] * F[d][L + i] % mod
            ntt_doubling(g1, 0)
            ntt_doubling(g2, 0)
            for i in range(b):
                G[d][L + i] = g1[i]
                G[d][L + b + i] = g2[i]
            L += 2 * b
    res = G[0][:len(point)]
    return res


def online_convolute(F):
    """Return the product of all polynomials in ``F`` by divide and conquer.

    An empty ``F`` yields ``[1]`` (the empty product).
    """
    N = len(F)
    if N == 0:
        return [1]

    def f(l, r):
        if l + 1 == r:
            return F[l]
        m = (l + r) // 2
        return convolute(f(l, m), f(m, r))

    return f(0, N)


def sum_of_rationals(W, A):
    """Return ``(num, den)`` with ``num / den = sum(W[i] / (x - A[i]))``."""
    assert (len(W) == len(A))
    N = len(W)

    def calc(l, r):
        if l + 1 == r:
            return ([W[l]], [-A[l], 1])
        m = (l + r) // 2
        f, ff = calc(l, m)
        g, gg = calc(m, r)
        h = fps_add(convolute(f, gg), convolute(ff, g))
        hh = convolute(ff, gg)
        return (h, hh)

    return calc(0, N)


def polynominal_interpolation(X, Y):
    """Return the numerator built from ``Y[i] / g'(X[i])`` over ``x - X[i]``.

    ``g`` is the product of ``(x - X[i])``.  ``Y`` is not modified.
    """
    assert (len(X) == len(Y))
    N = len(X)
    G = [[-X[i], 1] for i in range(N)]
    g = online_convolute(G)
    gg = fps_diff(g)
    YY = multipoint_evaluation(gg, X)
    weights = [Y[i] * pow(YY[i], mod - 2, mod) % mod for i in range(N)]
    return sum_of_rationals(weights, X)[0]


def shift_of_sampling_points(Y, M, c):
    """Given ``Y[i] = f(i)`` for ``i < len(Y)``, return ``[f(c), ..., f(c + M - 1)]``.

    ``f`` is the polynomial of degree below ``len(Y)`` through the samples.
    Reference:
    https://suisen-cp.github.io/cp-library-cpp/library/polynomial/shift_of_sampling_points.hpp
    """
    N = len(Y)
    _ensure_factorials(max(N, M) - 1)

    # step1: coefficients of f in the falling-factorial basis.
    A = [Y[j] * fact_inv[j] % mod for j in range(N)]
    signs = [fact_inv[i] * pow(-1, i) % mod for i in range(N)]
    f = convolute(A, signs)[:N]

    if M == 1:
        # Evaluate directly: d runs through the falling factorials of c.
        d = 1
        res = 0
        for i in range(N):
            res += f[i] * d % mod
            res %= mod
            d = d * (c - i) % mod
        return [res]

    # step2: re-expand around c, then convert back to samples.
    A = [f[i] * fact[i] % mod for i in range(N)]
    A = A[::-1]
    shift = [fact_inv[j] for j in range(N)]
    b = 1
    for i in range(N):
        shift[i] = shift[i] * b % mod
        b = b * (c - i) % mod
    shifted = convolute(A, shift)[:N]

    A = [shifted[N - 1 - j] * fact_inv[j] % mod for j in range(N)]
    expo = [fact_inv[i] for i in range(M)]
    res = convolute(A, expo)[:M]
    for i in range(M):
        res[i] = res[i] * fact[i] % mod
    return res


# Block-factorial table: T[q] = (q * B)! mod P, built on first use because
# constructing it is by far the most expensive step of the module.
K = 9
B = 1 << K
P = mod
T = []


def _build_fact_table():
    """Fill ``T`` with ``(q * B)! mod P`` for ``q = 0 .. P // B``."""
    # point holds g_t(x) = prod_{k=1}^{t-1} (t*x + k) at x = 0..t-1,
    # starting from t = 2 and doubling t until it reaches B.
    i = 1
    point = [1, 3]
    while i < K:
        t = 1 << i
        f = point + shift_of_sampling_points(point, 3 * t, t)
        # g_{2t}(j) = g_t(2j) * g_t(2j + 1) * (t * (2j + 1))
        point = [
            (f[2 * j] * f[2 * j + 1] % mod) * (t * (2 * j + 1) % mod) % mod
            for j in range(2 * t)
        ]
        i += 1
    point = shift_of_sampling_points(point, P // B, 0)
    table = [1] + point
    # Multiply in the missing factor i*B, then take prefix products.
    for i in range(1, len(table)):
        table[i] = table[i] * (i * B) % mod
    for i in range(len(table) - 1):
        table[i + 1] = table[i + 1] * table[i] % mod
    T[:] = table


def get_fact(n):
    """Return ``n! mod P`` for any ``n >= 0`` (0 once ``n >= P``).

    Raises:
        ValueError: if ``n`` is negative.
    """
    if n < 0:
        raise ValueError("factorial is undefined for negative n")
    if n >= mod:
        # n! contains the factor P.
        return 0
    if not T:
        _build_fact_table()
    q, r = divmod(n, B)
    res = T[q]
    for i in range(1, r + 1):
        res = res * (q * B + i) % mod
    return res


def main():
    """Read ``N K`` from stdin and print the answer modulo ``mod``."""
    big_n, power = map(int, input().split())

    # Sum of K-th powers: a polynomial of degree K + 1 in N, sampled at
    # 0..K+1 and evaluated at N.
    y = [0]
    r = 0
    for i in range(1, power + 2):
        r += pow(i, power, mod)
        r %= mod
        y.append(r)
    p = shift_of_sampling_points(y, 1, big_n)[0]

    # Sum of (K + 1)-th powers, sampled at 0..K+2.
    y = [0]
    r = 0
    for i in range(1, power + 3):
        r += pow(i, power + 1, mod)
        r %= mod
        y.append(r)
    q = shift_of_sampling_points(y, 1, big_n)[0]

    ans = big_n * p - q
    ans %= mod
    ans *= (big_n - 1)
    ans %= mod
    ans *= 2
    # A zero product stays zero, so the factorial (and its expensive
    # table) is only needed otherwise; this also covers N < 2.
    if ans:
        ans = ans * get_fact(big_n - 2) % mod
    print(ans)


if __name__ == "__main__":
    main()