import sys

# Read the two integers N and P; every final quantity is reduced modulo P.
N,P = map(int,input().split())

if N == P:
    # Presumably special-cased because N! is 0 modulo P when N == P, which
    # would make the inverse-factorial table below degenerate.
    print(pow(N,N - 2,P))
    sys.exit()

# Three NTT moduli.  `mod` is the working modulus read by ntt/intt/convolute
# and the fps_* helpers; M lists all three for CRT reconstruction.
mod = 998244353
Mod,MOD = 1045430273,1051721729
M = [mod,Mod,MOD]

# Factorials and inverse factorials modulo P for 0..N.
n = N
fact = [1] * (n + 1)
for i in range(1,n+1):
    fact[i] = fact[i-1] * i % P
fact_inv = [1] * (n + 1)
# Fermat inverse -- assumes P is prime and N < P; TODO confirm against the
# problem constraints.
fact_inv[-1] = pow(fact[-1],P-2,P)
for i in range(n,0,-1):
    fact_inv[i-1] = fact_inv[i]*i % P


def binom(n,r):
    """Return fact[n] * fact_inv[n - r] * fact_inv[r] modulo P.

    Reads the module-level tables `fact` and `fact_inv`; callers must keep
    0 <= r <= n <= N so the indices stay inside the tables.
    """
    res = fact[n] * (fact_inv[n - r] * fact_inv[r] % P) % P
    return res


# Moduli accepted by prepared_fft, and for each one (same index) the pair
# [rank2, seed] used there: root[rank2] is initialised to `seed`
# (presumably a primitive 2**rank2-th root of unity -- verify before adding
# new entries).
NTT_friend = [120586241,167772161,469762049,754974721,880803841,924844033,943718401,998244353,1045430273,1051721729,1053818881]
NTT_dict = {p: i for i, p in enumerate(NTT_friend)}
NTT_info = [[20,74066978],[25,17],[26,30],[24,362],[23,211],[21,44009197],[22,663003469],[23,31],[20,363],[20,330],[20,2789]]


def popcount(n):
    """Return the number of set bits of n using 64-bit mask-and-add steps.

    The masks are 64 bits wide, so the result is only meaningful for
    0 <= n < 2**64.
    """
    c=(n&0x5555555555555555)+((n>>1)&0x5555555555555555)
    c=(c&0x3333333333333333)+((c>>2)&0x3333333333333333)
    c=(c&0x0f0f0f0f0f0f0f0f)+((c>>4)&0x0f0f0f0f0f0f0f0f)
    c=(c&0x00ff00ff00ff00ff)+((c>>8)&0x00ff00ff00ff00ff)
    c=(c&0x0000ffff0000ffff)+((c>>16)&0x0000ffff0000ffff)
    c=(c&0x00000000ffffffff)+((c>>32)&0x00000000ffffffff)
    return c


def topbit(n):
    """Return the index of the highest set bit of n (-1 when n == 0)."""
    h = n.bit_length()
    h -= 1
    return h


def prepared_fft(mod = 998244353):
    """Build the root tables used by ntt/intt for the modulus `mod`.

    `mod` must be one of NTT_friend (a KeyError is raised otherwise).
    Returns the tuple (root, iroot, rate2, irate2, rate3, irate3); each is a
    list of length 30.  The function itself has no side effects: the caller
    decides which globals to rebind with the result.
    """
    rank2 = NTT_info[NTT_dict[mod]][0]
    root,iroot = [0] * 30,[0] * 30
    rate2,irate2= [0] * 30,[0] * 30
    rate3,irate3= [0] * 30,[0] * 30

    # root[k] is obtained from root[k + 1] by squaring, starting at the seed.
    root[rank2] = NTT_info[NTT_dict[mod]][1]
    iroot[rank2] = pow(root[rank2],mod - 2,mod)
    for i in range(rank2-1,-1,-1):
        root[i] = root[i+1] * root[i+1] % mod
        iroot[i] = iroot[i+1] * iroot[i+1] % mod

    # Incremental twiddle multipliers for the radix-2 steps.
    prod,iprod = 1,1
    for i in range(rank2-1):
        rate2[i] = root[i + 2] * prod % mod
        irate2[i] = iroot[i + 2] * iprod % mod
        prod = prod * iroot[i + 2] % mod
        iprod = iprod * root[i + 2] % mod

    # Incremental twiddle multipliers for the radix-4 steps.
    prod,iprod = 1,1
    for i in range(rank2-2):
        rate3[i] = root[i + 3] * prod % mod
        irate3[i] = iroot[i + 3] * iprod % mod
        prod = prod * iroot[i + 3] % mod
        iprod = iprod * root[i + 3] % mod

    return root,iroot,rate2,irate2,rate3,irate3


# Default tables, matching the initial working modulus `mod`.
root,iroot,rate2,irate2,rate3,irate3 = prepared_fft()


def ntt(a):
    """Transform the list `a` in place (forward direction).

    len(a) must be a power of two (asserted).  The modulus and twiddles are
    NOT parameters: the function reads the module globals `mod`, `root`,
    `rate2` and `rate3`, so those tables must have been built for the
    current value of `mod`.
    """
    n = len(a)
    h = topbit(n)
    assert (n == 1 << h)
    le = 0
    while le < h:
        if h - le == 1:
            # A single remaining level: radix-2 butterflies.
            p = 1 << (h - le - 1)
            rot = 1
            for s in range(1 << le):
                offset = s << (h - le)
                for i in range(p):
                    lo = a[i + offset]
                    hi = a[i + offset + p] * rot % mod
                    a[i + offset] = (lo + hi) % mod
                    a[i + offset + p] = (lo - hi) % mod
                # ~s & -~s isolates the lowest zero bit of s.
                rot = rot * rate2[topbit(~s & -~s)] % mod
            le += 1
        else:
            # Two levels at once: radix-4 butterflies.
            p = 1 << (h - le - 2)
            rot,imag = 1,root[2]
            for s in range(1 << le):
                rot2 = rot * rot % mod
                rot3 = rot2 * rot % mod
                offset = s << (h - le)
                for i in range(p):
                    a0 = a[i + offset]
                    a1 = a[i + offset + p] * rot
                    a2 = a[i + offset + p * 2] * rot2
                    a3 = a[i + offset + p * 3] * rot3
                    a1na3imag = (a1 - a3) % mod * imag
                    a[i + offset] = (a0 + a2 + a1 + a3) % mod
                    a[i + offset + p] = (a0 + a2 - a1 - a3) % mod
                    a[i + offset + p * 2] = (a0 - a2 + a1na3imag) % mod
                    a[i + offset + p * 3] = (a0 - a2 - a1na3imag) % mod
                rot = rot * rate3[topbit(~s & -~s)] % mod
            le += 2


def intt(a):
    """Transform the list `a` in place (inverse direction of ntt).

    len(a) must be a power of two (asserted).  Every entry is first
    multiplied by the inverse of len(a) modulo `mod`.  Like ntt, this reads
    the module globals `mod`, `iroot`, `irate2` and `irate3`.
    """
    n = len(a)
    h = topbit(n)
    assert (n == 1 << h)
    coef = pow(n,mod - 2,mod)
    for i in range(n):
        a[i] = a[i] * coef % mod
    le = h
    while le:
        if le == 1:
            p = 1 << (h - le)
            irot = 1
            for s in range(1 << (le - 1)):
                offset = s << (h - le + 1)
                for i in range(p):
                    lo = a[i + offset]
                    hi = a[i + offset + p]
                    a[i + offset] = (lo + hi) % mod
                    a[i + offset + p] = (lo - hi) * irot % mod
                irot = irot * irate2[topbit(~s & -~s)] % mod
            le -= 1
        else:
            p = 1 << (h - le)
            irot,iimag = 1,iroot[2]
            for s in range(1 << (le - 2)):
                irot2 = irot * irot % mod
                irot3 = irot2 * irot % mod
                offset = s << (h - le + 2)
                for i in range(p):
                    a0 = a[i + offset]
                    a1 = a[i + offset + p]
                    a2 = a[i + offset + p * 2]
                    a3 = a[i + offset + p * 3]
                    a2na3iimag = (a2 - a3) * iimag % mod
                    a[i + offset] = (a0 + a1 + a2 + a3) % mod
                    a[i + offset + p] = (a0 - a1 + a2na3iimag) * irot % mod
                    a[i + offset + p * 2] = (a0 + a1 - a2 - a3) * irot2 % mod
                    a[i + offset + p * 3] = (a0 - a1 - a2na3iimag) * irot3 % mod
                irot *= irate3[topbit(~s & -~s)]
                irot %= mod
            le -= 2


def convolute_naive(a,b):
    """Return the product of polynomials a and b modulo the global `mod`.

    Quadratic schoolbook multiplication; both inputs must be non-empty.
    """
    res = [0] * (len(a) + len(b) - 1)
    for i in range(len(a)):
        for j in range(len(b)):
            res[i+j] = (res[i+j] + a[i] * b[j] % mod) % mod
    return res


def convolute(a,b):
    """Return the product of polynomials a and b modulo the global `mod`.

    Works on copies, so neither argument is modified.  Falls back to
    convolute_naive when the shorter input has at most 60 coefficients;
    otherwise uses ntt/intt, which requires the global root tables to match
    `mod`.
    """
    s = a[:]
    t = b[:]
    n = len(s)
    m = len(t)
    if min(n,m) <= 60:
        return convolute_naive(s,t)
    le = 1
    while le < n + m - 1:
        le *= 2
    s += [0] * (le - n)
    t += [0] * (le - m)
    ntt(s)
    ntt(t)
    for i in range(le):
        s[i] = s[i] * t[i] % mod
    intt(s)
    s = s[:n + m - 1]
    return s


def fps_inv(f,deg = -1):
    """Return the first `deg` coefficients of 1/f modulo the global `mod`.

    f[0] must be non-zero (asserted).  deg == -1 means len(f).  The result
    is built by doubling the number of known coefficients each round.
    """
    assert (f[0] != 0)
    if deg == -1:
        deg = len(f)
    res = [0] * deg
    res[0] = pow(f[0],mod-2,mod)
    d = 1
    while d < deg:
        a = [0] * (d << 1)
        tmp = min(len(f),d << 1)
        a[:tmp] = f[:tmp]
        b = [0] * (d << 1)
        b[:d] = res[:d]
        ntt(a)
        ntt(b)
        for i in range(d << 1):
            a[i] = a[i] * b[i] % mod
        intt(a)
        a[:d] = [0] * d
        ntt(a)
        for i in range(d << 1):
            a[i] = a[i] * b[i] % mod
        intt(a)
        for j in range(d,min(d << 1,deg)):
            if a[j]:
                res[j] = mod - a[j]
            else:
                res[j] = 0
        d <<= 1
    return res


def fps_div(f,g):
    """Polynomial division: return (q, r) with f = g * q + r.

    When len(f) < len(g) the pair ([], f) is returned (f itself, not a
    copy).  Trailing zero coefficients are stripped from r.
    """
    n,m = len(f),len(g)
    if n < m:
        return [],f
    rev_f = f[:]
    rev_f = rev_f[::-1]
    rev_g = g[:]
    rev_g = rev_g[::-1]
    rev_q = convolute(rev_f,fps_inv(rev_g,n-m+1))[:n-m+1]
    q = rev_q[:]
    q = q[::-1]
    p = convolute(g,q)
    r = f[:]
    for i in range(min(len(p),len(r))):
        r[i] -= p[i]
        r[i] %= mod
    while len(r):
        if r[-1] != 0:
            break
        r.pop()
    return q,r


def fps_add(f,g):
    """Return the coefficient-wise sum of f and g modulo the global `mod`."""
    n = max(len(f),len(g))
    res = [0] * n
    for i in range(len(f)):
        res[i] = f[i]
    for i in range(len(g)):
        res[i] = (res[i] + g[i]) % mod
    return res


def fps_diff(f):
    """Return the formal derivative of f ([0] when len(f) <= 1)."""
    if len(f) <= 1:
        return [0]
    return [i * f[i] % mod for i in range(1,len(f))]


def fps_integrate(f):
    """Return the formal integral of f with constant term 0 (one longer)."""
    n = len(f)
    res = [0] * (n + 1)
    for i in range(n):
        res[i+1] = pow(i + 1,mod-2,mod) * f[i] % mod
    return res


def fps_log(f,deg = -1):
    """Return the first `deg` coefficients of log(f); f[0] must be 1.

    Computed as the integral of f' / f.  deg == -1 means len(f).
    """
    assert (f[0] == 1)
    if deg == -1:
        deg = len(f)
    res = convolute(fps_diff(f),fps_inv(f,deg))
    res = fps_integrate(res)
    return res[:deg]


def fps_exp(f,deg = -1):
    """Return the first `deg` coefficients of exp(f); f[0] must be 0.

    deg == -1 means len(f).  The number of known coefficients doubles each
    round.  The statement order below is load-bearing: several buffers are
    reused across transforms.
    """
    assert (f[0] == 0)
    if deg == -1:
        deg = len(f)
    res = [1,0]
    if len(f) > 1:
        res[1] = f[1]
    g = [1]
    p = []
    q = [1,1]
    m = 2
    while m < deg:
        y = res + [0]*m
        ntt(y)
        p = q[:]
        z = [y[i] * p[i] for i in range(len(p))]
        intt(z)
        z[:m >> 1] = [0] * (m >> 1)
        ntt(z)
        for i in range(len(p)):
            z[i] = z[i] * (-p[i]) % mod
        intt(z)
        g[m >> 1:] = z[m >> 1:]
        q = g + [0] * m
        ntt(q)
        tmp = min(len(f),m)
        x = f[:tmp] + [0] * (m - tmp)
        x = fps_diff(x)
        x.append(0)
        ntt(x)
        for i in range(len(x)):
            x[i] = x[i] * y[i] % mod
        intt(x)
        for i in range(len(res)):
            if i == 0:
                continue
            x[i-1] -= res[i] * i % mod
        x += [0] * m
        for i in range(m-1):
            x[m+i],x[i] = x[i],0
        ntt(x)
        for i in range(len(q)):
            x[i] = x[i] * q[i] % mod
        intt(x)
        x.pop()
        x = fps_integrate(x)
        x[:m] = [0] * m
        for i in range(m,min(len(f),m << 1)):
            x[i] += f[i]
        ntt(x)
        for i in range(len(y)):
            x[i] = x[i] * y[i] % mod
        intt(x)
        res[m:] = x[m:]
        m <<= 1
    return res[:deg]


def fps_pow(f,k,deg = -1):
    """Return the first `deg` coefficients of f ** k modulo the global `mod`.

    deg == -1 means len(f).  The lowest non-zero term a * x**p is factored
    out, the remaining series is raised to the power via exp(k * log(.)),
    and the result is shifted back by p * k.  The argument `f` is not
    modified.
    """
    if deg == -1:
        deg = len(f)
    if k == 0:
        return [1] + [0] * (deg - 1)
    if len(f) < deg:
        # Pad a copy so the caller's list is left untouched.
        f = f + [0] * (deg - len(f))
    # p = index of the first non-zero coefficient (deg if there is none).
    p = 0
    while p < deg:
        if f[p]:
            break
        p += 1
    if p * k >= deg:
        return [0] * deg
    a = f[p]
    g = [0 for _ in range(deg - p)]
    a_inv = pow(a,mod-2,mod)
    for i in range(deg - p):
        g[i] = f[i + p] * a_inv % mod
    g = fps_log(g)
    for i in range(deg-p):
        g[i] = g[i] * k % mod
    g = fps_exp(g)
    a = pow(a,k,mod)
    res = [0] * deg
    for i in range(deg):
        j = i + p * k
        if j >= deg:
            break
        res[j] = g[i] * a % mod
    return res


def mod_inv(a,mod):
    """Return an inverse of `a` modulo `mod` (0 when mod == 1).

    Uses the extended Euclidean algorithm.  The value returned from the
    first exit is not reduced into [0, mod); callers reduce it themselves.
    `a` must be invertible modulo `mod`.
    """
    if mod == 1:
        return 0
    a %= mod
    b,s,t = mod,1,0
    while True:
        if a == 1:
            return s
        t -= (b // a) * s
        b %= a
        if b == 1:
            return t + mod
        s -= (a // b) * t
        a %= b


def gcd_inv(a,mod):
    """Return (g, x) with g = gcd(a, mod) and a * x congruent to g mod `mod`.

    x is not reduced into a canonical range.
    """
    a %= mod
    b,s,t = mod,1,0
    while True:
        if a == 0:
            return (b,t + mod)
        t -= (b // a) * s
        b %= a
        if b == 0:
            return (a,s)
        s -= (a // b) * t
        a %= b

# NOTE: garner (defined next) returns (0, 0) when no solution exists.
def garner(Rem,Mod):
    """Merge the congruences x = Rem[i] (mod Mod[i]) one at a time.

    Returns (r, m), where m is the combined modulus and 0 <= r < m, or
    (0, 0) when the congruences are inconsistent.  The moduli need not be
    coprime.  Neither argument is modified.  Relies on gcd_inv, defined
    earlier in this file.
    """
    assert (len(Rem) == len(Mod))
    r,m = 0,1
    for rem_i,mod_i in zip(Rem,Mod):
        assert (mod_i)
        m1,r1 = mod_i,rem_i % mod_i
        # Keep the larger modulus in (r, m).
        if m < m1:
            m,m1,r,r1 = m1,m,r1,r
        if m % m1 == 0:
            if r % m1 != r1:
                return (0,0)
            # Consistent: the general step below then adds nothing
            # (u1 == 1), so falling through is harmless.
        g,im = gcd_inv(m,m1)
        y = abs(r1 - r)
        if y % g:
            return (0,0)
        u1 = m1 // g
        y = y // g % u1
        if (r > r1 and y != 0):
            y = u1 - y
        x = y * im % u1
        r += x * m
        m *= u1
    return (r,m)


def Garner(Rem,Mod,mod):
    """Return, modulo `mod`, the x with x = Rem[i] (mod Mod[i]) for all i.

    x is the unique solution in [0, prod(Mod)).  The moduli in Mod must be
    pairwise coprime.  The arguments are copied, so the caller's lists are
    left untouched.
    """
    assert (len(Rem) == len(Mod))
    rems = list(Rem) + [0]
    mods = list(Mod) + [mod]
    n = len(mods)
    coffs = [1] * n       # product of the processed moduli, per modulus
    constants = [0] * n   # partial reconstruction, per modulus
    for i in range(n - 1):
        v = (rems[i] - constants[i]) * pow(coffs[i],-1,mods[i]) % mods[i]
        for j in range(i + 1,n):
            constants[j] = (constants[j] + coffs[j] * v) % mods[j]
            coffs[j] = (coffs[j] * mods[i]) % mods[j]
    return constants[-1]


if __name__ == "__main__":
    # f[0] = 1 and f[i] = (i + 1)^(i - 1) / i!  (mod P) for 1 <= i < N.
    f = [1]
    for i in range(1,N):
        f.append(pow(i + 1,i - 1,P) * fact_inv[i] % P)

    # Root tables for each NTT modulus, built once instead of per iteration.
    ntt_tables = [prepared_fft(m) for m in M]

    ans = 0
    g = [0] * N
    g[0] = 1
    for n in range(1,N + 1):
        res = binom(N,n) * pow(n,n - 2,P) % P

        # g <- f * g truncated to N terms.  Coefficients live modulo P, which
        # is not an NTT modulus, so the product is computed modulo each entry
        # of M and recombined.  ntt/intt/convolute read the globals `mod`
        # and root/rate tables, so BOTH must be switched for every modulus.
        parts = []
        for m,table in zip(M,ntt_tables):
            mod = m
            root,iroot,rate2,irate2,rate3,irate3 = table
            parts.append(convolute(f,g)[:N])
        g = [Garner(residues,M,P) for residues in zip(*parts)]

        res = res * fact[N - n] % P
        res = res * g[N - n] % P
        ans += res
        ans %= P
    print(ans)