use std::io::Read;

/// Reads the next whitespace-delimited token from standard input.
///
/// Every byte `<= b' '` is a separator. Leading separators are skipped and the separator
/// that ends the token is consumed. A read error is treated like end of input. When the
/// input ends before any token byte was seen, an empty string is returned instead of
/// retrying forever.
///
/// # Panics
/// Panics if the collected bytes are not valid UTF-8.
fn get_word() -> String {
    let stdin = std::io::stdin();
    let mut word: Vec<u8> = Vec::with_capacity(16);
    for byte in stdin.lock().bytes() {
        match byte {
            Ok(b) if b > b' ' => word.push(b),
            // Separator before the first token byte: keep skipping.
            Ok(_) if word.is_empty() => {}
            // Separator after the token, or a read error: the token is complete.
            _ => break,
        }
    }
    String::from_utf8(word).expect("input token is not valid UTF-8")
}

/// Reads the next token with [`get_word`] and parses it as `T`.
///
/// # Panics
/// Panics if the token cannot be parsed (this includes the empty token produced at end of
/// input for types that reject an empty string).
fn get<T: std::str::FromStr>() -> T {
    get_word()
        .parse()
        .ok()
        .expect("failed to parse input token")
}

/// Verified by https://atcoder.jp/contests/abc198/submissions/21774342
mod mod_int {
    use std::ops::*;

    /// Compile-time carrier of a modulus.
    pub trait Mod: Copy {
        /// Returns the modulus.
        fn m() -> i64;
    }

    /// Integer modulo `M::m()`. All operations below keep `x` in `0..M::m()` as long as
    /// their operands are in that range.
    #[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
    pub struct ModInt<M> {
        pub x: i64,
        phantom: ::std::marker::PhantomData<M>,
    }

    impl<M: Mod> ModInt<M> {
        /// Reduces `x` into `0..M::m()`. Negative `x` is accepted and mapped to its
        /// non-negative residue (`rem_euclid`), so `From<i64>` is safe for any `i64`.
        pub fn new(x: i64) -> Self {
            ModInt::new_internal(x.rem_euclid(M::m()))
        }

        /// Wraps `x` without reducing it; the caller guarantees it is already reduced.
        fn new_internal(x: i64) -> Self {
            ModInt { x: x, phantom: ::std::marker::PhantomData }
        }

        /// Raises `self` to the power `e` by binary exponentiation. `e` must be >= 0
        /// (checked with `debug_assert!`).
        pub fn pow(self, mut e: i64) -> Self {
            debug_assert!(e >= 0);
            let mut sum = ModInt::new_internal(1);
            let mut cur = self;
            while e > 0 {
                if e % 2 != 0 {
                    sum *= cur;
                }
                cur *= cur;
                e /= 2;
            }
            sum
        }

        /// Returns `self^(m - 2)`, which is the multiplicative inverse when the modulus
        /// is prime and `self` is non-zero.
        #[allow(dead_code)]
        pub fn inv(self) -> Self {
            self.pow(M::m() - 2)
        }
    }

    impl<M: Mod> Default for ModInt<M> {
        fn default() -> Self {
            Self::new_internal(0)
        }
    }

    impl<M: Mod, T: Into<ModInt<M>>> Add<T> for ModInt<M> {
        type Output = Self;
        fn add(self, other: T) -> Self {
            let other = other.into();
            let mut sum = self.x + other.x;
            if sum >= M::m() {
                sum -= M::m();
            }
            ModInt::new_internal(sum)
        }
    }

    impl<M: Mod, T: Into<ModInt<M>>> Sub<T> for ModInt<M> {
        type Output = Self;
        fn sub(self, other: T) -> Self {
            let other = other.into();
            let mut sum = self.x - other.x;
            if sum < 0 {
                sum += M::m();
            }
            ModInt::new_internal(sum)
        }
    }

    impl<M: Mod, T: Into<ModInt<M>>> Mul<T> for ModInt<M> {
        type Output = Self;
        fn mul(self, other: T) -> Self {
            // `new` performs the (single) reduction of the product.
            ModInt::new(self.x * other.into().x)
        }
    }

    impl<M: Mod, T: Into<ModInt<M>>> AddAssign<T> for ModInt<M> {
        fn add_assign(&mut self, other: T) {
            *self = *self + other;
        }
    }

    impl<M: Mod, T: Into<ModInt<M>>> SubAssign<T> for ModInt<M> {
        fn sub_assign(&mut self, other: T) {
            *self = *self - other;
        }
    }

    impl<M: Mod, T: Into<ModInt<M>>> MulAssign<T> for ModInt<M> {
        fn mul_assign(&mut self, other: T) {
            *self = *self * other;
        }
    }

    impl<M: Mod> Neg for ModInt<M> {
        type Output = Self;
        fn neg(self) -> Self {
            ModInt::new(0) - self
        }
    }

    /// Prints the stored residue `x`.
    impl<M> ::std::fmt::Display for ModInt<M> {
        fn fmt(&self, f: &mut ::std::fmt::Formatter) -> ::std::fmt::Result {
            ::std::fmt::Display::fmt(&self.x, f)
        }
    }

    /// Prints the value as a fraction `a/b` obtained from `red`, with `b` made positive.
    impl<M: Mod> ::std::fmt::Debug for ModInt<M> {
        fn fmt(&self, f: &mut ::std::fmt::Formatter) -> ::std::fmt::Result {
            let (mut a, mut b, _) = red(self.x, M::m());
            if b < 0 {
                a = -a;
                b = -b;
            }
            write!(f, "{}/{}", a, b)
        }
    }

    impl<M: Mod> From<i64> for ModInt<M> {
        fn from(x: i64) -> Self {
            Self::new(x)
        }
    }

    // Finds the simplest fraction x/y congruent to r mod p.
    // The return value (x, y, z) satisfies x = y * r + z * p.
    fn red(r: i64, p: i64) -> (i64, i64, i64) {
        if r.abs() <= 10000 {
            return (r, 1, 0);
        }
        let mut nxt_r = p % r;
        let mut q = p / r;
        // Pick the remainder of smallest absolute value.
        if 2 * nxt_r >= r {
            nxt_r -= r;
            q += 1;
        }
        if 2 * nxt_r <= -r {
            nxt_r += r;
            q -= 1;
        }
        let (x, z, y) = red(nxt_r, r);
        (x, y - q * z, z)
    }
} // mod mod_int

/// Declares a zero-sized marker type `$struct_name` whose `Mod::m()` is `$modulo`.
macro_rules! define_mod {
    ($struct_name: ident, $modulo: expr) => {
        #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
        pub struct $struct_name {}
        impl mod_int::Mod for $struct_name {
            fn m() -> i64 {
                $modulo
            }
        }
    };
}
const MOD: i64 = 998_244_353;
define_mod!(P, MOD);
type MInt = mod_int::ModInt<P>;

// FFT (in-place, verified as NTT only)
// R: Ring + Copy
// Verified by: https://judge.yosupo.jp/submission/53831
// Adopts the technique used in https://judge.yosupo.jp/submission/3153.
mod fft {
    use std::ops::*;

    // n should be a power of 2. zeta is a primitive n-th root of unity.
    // one is unity
    // Note that the result is bit-reversed.
    pub fn fft<R>(f: &mut [R], zeta: R, one: R)
    where
        R: Copy + Add<Output = R> + Sub<Output = R> + Mul<Output = R>,
    {
        let n = f.len();
        assert!(n.is_power_of_two());
        let mut m = n;
        let mut base = zeta;
        // SAFETY: `n` is a power of two and `m` is a power of two with `2 * m <= n`
        // inside the loops. `r` advances from 0 in steps of `2 * m` while `r < n`, hence
        // `r + 2 * m <= n`; every index used is `s < r + m` or `s + m < r + 2 * m`, and
        // in the final pass `r + 1 < n` because `n` is even there.
        unsafe {
            while m > 2 {
                m >>= 1;
                let mut r = 0;
                while r < n {
                    let mut w = one;
                    for s in r..r + m {
                        let &u = f.get_unchecked(s);
                        let d = *f.get_unchecked(s + m);
                        *f.get_unchecked_mut(s) = u + d;
                        *f.get_unchecked_mut(s + m) = w * (u - d);
                        w = w * base;
                    }
                    r += 2 * m;
                }
                base = base * base;
            }
            if m > 1 {
                // m = 1
                let mut r = 0;
                while r < n {
                    let &u = f.get_unchecked(r);
                    let d = *f.get_unchecked(r + 1);
                    *f.get_unchecked_mut(r) = u + d;
                    *f.get_unchecked_mut(r + 1) = u - d;
                    r += 2;
                }
            }
        }
    }

    /// Counterpart of `fft`: butterflies run from the smallest block size upwards, using
    /// successive squares of `zeta_inv` as twiddle bases. No scaling by `1/n` is applied
    /// here; the callers in this file multiply by that factor themselves.
    pub fn inv_fft<R>(f: &mut [R], zeta_inv: R, one: R)
    where
        R: Copy + Add<Output = R> + Sub<Output = R> + Mul<Output = R>,
    {
        let n = f.len();
        assert!(n.is_power_of_two());
        let zeta = zeta_inv; // inverse FFT
        // zetapow[k] = zeta^(2^k); popped from the back so that the smallest block size
        // gets the highest power.
        let mut zetapow = Vec::with_capacity(20);
        {
            let mut m = 1;
            let mut cur = zeta;
            while m < n {
                zetapow.push(cur);
                cur = cur * cur;
                m *= 2;
            }
        }
        let mut m = 1;
        // SAFETY: same argument as in `fft`: `n` is a power of two, `m` is a power of two
        // with `2 * m <= n` in every pass, and `r` is a multiple of `2 * m` below `n`, so
        // `s + m < r + 2 * m <= n` (and `r + 1 < n` in the first pass).
        unsafe {
            if m < n {
                zetapow.pop();
                let mut r = 0;
                while r < n {
                    let &u = f.get_unchecked(r);
                    let d = *f.get_unchecked(r + 1);
                    *f.get_unchecked_mut(r) = u + d;
                    *f.get_unchecked_mut(r + 1) = u - d;
                    r += 2;
                }
                m = 2;
            }
            while m < n {
                let base = zetapow.pop().unwrap();
                let mut r = 0;
                while r < n {
                    let mut w = one;
                    for s in r..r + m {
                        let &u = f.get_unchecked(s);
                        let d = *f.get_unchecked(s + m) * w;
                        *f.get_unchecked_mut(s) = u + d;
                        *f.get_unchecked_mut(s + m) = u - d;
                        w = w * base;
                    }
                    r += 2 * m;
                }
                m *= 2;
            }
        }
    }
}

// Depends on: fft.rs, MInt.rs
// Verified by: ABC269-Ex (https://atcoder.jp/contests/abc269/submissions/39116328)
/// Polynomial (coefficient vector) operations over `ModInt<M>`, parameterised by the
/// element whose powers are used as FFT roots of unity.
pub struct FPSOps<M: mod_int::Mod = P> {
    generator: mod_int::ModInt<M>,
}

impl<M: mod_int::Mod> FPSOps<M> {
    /// Stores `generator`; `mul` uses `generator^((m - 1) / len)` as the root of unity.
    pub fn new(generator: mod_int::ModInt<M>) -> Self {
        FPSOps { generator: generator }
    }
}

impl<M: mod_int::Mod> FPSOps<M> {
    /// Coefficient-wise sum; the result has the length of the longer operand.
    pub fn add(
        &self,
        mut a: Vec<mod_int::ModInt<M>>,
        mut b: Vec<mod_int::ModInt<M>>,
    ) -> Vec<mod_int::ModInt<M>> {
        if a.len() < b.len() {
            std::mem::swap(&mut a, &mut b);
        }
        for (x, &y) in a.iter_mut().zip(b.iter()) {
            *x += y;
        }
        a
    }

    /// Product of two coefficient vectors via FFT; the result has
    /// `a.len() + b.len() - 1` entries. An empty operand (the zero polynomial) yields an
    /// empty result instead of underflowing `len() - 1`.
    pub fn mul(
        &self,
        a: Vec<mod_int::ModInt<M>>,
        b: Vec<mod_int::ModInt<M>>,
    ) -> Vec<mod_int::ModInt<M>> {
        if a.is_empty() || b.is_empty() {
            return Vec::new();
        }
        let n = a.len() - 1;
        let m = b.len() - 1;
        // Smallest power of two strictly greater than n + m.
        let mut p = 1;
        while p <= n + m {
            p *= 2;
        }
        let mut f = a;
        f.resize(p, mod_int::ModInt::new(0));
        let mut g = b;
        g.resize(p, mod_int::ModInt::new(0));
        // 1 / p, folded into the pointwise product because inv_fft does not normalise.
        let fac = mod_int::ModInt::<M>::new(p as i64).inv();
        let zeta = self.generator.pow((M::m() - 1) / p as i64);
        fft::fft(&mut f, zeta, 1.into());
        fft::fft(&mut g, zeta, 1.into());
        for (x, &y) in f.iter_mut().zip(g.iter()) {
            *x *= y * fac;
        }
        fft::inv_fft(&mut f, zeta.inv(), 1.into());
        f.truncate(n + m + 1);
        f
    }
}

// Computes f^{-1} mod x^{f.len()}.
// Reference: https://codeforces.com/blog/entry/56422
// Complexity: O(n log n)
// Verified by: https://judge.yosupo.jp/submission/3219
// Depends on: MInt.rs, fft.rs
//
// Panics unless f.len() is a power of two and f[0] == 1.
fn fps_inv<P: mod_int::Mod + PartialEq>(
    f: &[mod_int::ModInt<P>],
    generator: mod_int::ModInt<P>,
) -> Vec<mod_int::ModInt<P>> {
    let n = f.len();
    assert!(n.is_power_of_two());
    assert_eq!(f[0], 1.into());
    let mut sz = 1;
    let mut r = vec![mod_int::ModInt::new(0); n];
    let mut tmp_f = vec![mod_int::ModInt::new(0); n];
    let mut tmp_r = vec![mod_int::ModInt::new(0); n];
    r[0] = 1.into();
    // Adopts the technique used in https://judge.yosupo.jp/submission/3153
    // Each round doubles the number of known coefficients of the inverse (sz -> 2 * sz).
    while sz < n {
        let zeta = generator.pow((P::m() - 1) / sz as i64 / 2);
        tmp_f[..2 * sz].copy_from_slice(&f[..2 * sz]);
        tmp_r[..2 * sz].copy_from_slice(&r[..2 * sz]);
        fft::fft(&mut tmp_r[..2 * sz], zeta, 1.into());
        fft::fft(&mut tmp_f[..2 * sz], zeta, 1.into());
        // Two unnormalised inverse transforms of length 2 * sz follow, hence (1/(2 sz))^2.
        let fac = mod_int::ModInt::new(2 * sz as i64).inv().pow(2);
        for i in 0..2 * sz {
            tmp_f[i] *= tmp_r[i];
        }
        fft::inv_fft(&mut tmp_f[..2 * sz], zeta.inv(), 1.into());
        for v in &mut tmp_f[..sz] {
            *v = 0.into();
        }
        fft::fft(&mut tmp_f[..2 * sz], zeta, 1.into());
        for i in 0..2 * sz {
            tmp_f[i] = -tmp_f[i] * tmp_r[i] * fac;
        }
        fft::inv_fft(&mut tmp_f[..2 * sz], zeta.inv(), 1.into());
        r[sz..2 * sz].copy_from_slice(&tmp_f[sz..2 * sz]);
        sz *= 2;
    }
    r
}

type M = MInt;

// Copied and modified from https://judge.yosupo.jp/submission/133199.
// Originally by sansen.
//
// Returns, for every k in 0..=c.len() - a.len(), the value sum_j c[k + j] * a[j].
// Short `a` (at most 32 entries) is handled by direct summation, longer `a` by a cyclic
// convolution of `c` with the reversed `a`.
// Panics if c.len() < a.len(), or if `a` is empty (a window size of 0 is rejected).
fn middle_product(c: &[M], a: &[M]) -> Vec<M> {
    assert!(c.len() >= a.len());
    if a.len() <= (1 << 5) {
        return c
            .windows(a.len())
            .map(|window| {
                window
                    .iter()
                    .zip(a.iter())
                    .fold(MInt::new(0), |acc, (&ci, &ai)| acc + ci * ai)
            })
            .collect();
    }
    let size = c.len().next_power_of_two();
    let mut x = Vec::from(c);
    x.resize(size, MInt::new(0));
    let mut y = Vec::from(a);
    y.reverse();
    y.resize(size, MInt::new(0));
    let zeta = MInt::new(3).pow((MOD - 1) / size as i64);
    fft::fft(&mut x, zeta, 1.into());
    fft::fft(&mut y, zeta, 1.into());
    // 1 / size, because inv_fft does not normalise.
    let factor = MInt::new(size as i64).inv();
    for (xi, &yi) in x.iter_mut().zip(y.iter()) {
        *xi *= yi * factor;
    }
    fft::inv_fft(&mut x, zeta.inv(), 1.into());
    // Only indices a.len() - 1 ..= c.len() - 1 of the cyclic result are read.
    (a.len()..=c.len()).map(|z| x[z - 1]).collect()
}

/// Evaluates the polynomial with coefficients `c` (`c[i]` multiplies `x^i`) at every point
/// of `p`, returning the values in the order of `p`.
///
/// Builds a heap-shaped product tree of the factors `1 - p[i] * x`, inverts the root as a
/// power series and pushes the result down the tree with `middle_product`.
/// An empty `p` yields an empty vector; an empty `c` (the zero polynomial) yields zeros.
fn multipoint_evaluation(ops: &FPSOps, c: &[MInt], p: &[MInt]) -> Vec<M> {
    if p.is_empty() {
        return vec![];
    }
    if c.is_empty() {
        // Without this guard `middle_product` would be called with an empty window.
        return vec![MInt::new(0); p.len()];
    }
    let n = c.len();
    let m = p.len();
    // prod[m + i] = 1 - p[i] x; prod[i] = prod[2i] * prod[2i + 1] for 1 <= i < m.
    let mut prod = vec![vec![]; 2 * m];
    for (leaf, point) in prod[m..].iter_mut().zip(p.iter()) {
        *leaf = vec![MInt::new(1), -*point];
    }
    for i in (1..m).rev() {
        prod[i] = ops.mul(prod[2 * i].clone(), prod[2 * i + 1].clone());
    }
    let mut prod1 = prod[1].clone();
    let mut sz = 1;
    while sz < n {
        sz *= 2;
    }
    // fps_inv needs a power-of-two length; only the first n coefficients are used.
    prod1.resize(sz, 0.into());
    let mut inv = fps_inv(&prod1, 3.into());
    inv.truncate(n);
    let mut c = c.to_vec();
    c.resize(n + m - 1, MInt::new(0));
    let mut dp = vec![vec![]; 2 * m];
    dp[1] = middle_product(&c, &inv);
    for i in 1..m {
        // Each child is combined with the sibling's product.
        dp[2 * i] = middle_product(&dp[i], &prod[2 * i + 1]);
        dp[2 * i + 1] = middle_product(&dp[i], &prod[2 * i]);
    }
    dp[m..].iter().map(|leaf| leaf[0]).collect()
}
// End of copy-pasted part.
fn fps_mul_all(ops: &FPSOps, f: &[Vec<MInt>]) -> Vec<MInt> { let m = f.len(); let mut seg = vec![vec![]; 2 * m]; for i in 0..m { seg[i + m] = f[i].to_vec(); } for i in (1..m).rev() { seg[i] = ops.mul( std::mem::replace(&mut seg[2 * i], vec![]), std::mem::replace(&mut seg[2 * i + 1], vec![]), ); } std::mem::replace(&mut seg[1], vec![]) } fn fps_common_denom(ops: &FPSOps, frac: &[(Vec<MInt>, Vec<MInt>)]) -> (Vec<MInt>, Vec<MInt>) { let m = frac.len(); let mut seg = vec![(vec![], vec![]); 2 * m]; for i in 0..m { seg[i + m] = frac[i].clone(); } for i in (1..m).rev() { let den = ops.mul(seg[2 * i].1.clone(), seg[2 * i + 1].1.clone()); let mut num = ops.mul( std::mem::replace(&mut seg[2 * i].1, vec![]), std::mem::replace(&mut seg[2 * i + 1].0, vec![]), ); let tmp = ops.mul( std::mem::replace(&mut seg[2 * i].0, vec![]), std::mem::replace(&mut seg[2 * i + 1].1, vec![]), ); num = ops.add(num, tmp); seg[i] = (num, den); } std::mem::replace(&mut seg[1], (vec![], vec![])) } // https://37zigen.com/lagrange-interpolation/ fn lagrange_interpolate(ops: &FPSOps, xy: &[(MInt, MInt)]) -> Vec<MInt> { let n = xy.len(); let mut xs = vec![MInt::new(0); n]; let mut ps = vec![vec![]; n]; for i in 0..n { xs[i] = xy[i].0; ps[i] = vec![-xy[i].0, 1.into()]; } let g = fps_mul_all(ops, &ps); let mut gdash = vec![MInt::new(0); n]; for i in 0..n { gdash[i] = g[i + 1] * (i + 1) as i64; } let vals = multipoint_evaluation(ops, &gdash, &xs); let mut fracs = vec![(vec![MInt::new(1)], vec![]); n]; for i in 0..n { fracs[i].0[0] = vals[i].inv() * xy[i].1; fracs[i].1 = vec![-xy[i].0, 1.into()]; } let (num, _) = fps_common_denom(ops, &fracs); num } // https://ferin-tech.hatenablog.com/entry/2019/08/11/%E3%83%A9%E3%82%B0%E3%83%A9%E3%83%B3%E3%82%B8%E3%83%A5%E8%A3%9C%E9%96%93 // Finds f(t) given y[i] = f(x0 + d * i) for 0 <= i < y.len(). 
// O(y.len() * log MOD)-time fn lagrange_interpolate_one_arithprog(y: &[MInt], x0: MInt, d: MInt, t: MInt) -> MInt { assert_ne!(d, 0.into()); let n = y.len(); let mut sum = MInt::new(0); // (x-x0-d*i)/((x-x0)...(x-x0-d*(n-1)))|_{x=x0+d*i} let mut cur = MInt::new(1); // (t-x0)...(t-x0-d*(n-1)) let mut tprod = MInt::new(1); for i in 1..n { cur *= -d * i as i64; } cur = cur.inv(); for i in 0..n { if t == x0 + d * i as i64 { return y[i]; } tprod *= t - x0 - d * i as i64; } for i in 0..n { sum += y[i] * cur * tprod * (t - x0 - d * i as i64).inv(); if i + 1 < n { cur *= (n - i - 1) as i64; cur *= -MInt::new((i + 1) as i64).inv(); } } sum } // Generated by 2747-helper.rs const STEP: usize = 10000000; const LEN: usize = 100; const FACT_TABLE: [i64; 100] = [ 1, 295201906, 160030060, 957629942, 545208507, 213689172, 760025067, 939830261, 506268060, 39806322, 808258749, 440133909, 686156489, 741797144, 390377694, 12629586, 544711799, 104121967, 495867250, 421290700, 117153405, 57084755, 202713771, 675932866, 79781699, 956276337, 652678397, 35212756, 655645460, 468129309, 761699708, 533047427, 287671032, 206068022, 50865043, 144980423, 111276893, 259415897, 444094191, 593907889, 573994984, 892454686, 566073550, 128761001, 888483202, 251718753, 548033568, 428105027, 742756734, 546182474, 62402409, 102052166, 826426395, 159186619, 926316039, 176055335, 51568171, 414163604, 604947226, 681666415, 511621808, 924112080, 265769800, 955559118, 763148293, 472709375, 19536133, 860830935, 290471030, 851685235, 242726978, 169855231, 612759169, 599797734, 961628039, 953297493, 62806842, 37844313, 909741023, 689361523, 887890124, 380694152, 669317759, 367270918, 806951470, 843736533, 377403437, 945260111, 786127243, 80918046, 875880304, 364983542, 623250998, 598764068, 804930040, 24257676, 214821357, 791011898, 954947696, 183092975, ]; // https://yukicoder.me/problems/no/2747 (3.5) // solved with hints // \sum_{1 <= i <= N} (N-i)i^K が計算できれば良い。これはベルヌーイ数の先頭 K 項が O(K log K)-time 
程度で計算できれば計算できる。 // -> 解説を見た。ラグランジュ補間の方が簡単。最終的な多項式は K+2 次なので、0 <= i <= K+2 の K+3 点で補間する。 // 最後に (N-2)! * (N-1) * 2 を掛けること。 // - (N-2)!: 残りの点の埋め方 // - (N-1): どの隙間を見るか // - 2: 左の方が大きいか // Tags: lagrange-polynomial-interpolation, lagrange-interpolation fn main() { let n: i64 = get(); let k: i64 = get(); let mut y = vec![]; let mut sum = MInt::new(0); for i in 0..k + 3 { sum += MInt::new(i).pow(k) * (n - i); y.push(sum); } let mut ans = lagrange_interpolate_one_arithprog(&y, 0.into(), 1.into(), n.into()); ans *= 2; let tbl_idx = ((n - 1) as usize / STEP).min(LEN - 1); let mut fac = MInt::new(FACT_TABLE[tbl_idx]); for i in tbl_idx * STEP + 1..=(n - 1) as usize { fac *= i as i64; } ans *= fac; println!("{ans}"); }