// #define _GLIBCXX_DEBUG
// #pragma GCC optimize("O2,unroll-loops")

#include <algorithm>
#include <cassert>
#include <functional>
#include <iomanip>
#include <iostream>
#include <queue>
#include <utility>
#include <vector>
using namespace std;

// Loop / container shorthands kept from the original template.
#define rep(i, n) for (int i = 0; i < int(n); i++)
#define per(i, n) for (int i = (n)-1; 0 <= i; i--)
#define rep2(i, l, r) for (int i = (l); i < int(r); i++)
#define per2(i, l, r) for (int i = (r)-1; int(l) <= i; i--)
#define each(e, v) for (auto &e : v)
#define MM << " " <<
#define pb push_back
#define eb emplace_back
#define all(x) begin(x), end(x)
#define rall(x) rbegin(x), rend(x)
#define sz(x) (int)(x).size()

// Prints v[i] + x for every element, space separated, followed by a newline
// (an empty vector prints just the newline).
template <typename T>
void print(const vector<T> &v, T x = 0) {
    int n = v.size();
    for (int i = 0; i < n; i++) cout << v[i] + x << (i == n - 1 ? '\n' : ' ');
    if (v.empty()) cout << '\n';
}

using ll = long long;
using pii = pair<int, int>;
using pll = pair<ll, ll>;

// x = max(x, y); returns true iff x was changed.
template <typename T>
bool chmax(T &x, const T &y) {
    return (x < y) ? (x = y, true) : false;
}

// x = min(x, y); returns true iff x was changed.
template <typename T>
bool chmin(T &x, const T &y) {
    return (x > y) ? (x = y, true) : false;
}

template <typename T>
using minheap = std::priority_queue<T, std::vector<T>, std::greater<T>>;

template <typename T>
using maxheap = std::priority_queue<T>;

// Index of the first element of the sorted vector v that is >= x.
template <typename T>
int lb(const vector<T> &v, T x) {
    return lower_bound(begin(v), end(v), x) - begin(v);
}

// Index of the first element of the sorted vector v that is > x.
template <typename T>
int ub(const vector<T> &v, T x) {
    return upper_bound(begin(v), end(v), x) - begin(v);
}

// Sorts v and removes duplicate values.
template <typename T>
void rearrange(vector<T> &v) {
    sort(begin(v), end(v));
    v.erase(unique(begin(v), end(v)), end(v));
}

// __int128_t gcd(__int128_t a, __int128_t b) {
//     if (a == 0)
//         return b;
//     if (b == 0)
//         return a;
//     __int128_t cnt = a % b;
//     while (cnt != 0) {
//         a = b;
//         b = cnt;
//         cnt = a % b;
//     }
//     return b;
// }

// Disjoint-set forest with path compression.
// data[v] < 0  : v is a root and -data[v] is the size of its set.
// data[v] >= 0 : data[v] is the parent of v.
struct Union_Find_Tree {
    vector<int> data;
    const int n;
    int cnt;  // current number of disjoint sets

    Union_Find_Tree(int n) : data(n, -1), n(n), cnt(n) {}

    // Representative of the set containing x (compresses the path on the way).
    int root(int x) {
        if (data[x] < 0) return x;
        return data[x] = root(data[x]);
    }

    int operator[](int i) { return root(i); }

    // Merges the sets of x and y; returns false when they were already joined.
    bool unite(int x, int y) {
        x = root(x), y = root(y);
        if (x == y) return false;
        // Union by size is kept disabled exactly as in the original, so the
        // root of x always becomes the root of the merged set.
        // if (data[x] > data[y]) swap(x, y);
        data[x] += data[y], data[y] = x;
        cnt--;
        return true;
    }

    // Number of elements in the set containing x.
    int size(int x) { return -data[root(x)]; }

    // Number of disjoint sets.
    int count() { return cnt; }

    bool same(int x, int y) { return root(x) == root(y); }

    // Resets every element to its own singleton set.
    void clear() {
        cnt = n;
        fill(begin(data), end(data), -1);
    }
};

// Integer modulo the compile-time constant `mod`; the stored value is always
// in [0, mod). inverse() relies on Fermat's little theorem, so division is
// only meaningful for a prime modulus.
template <int mod>
struct Mod_Int {
    int x;

    Mod_Int() : x(0) {}

    // Normalises the remainder instead of negating y, so that
    // y == LLONG_MIN does not overflow.
    Mod_Int(long long y) : x(static_cast<int>(y % mod)) {
        if (x < 0) x += mod;
    }

    static int get_mod() { return mod; }

    Mod_Int &operator+=(const Mod_Int &p) {
        if ((x += p.x) >= mod) x -= mod;
        return *this;
    }

    Mod_Int &operator-=(const Mod_Int &p) {
        if ((x += mod - p.x) >= mod) x -= mod;
        return *this;
    }

    Mod_Int &operator*=(const Mod_Int &p) {
        x = (int)(1LL * x * p.x % mod);
        return *this;
    }

    Mod_Int &operator/=(const Mod_Int &p) {
        *this *= p.inverse();
        return *this;
    }

    Mod_Int &operator++() { return *this += Mod_Int(1); }

    Mod_Int operator++(int) {
        Mod_Int tmp = *this;
        ++*this;
        return tmp;
    }

    Mod_Int &operator--() { return *this -= Mod_Int(1); }

    Mod_Int operator--(int) {
        Mod_Int tmp = *this;
        --*this;
        return tmp;
    }

    Mod_Int operator-() const { return Mod_Int(-x); }

    Mod_Int operator+(const Mod_Int &p) const { return Mod_Int(*this) += p; }

    Mod_Int operator-(const Mod_Int &p) const { return Mod_Int(*this) -= p; }

    Mod_Int operator*(const Mod_Int &p) const { return Mod_Int(*this) *= p; }

    Mod_Int operator/(const Mod_Int &p) const { return Mod_Int(*this) /= p; }

    bool operator==(const Mod_Int &p) const { return x == p.x; }

    bool operator!=(const Mod_Int &p) const { return x != p.x; }

    // Multiplicative inverse, computed as x^(mod - 2); asserts x != 0.
    Mod_Int inverse() const {
        assert(*this != Mod_Int(0));
        return pow(mod - 2);
    }

    // x^k by binary exponentiation; any k <= 0 yields 1.
    Mod_Int pow(long long k) const {
        Mod_Int now = *this, ret = 1;
        for (; k > 0; k >>= 1, now *= now) {
            if (k & 1) ret *= now;
        }
        return ret;
    }

    friend ostream &operator<<(ostream &os, const Mod_Int &p) { return os << p.x; }

    friend istream &operator>>(istream &is, Mod_Int &p) {
        long long a;
        is >> a;
        p = Mod_Int(a);
        return is;
    }
};

// x^n modulo mod by binary exponentiation (run-time modulus).
ll mpow(ll x, ll n, ll mod) {
    ll ans = 1;
    x %= mod;
    while (n != 0) {
        if (n & 1) ans = ans * x % mod;
        x = x * x % mod;
        n = n >> 1;
    }
    ans %= mod;
    return ans;
}

// Inverse of a modulo m via the extended Euclidean algorithm; the result is
// normalised into [0, m).
template <typename T>
T modinv(T a, const T &m) {
    T b = m, u = 1, v = 0;
    while (b > 0) {
        T t = a / b;
        swap(a -= t * b, b);
        swap(u -= t * v, v);
    }
    return u >= 0 ? u % m : (m - (-u) % m) % m;
}

// Floor division: rounds the quotient a / b toward negative infinity.
ll divide_int(ll a, ll b) {
    if (b < 0) a = -a, b = -b;
    return (a >= 0 ? a / b : (a - b + 1) / b);
}

// const int MOD = 1000000007;
const int MOD = 998244353;
using mint = Mod_Int<MOD>;

// ----- library -------

// Lagrange interpolation (single point)
// Complexity: O(n)
// Overview:
// Given the values f(0), f(1), ..., f(n-1) of a polynomial f(x) of degree at
// most n-1, compute f(c) for one given point c.
// By Lagrange interpolation,
//   f(c) = sum over 0 <= i < n of f(i) * prod_{j != i} (c - j) / (i - j).

// Factorial / inverse-factorial tables over a modular type T, plus counting
// helpers built on them. init(n) must be called before any other member and
// must cover every index that will be queried.
template <typename T>
struct Combination {
    static vector<T> _fac, _ifac;

    Combination() {}

    // Fills _fac[0..n] with i! and _ifac[0..n] with 1 / i!.
    static void init(int n) {
        _fac.resize(n + 1), _ifac.resize(n + 1);
        _fac[0] = 1;
        for (int i = 1; i <= n; i++) _fac[i] = _fac[i - 1] * i;
        _ifac[n] = _fac[n].inverse();
        for (int i = n; i >= 1; i--) _ifac[i - 1] = _ifac[i] * i;
    }

    static T fac(int k) { return _fac[k]; }

    static T ifac(int k) { return _ifac[k]; }

    // 1 / k, computed as (k-1)! / k!.
    static T inv(int k) { return fac(k - 1) * ifac(k); }

    // Number of ordered selections of k out of n (0 when k < 0 or n < k).
    static T P(int n, int k) {
        if (k < 0 || n < k) return 0;
        return fac(n) * ifac(n - k);
    }

    // Binomial coefficient C(n, k) (0 when k < 0 or n < k).
    static T C(int n, int k) {
        if (k < 0 || n < k) return 0;
        return fac(n) * ifac(n - k) * ifac(k);
    }

    // Number of ways to put k indistinguishable balls into n distinguishable
    // boxes.
    static T H(int n, int k) {
        if (n < 0 || k < 0) return 0;
        return k == 0 ? 1 : C(n + k - 1, k);
    }

    // Number of ways to put n distinguishable balls into k indistinguishable
    // boxes so that every box receives at least one ball.
    static T second_stirling_number(int n, int k) {
        T ret = 0;
        for (int i = 0; i <= k; i++) {
            T tmp = C(k, i) * T(i).pow(n);
            ret += ((k - i) & 1) ? -tmp : tmp;
        }
        return ret * ifac(k);
    }

    // Number of ways to put n distinguishable balls into k indistinguishable
    // boxes (boxes may stay empty).
    static T bell_number(int n, int k) {
        if (n == 0) return 1;
        k = min(k, n);
        // pref[i] = sum_{j <= i} (-1)^j / j!
        vector<T> pref(k + 1);
        pref[0] = 1;
        for (int i = 1; i <= k; i++) {
            if (i & 1) {
                pref[i] = pref[i - 1] - ifac(i);
            } else {
                pref[i] = pref[i - 1] + ifac(i);
            }
        }
        T ret = 0;
        for (int i = 1; i <= k; i++) ret += T(i).pow(n) * ifac(i) * pref[k - i];
        return ret;
    }
};

template <typename T>
vector<T> Combination<T>::_fac = vector<T>();

template <typename T>
vector<T> Combination<T>::_ifac = vector<T>();

// Evaluates at c the polynomial of degree < ys.size() that takes the value
// ys[i] at x = i, in O(ys.size()).
// Combination<T> must already be initialised up to at least ys.size() - 1.
// Returns 0 for an empty ys.
template <typename T>
T single_point_interpolation(vector<T> ys, T c) {
    using comb_ = Combination<T>;
    int n = ys.size();
    // Forward pass: fold prod_{j < i} (c - j) / i! into ys[i].
    T coef = 1;
    for (int i = 0; i < n; i++) {
        ys[i] *= coef * comb_::ifac(i);
        coef *= c - i;
    }
    // Backward pass: multiply by prod_{j > i} (c - j) / (n-1-i)! together with
    // the sign (-1)^(n-1-i) coming from prod_{j > i} (i - j).
    coef = 1;
    T ret = 0;
    for (int i = n - 1; i >= 0; i--) {
        ret += ys[i] * coef * comb_::ifac(n - 1 - i) * ((n - 1 - i) & 1 ? -1 : 1);
        coef *= c - i;
    }
    return ret;
}

// Precomputed checkpoints used by factorial_mod: fac[i] is used as
// (i * 10^7)! mod 998244353. The last entry is 0 because 10^9 exceeds the
// modulus.
const vector<int> fac{1,295201906,160030060,957629942,545208507,213689172,760025067,939830261,506268060,39806322,808258749,440133909,686156489,741797144,390377694,12629586,544711799,104121967,495867250,421290700,117153405,57084755,202713771,675932866,79781699,956276337,652678397,35212756,655645460,468129309,761699708,533047427,287671032,206068022,50865043,144980423,111276893,259415897,444094191,593907889,573994984,892454686,566073550,128761001,888483202,251718753,548033568,428105027,742756734,546182474,62402409,102052166,826426395,159186619,926316039,176055335,51568171,414163604,604947226,681666415,511621808,924112080,265769800,955559118,763148293,472709375,19536133,860830935,290471030,851685235,242726978,169855231,612759169,599797734,961628039,953297493,62806842,37844313,909741023,689361523,887890124,380694152,669317759,367270918,806951470,843736533,377403437,945260111,786127243,80918046,875880304,364983542,623250998,598764068,804930040,24257676,214821357,791011898,954947696,183092975,0};

// ----- library -------

// Spacing of the checkpoints stored in `fac`.
const int FAC_STEP = 10000000;

// m! mod MOD, starting from the nearest checkpoint at or below m.
// Returns 1 for m < 0 (empty product, matching the original behaviour for
// n <= 0) and 0 for m >= MOD, where the factorial contains the prime modulus
// as a factor. The original indexed `fac` without a bounds check, which ran
// off the table for large or very negative arguments.
mint factorial_mod(long long m) {
    if (m < 0) return 1;
    if (m >= MOD) return 0;
    const long long block = m / FAC_STEP;  // at most 99 because m < MOD
    mint res = fac[block];
    for (long long i = block * FAC_STEP + 1; i <= m; i++) res *= i;
    return res;
}

// Computes 2 * (n-1)! * sum_{j=1}^{n-1} j^k * (n - j) modulo MOD.
// The sum equals n * S_k(n-1) - S_{k+1}(n-1) with S_p(m) = 1^p + ... + m^p.
// S_p is a polynomial of degree p + 1 in m, so it is sampled at
// m = 0..p+1 and evaluated at m = n - 1 by interpolation.
mint solve(int n, int k) {
    vector<mint> fk(k + 2), fk1(k + 3);
    for (int i = 1; i < k + 2; i++) fk[i] = fk[i - 1] + mint(i).pow(k);
    for (int i = 1; i < k + 3; i++) fk1[i] = fk1[i - 1] + mint(i).pow(k + 1);

    using comb = Combination<mint>;
    comb::init(k + 10);

    mint ans = single_point_interpolation<mint>(fk, n - 1) * n -
               single_point_interpolation<mint>(fk1, n - 1);
    ans *= 2;
    ans *= factorial_mod(static_cast<long long>(n) - 1);
    return ans;
}

// Reads n and k from standard input and prints solve(n, k).
int main() {
    ios::sync_with_stdio(false);
    std::cin.tie(nullptr);
    cout << fixed << setprecision(15);

    int n, k;
    cin >> n >> k;

    cout << solve(n, k) << '\n';
}