#include #include using namespace std; using namespace atcoder; istream &operator>>(istream &is, modint &a) { long long v; is >> v; a = v; return is; } ostream &operator<<(ostream &os, const modint &a) { return os << a.val(); } istream &operator>>(istream &is, modint998244353 &a) { long long v; is >> v; a = v; return is; } ostream &operator<<(ostream &os, const modint998244353 &a) { return os << a.val(); } istream &operator>>(istream &is, modint1000000007 &a) { long long v; is >> v; a = v; return is; } ostream &operator<<(ostream &os, const modint1000000007 &a) { return os << a.val(); } typedef long long ll; typedef vector> Graph; typedef pair pii; typedef pair pll; #define rep(i,n) for (int i = 0;i < (int)(n); i++) #define all(x) x.begin(), x.end() #define rall(x) x.rbegin(), x.rend() #define my_sort(x) sort(x.begin(), x.end()) #define my_max(x) *max_element(all(x)) #define my_min(x) *min_element(all(x)) template inline bool chmax(T& a, T b) { if (a < b) { a = b; return 1; } return 0; } template inline bool chmin(T& a, T b) { if (a > b) { a = b; return 1; } return 0; } const int INF = (1<<30) - 1; const ll LINF = (1LL<<62) - 1; const int MOD = 998244353; const int MOD2 = 1e9+7; const double PI = acos(-1); vector di = {1,0,-1,0}; vector dj = {0,1,0,-1}; #ifdef LOCAL # include # define debug(...) debug_print::multi_print(#__VA_ARGS__, __VA_ARGS__) #else # define debug(...) (static_cast(0)) #endif using mint = modint998244353; int main(){ cin.tie(0); ios_base::sync_with_stdio(false); int N,P; cin >> N >> P; vector fact(N + 1, 1); for(int i=1;i<=N;i++) fact[i] = mint(i) * fact[i - 1]; auto nCk = [&](int n, int k){ return fact[n] / fact[n - k] / fact[k]; }; mint ans = fact[N]; if (N >= P){ int b = N % P; int a = N / P; for(int i=0;i<=a;i++){ mint res = fact[N] / fact[i] / fact[N - i * P]; res /= mint(P).pow(i); ans -= res; } } cout << ans << endl; }