#pragma GCC target("avx2") #pragma GCC optimize("O3") #pragma GCC optimize("unroll-loops") #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include using namespace std; using namespace atcoder; typedef long long ll; #define rep(i, n) for (int i = 0; i < (int)(n); i++) #define repr(i, n) for (int i = (int)(n)-1; i >= 0; i--) #define repk(i, k, n) for (int i = k; i < (int)(n); i++) #define all(v) v.begin(), v.end() #define mod1 1000000007 #define mod2 998244353 #define mod3 100000007 #define vi vector #define vs vector #define vc vector #define vl vector #define vb vector #define vvi vector> #define vvc vector> #define vvl vector> #define vvb vector> #define vvvi vector>> #define vvvl vector>> #define pii pair #define pil pair #define pli pair #define pll pair #define vpii vector> #define vpll vector> #define vvpii vector>> #define vvpll vector>> using mint = modint998244353; template void debug(T e) { cerr << e << endl; } template void debug(vector &v) { rep(i, v.size()) { cerr << v[i] << " "; } cerr << endl; } template void debug(vector> &v) { rep(i, v.size()) { rep(j, v[i].size()) { cerr << v[i][j] << " "; } cerr << endl; } } template void debug(vector> &v) { rep(i, v.size()) { cerr << v[i].first << " " << v[i].second << endl; } } template void debug(set &st) { for (auto itr = st.begin(); itr != st.end(); itr++) { cerr << *itr << " "; } cerr << endl; } template void debug(multiset &ms) { for (auto itr = ms.begin(); itr != ms.end(); itr++) { cerr << *itr << " "; } cerr << endl; } template void debug(map &mp) { for (auto itr = mp.begin(); itr != mp.end(); itr++) { cerr << itr->first << " " << itr->second << endl; } } void debug_out() { cerr << endl; } template void debug_out(Head H, Tail... T) { cerr << H << " "; debug_out(T...); } ll my_pow(ll x, ll n, ll mod) { //  繰り返し二乗法.x^nをmodで割った余り. ll ret; if (n == 0) { ret = 1; } else if (n % 2 == 1) { ret = (x * my_pow((x * x) % mod, n / 2, mod)) % mod; } else { ret = my_pow((x * x) % mod, n / 2, mod); } return ret; } int main() { ll N, P; cin >> N >> P; vector fact(500005); fact[0] = 1; for (ll i = 0; i < 500004; i++) { fact[i + 1] = (fact[i] * (i + 1)) % mod2; } if (N < P) { cout << (fact[N] + mod2 - 1) % mod2 << endl; } else { ll fail = 0; /*ll perfecto = fact[P]; ll p_sub = 0; for (ll k = 2; k <= P; k++) { if (k % 2 == 0) { p_sub = (p_sub + my_pow(fact[k], mod2 - 2, mod2)) % mod2; } else { p_sub = (p_sub + mod2 - my_pow(fact[k], mod2 - 2, mod2)) % mod2; } } perfecto = (perfecto * p_sub) % mod2;*/ ll sub = fact[N]; for (ll i = 0; i <= N; i += P) { // debug_out(i / P, N - i); ll subsub = sub; subsub = (subsub * my_pow(fact[i / P], mod2 - 2, mod2)) % mod2; subsub = (subsub * my_pow(fact[N - i], mod2 - 2, mod2)) % mod2; subsub = (subsub * my_pow(fact[P - 1], i / P, mod2)) % mod2; fail = (fail + subsub) % mod2; sub = (sub * my_pow(fact[P], mod2 - 2, mod2)) % mod2; } cout << (fact[N] + mod2 - fail) % mod2 << endl; } }