#include <iostream>
#include <string>
#include <vector>
#include <algorithm>
#include <atcoder/modint>
// Pull the standard library into the global namespace (single-file program).
using namespace std;
// Short aliases for the built-in integer types used throughout the file.
using i32 = int;
using u32 = unsigned int;
using i64 = long long;
using u64 = unsigned long long;
// rep(i, n): loop with an int counter i running over 0, 1, ..., n-1.
// The bound n is cast to int, so it is compared as a signed value.
#define rep(i,n) for(int i=0; i<(int)(n); i++)
// Large sentinel value (about 1.001e18) stored as i64.
// Not referenced by the code visible in this file.
const i64 INF = 1001001001001001001;

// Compile-time-modulus integer types from the AtCoder Library.
// Modint  : arithmetic modulo 998244353 (not referenced by the visible code).
using Modint = atcoder::static_modint<998244353>;
// Modint2 : arithmetic modulo 1'000'000'007; main() produces its answer in this type.
using Modint2 = atcoder::static_modint<1000'000'007>;
// Modint3 : arithmetic modulo 1'000'000'006, i.e. Modint2's modulus minus one.
//           main() uses it to hold the exponent that is later passed to Modint2::pow.
using Modint3 = atcoder::static_modint<1000'000'006>;

int main(){
    i64 N, P; cin >> N >> P;
    Modint3 a = 1;
    for(i64 i=1; i<=N; i++) a = a * i;
    Modint2 b = 1;
    for(i64 i=1; i<=N; i++) b = b * i;
    b = b.pow(a.val());
    Modint2 q = 0;
    for(i64 i=N/P; i; i/=P) q += i;
    i64 ans = (b * q).val();
    cout << ans << endl;
    return 0;
}



// Stream set-up helper. The single global instance declared with the struct
// is constructed before main() starts, so its constructor configures the
// standard streams ahead of any I/O done by the program:
//   * C++ streams are no longer synchronised with C stdio;
//   * std::cin is untied, so reading from it does not flush another stream first.
struct ios_do_not_sync{
    ios_do_not_sync(){
        std::ios_base::sync_with_stdio(false);
        std::cin.tie(nullptr);
    }
} ios_do_not_sync_instance;