#ifndef LOCAL #define FAST_IO #endif // ============ #include #define OVERRIDE(a, b, c, d, ...) d #define REP2(i, n) for (i32 i = 0; i < (i32)(n); ++i) #define REP3(i, m, n) for (i32 i = (i32)(m); i < (i32)(n); ++i) #define REP(...) OVERRIDE(__VA_ARGS__, REP3, REP2)(__VA_ARGS__) #define PER2(i, n) for (i32 i = (i32)(n)-1; i >= 0; --i) #define PER3(i, m, n) for (i32 i = (i32)(n)-1; i >= (i32)(m); --i) #define PER(...) OVERRIDE(__VA_ARGS__, PER3, PER2)(__VA_ARGS__) #define ALL(x) begin(x), end(x) #define LEN(x) (i32)(x.size()) using namespace std; using u32 = unsigned int; using u64 = unsigned long long; using i32 = signed int; using i64 = signed long long; using f64 = double; using f80 = long double; using pi = pair; using pl = pair; template using V = vector; template using VV = V>; template using VVV = V>>; template using VVVV = V>>>; template using PQR = priority_queue, greater>; template bool chmin(T &x, const T &y) { if (x > y) { x = y; return true; } return false; } template bool chmax(T &x, const T &y) { if (x < y) { x = y; return true; } return false; } template i32 lob(const V &arr, const T &v) { return (i32)(lower_bound(ALL(arr), v) - arr.begin()); } template i32 upb(const V &arr, const T &v) { return (i32)(upper_bound(ALL(arr), v) - arr.begin()); } template V argsort(const V &arr) { V ret(arr.size()); iota(ALL(ret), 0); sort(ALL(ret), [&](i32 i, i32 j) -> bool { if (arr[i] == arr[j]) { return i < j; } else { return arr[i] < arr[j]; } }); return ret; } #ifdef INT128 using u128 = __uint128_t; using i128 = __int128_t; #endif [[maybe_unused]] constexpr i32 INF = 1000000100; [[maybe_unused]] constexpr i64 INF64 = 3000000000000000100; struct SetUpIO { SetUpIO() { #ifdef FAST_IO ios::sync_with_stdio(false); cin.tie(nullptr); #endif cout << fixed << setprecision(15); } } set_up_io; void scan(char &x) { cin >> x; } void scan(u32 &x) { cin >> x; } void scan(u64 &x) { cin >> x; } void scan(i32 &x) { cin >> x; } void scan(i64 &x) { cin >> x; } void scan(string &x) { cin >> x; } template void scan(V &x) { for (T &ele : x) { scan(ele); } } void read() {} template void read(Head &head, Tail &...tail) { scan(head); read(tail...); } #define CHAR(...) \ char __VA_ARGS__; \ read(__VA_ARGS__); #define U32(...) \ u32 __VA_ARGS__; \ read(__VA_ARGS__); #define U64(...) \ u64 __VA_ARGS__; \ read(__VA_ARGS__); #define I32(...) \ i32 __VA_ARGS__; \ read(__VA_ARGS__); #define I64(...) \ i64 __VA_ARGS__; \ read(__VA_ARGS__); #define STR(...) \ string __VA_ARGS__; \ read(__VA_ARGS__); #define VEC(type, name, size) \ V name(size); \ read(name); #define VVEC(type, name, size1, size2) \ VV name(size1, V(size2)); \ read(name); // ============ #ifdef DEBUGF #else #define DBG(...) (void)0 #endif #include #include // ============ #include #include template class FactorialTable { std::vector fac; std::vector ifac; public: FactorialTable() : fac(1, T(1)), ifac(1, T(1)) {} FactorialTable(int n) : fac(n + 1), ifac(n + 1) { assert(n >= 0); fac[0] = T(1); for (int i = 1; i <= n; ++i) { fac[i] = fac[i - 1] * T(i); } ifac[n] = T(1) / T(fac[n]); for (int i = n; i > 0; --i) { ifac[i - 1] = ifac[i] * T(i); } } void resize(int n) { int old = n_max(); if (n <= old) { return; } fac.resize(n + 1); for (int i = old + 1; i <= n; ++i) { fac[i] = fac[i - 1] * T(i); } ifac.resize(n + 1); ifac[n] = T(1) / T(fac[n]); for (int i = n; i > old; --i) { ifac[i - 1] = ifac[i] * T(i); } } inline int n_max() const { return (int) fac.size() - 1; } inline T fact(int n) const { assert(n >= 0 && n <= n_max()); return fac[n]; } inline T inv_fact(int n) const { assert(n >= 0 && n <= n_max()); return ifac[n]; } inline T inv_n(int n) const { assert(n > 0 && n <= n_max()); return ifac[n] * fac[n - 1]; } inline T choose(int n, int k) const { assert(k <= n_max() && n <= n_max()); if (k > n || k < 0) { return T(0); } return fac[n] * ifac[k] * ifac[n - k]; } inline T multi_choose(int n, int k) const { assert(n >= 1 && k >= 0 && k + n - 1 <= n_max()); return choose(k + n - 1, k); } inline T n_terms_sum_k(int n, int k) const { assert(n >= 0); if (k < 0) { return T(0); } if (n == 0) { return k == 0 ? T(1) : T(0); } return choose(n + k - 1, n - 1); } }; // ============ using DM = atcoder::dynamic_modint<998>; using M9 = atcoder::modint998244353; void solve() { I64(n); I32(p); DM::set_mod(p); auto check_order = [&](i32 x) -> i32 { DM val(x); i32 e = 1; while (val.val() != 1) { val *= DM(x); ++e; } return e; }; i32 root = 1; while (check_order(root) != p - 1) { ++root; } V ord(p, 0); { DM pw(1); REP(i, p - 1) { ord[pw.val()] = i; pw *= DM(root); } } DBG(root, ord); FactorialTable fac(p - 1); V ds; while (n > 0) { ds.push_back(n % p); n /= p; } V dp(p - 1); dp[0] = M9(1); for (i32 d : ds) { V tmp(p - 1); REP(i, d + 1) { DM binom = fac.choose(d, i); tmp[ord[binom.val()]] += M9(1); } V conv = atcoder::convolution(dp, tmp); dp.assign(p - 1, M9()); REP(i, LEN(conv)) { dp[i % (p - 1)] += conv[i]; } } M9 ans; DM pw(1); REP(i, p - 1) { ans += M9(pw.val()) * dp[i]; pw *= DM(root); } cout << ans.val() << '\n'; } int main() { i32 t = 1; // cin >> t; while (t--) { solve(); } }