#include using namespace std; #include using namespace atcoder; template inline bool chmax(T &a, T b) { return ((a < b) ? (a = b, true) : (false)); } template inline bool chmin(T &a, T b) { return ((a > b) ? (a = b, true) : (false)); } #define rep(i, n) for (long long i = 0; i < (long long)(n); i++) #define rep2(i, m ,n) for (int i = (m); i < (long long)(n); i++) #define REP(i, n) for (long long i = 1; i < (long long)(n); i++) typedef long long ll; #pragma GCC target("avx512f") #pragma GCC optimize("Ofast") #pragma GCC optimize("unroll-loops") #define updiv(N,X) (N + X - 1) / X #define l(n) n.begin(),n.end() #define mat vector> #define YesNo(Q) Q==1?cout<<"Yes":cout<<"No" #define int long long using Pt = pair; using mint = modint; const int MOD = 998244353LL; const ll INF = 999999999999LL; vector fact, fact_inv, inv; /* init_nCk :二項係数のための前処理 計算量:O(n) */ template void input(vector &v){ rep(i,v.size()){cin>>v[i];} return; } void init_nCk(int SIZE) { fact.resize(SIZE + 5); fact_inv.resize(SIZE + 5); inv.resize(SIZE + 5); fact[0] = fact[1] = 1; fact_inv[0] = fact_inv[1] = 1; inv[1] = 1; for (int i = 2; i < SIZE + 5; i++) { fact[i] = fact[i - 1] * i % MOD; inv[i] = MOD - inv[MOD % i] * (MOD / i) % MOD; fact_inv[i] = fact_inv[i - 1] * inv[i] % MOD; } } /* nCk :MODでの二項係数を求める(前処理 int_nCk が必要) 計算量:O(1) */ long long nCk(int n, int k) { assert(!(n < k)); assert(!(n < 0 || k < 0)); return fact[n] * (fact_inv[k] * fact_inv[n - k] % MOD) % MOD; } long long modpow(long long a, long long n, long long mod) { long long res = 1; while (n > 0) { if (n & 1) res = res * a % mod; a = a * a % mod; n >>= 1; } return res; } ll POW(ll a,ll n){ long long res = 1; while (n > 0) { if (n & 1) res = res * a; a = a * a; n >>= 1; } return res; } struct unionfind{ vector par,siz; void reset(int n){par.resize(n);siz.resize(n);rep(i,n){par[i]=-1;siz[i]=1;}} int root(int x){ if(par[x]==-1){return x;} else{return par[x] = root(par[x]);} } bool issame(int x,int y){ return root(x)==root(y); } bool unite(int x,int y){ x = root(x);y=root(y); if(x == y){return false;} if(siz[x] < siz[y]){swap(x,y);} par[y] = x; siz[x] += siz[y]; return true; } int size(int x){ return siz[root(x)]; } }; struct graph{ vector > > val; void print(){ rep(i,val.size()){ rep(j,val[i].size()){ cout << val[i][j].first<<"/" <k);val[ n ].push_back( pair(k,cost) ); } void add2(int n,int k,ll cost){ val[ n ].push_back( pair(k,cost) ); val[ k ].push_back( pair(n,cost) );} vector dfs_basic(int a){ vectorseen(val.size(),-1); queue q;q.push(a);seen[a]=0; while(!q.empty()){ int wc=q.front(); q.pop(); rep(i,val[wc].size()){ if(-1==seen[val[wc][i].first]){q.push(val[wc][i].first);seen[val[wc][i].first]=seen[wc]+val[wc][i].second;} } } return seen; } }; // N の約数をすべて求める関数 ll cd(long long N) { // 答えを表す集合 long long res=0; // 各整数 i が N の約数かどうかを調べる for (long long i = 1; i * i <= N; ++i) { // i が N の約数でない場合はスキップ if (N % i != 0) continue; // i は約数である res ++; // N ÷ i も約数である (重複に注意) if (N / i != i){res += 1;} } // 約数を小さい順に並び替えて出力 return res; } ll mdt; /// 行列積 mat mat_mul(mat &a, mat &b) { mat res(a.size(), vector(b[0].size())); for (int i = 0; i < (int)(a.size()); i++) { for (int j = 0; j < (int)(b[0].size()); j++) { for (int k = 0; k < (int)(b.size()); k++) { (res[i][j] += a[i][k] * b[k][j]) %= mdt; } } } return res; } /// 行列累乗 mat mat_pow(mat a, long long n) { mat res(a.size(), vector(a.size())); // 単位行列で初期化 for (int i = 0; i < (int)(a.size()); i++) res[i][i] = 1; // 繰り返し二乗法 while (n > 0) { if (n & 1) res = mat_mul(a, res); a = mat_mul(a, a); n >>= 1; } return res; } int N,P; void cumsum(std::vector& x, int d = 1) { for (int i = d; i < x.size(); i++) { x[i] += x[i - d]; x[i] %= P; } } void md( long long& y ){ y = (y%P+P)%P; return; } signed main() { std::cin >> N >> P; int ans = 0; std::vector f = {0, 0}; int fact = 1; for (int i = 2; i <= N; i++) { f.push_back((modpow(N, N - i, P) * fact) % P); fact *= N - i; fact %= P; } std::vector dist(N, 0); int cumt = 0; int pat = 0; for (int i = N - 2; i > 0; i--) { cumt += f[i + 2]; pat += cumt; cumt %= P; pat %= P; dist[i] = pat; } int backet = 100; std::vector pats(N * 2, 0); std::vector> det(backet + 1, std::vector(N * 2, 0)); for (int i = 1; i <= N; i++) { for (int j = 0; j <= N; j++) { if (i * j >= N) { break; } if (j == 0) { pats[1] += N - i + 1; pats[i * (j + 1)] -= N - i + 1; continue; } if (j > backet) { int cnt = 0; for (int k = i * j + 1; k < i * (j + 1); k += j) { pats[k] += 1; cnt += 1; } pats[i * (j + 1)] -= cnt; } else { det[j][i * j + 1] += 1; int cnt = (i * (j + 1) - 2) / j - i + 1; det[j][std::min(N * 2 - 1, i * j + 1 + cnt * j)] -= 1; pats[i * (j + 1)] -= cnt; } } } for (int i = 1; i <= backet; i++) { cumsum(det[i], i); for (int j = 0; j < N * 2; j++) { pats[j] += det[i][j]; pats[j] %= P; } } cumsum(pats); for (int i = 0; i < N - 1; i++) { pats[i] = N * (N + 1) / 2 - pats[i]; pats[i] %= P;dist[i] %= P; ans += pats[i] * dist[i]; ans %= P; } cumsum(f); std::vector> div(N + 1, std::vector()); std::vector> cum(N + 1, std::vector(1, 0)); for (int i = 1; i <= N; i++) { for (int j = i; j <= N; j += i) { div[j].push_back(i); cum[i].push_back(cum[i].back() + f[j]); } } for (int i = 1; i <= N; i++) { std::vector gcd(div[i].size(), 0); for (int j = div[i].size() - 1; j >= 0; j--) { gcd[j] += N / div[i][j]; for (int k = j - 1; k >= 0; k--) { if (div[i][j] % div[i][k] == 0) { gcd[k] -= gcd[j]; } } int pl = cum[div[i][j]].back() - cum[div[i][j]][i / div[i][j]]; md(pl); int plcnt = cum[div[i][j]].size() - 1 - i / div[i][j]; md(plcnt); int mi = cum[div[i][j]].back() - cum[div[i][j]][i / div[i][j] - 1] + (f[i - 1] * (i / div[i][j] - 1))%P; md(mi); int micnt = plcnt + 1 + i / div[i][j] - 1; md(micnt); ans += ((pl + f.back() * (micnt - plcnt) - mi)%P) * gcd[j]; md(ans); ans %= P; if (j == 0) { ans += (((pl + f.back() * (micnt - plcnt) - mi)%P+P)%P) * ((N * (N - 1) / 2)%P); } md(ans); } } std::cout << (ans * ((N * (N - 1))%P)) % P << std::endl; return 0; }