結果

問題 No.1781 LCM
ユーザー hitonanode
提出日時 2021-12-31 00:39:42
言語 C++23
(gcc 12.3.0 + boost 1.83.0)
結果
AC  
実行時間 4,216 ms / 5,000 ms
コード長 21,663 bytes
コンパイル時間 2,305 ms
コンパイル使用メモリ 186,872 KB
実行使用メモリ 37,736 KB
最終ジャッジ日時 2024-05-18 11:38:36
合計ジャッジ時間 30,237 ms
ジャッジサーバーID
(参考情報)
judge4 / judge1
このコードへのチャレンジ
(要ログイン)

テストケース

テストケース表示
入力 結果 実行時間
実行使用メモリ
testcase_00 AC 10 ms
7,812 KB
testcase_01 AC 12 ms
7,908 KB
testcase_02 AC 12 ms
7,908 KB
testcase_03 AC 12 ms
7,808 KB
testcase_04 AC 12 ms
7,908 KB
testcase_05 AC 12 ms
7,816 KB
testcase_06 AC 13 ms
7,848 KB
testcase_07 AC 10 ms
7,908 KB
testcase_08 AC 10 ms
7,908 KB
testcase_09 AC 11 ms
7,908 KB
testcase_10 AC 11 ms
7,848 KB
testcase_11 AC 10 ms
7,908 KB
testcase_12 AC 10 ms
7,788 KB
testcase_13 AC 10 ms
7,844 KB
testcase_14 AC 10 ms
7,904 KB
testcase_15 AC 10 ms
7,908 KB
testcase_16 AC 11 ms
7,856 KB
testcase_17 AC 11 ms
7,912 KB
testcase_18 AC 11 ms
7,840 KB
testcase_19 AC 10 ms
7,908 KB
testcase_20 AC 9 ms
7,808 KB
testcase_21 AC 4,139 ms
37,104 KB
testcase_22 AC 4,216 ms
37,736 KB
testcase_23 AC 10 ms
7,844 KB
testcase_24 AC 10 ms
7,940 KB
testcase_25 AC 4,143 ms
37,524 KB
testcase_26 AC 4,164 ms
37,116 KB
testcase_27 AC 4,088 ms
36,720 KB
testcase_28 AC 3,473 ms
35,684 KB
testcase_29 AC 952 ms
17,076 KB
testcase_30 AC 1,008 ms
17,344 KB
testcase_31 AC 10 ms
7,984 KB
testcase_32 AC 9 ms
7,908 KB
権限があれば一括ダウンロードができます

ソースコード

diff #

#include <algorithm>
#include <array>
#include <bitset>
#include <cassert>
#include <chrono>
#include <cmath>
#include <complex>
#include <deque>
#include <forward_list>
#include <fstream>
#include <functional>
#include <iomanip>
#include <ios>
#include <iostream>
#include <limits>
#include <list>
#include <map>
#include <numeric>
#include <queue>
#include <random>
#include <set>
#include <sstream>
#include <stack>
#include <string>
#include <tuple>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
// Contest-style convenience: every standard-library name is visible unqualified below.
using namespace std;
// Short type aliases used throughout the file.
using lint = long long;         // wide signed integer
using pint = pair<int, int>;    // pair of ints
using plint = pair<lint, lint>; // pair of lints
// Configures the standard streams from the constructor of a namespace-scope object:
// cin is untied from cout, synchronisation with C stdio is switched off, and cout is put
// into fixed notation with 20 digits after the decimal point.
struct fast_ios {
    fast_ios() {
        cin.tie(nullptr);
        ios::sync_with_stdio(false);
        cout << fixed << setprecision(20);
    }
} fast_ios_;
// ALL(x): expands to the two arguments `(x).begin(), (x).end()` (x is written out twice).
#define ALL(x) (x).begin(), (x).end()
// FOR(i, begin, end): `int i` counts upward over [begin, end); `end` is evaluated once and
// stored in an int named i_end_ (so a size_t bound is converted to int).
#define FOR(i, begin, end) for(int i=(begin),i##_end_=(end);i<i##_end_;i++)
// IFOR(i, begin, end): `int i` counts downward from end-1 to begin (inclusive).
#define IFOR(i, begin, end) for(int i=(end)-1,i##_begin_=(begin);i>=i##_begin_;i--)
// REP(i, n): FOR over [0, n).
#define REP(i, n) FOR(i,0,n)
// IREP(i, n): IFOR over [0, n), i.e. n-1 down to 0.
#define IREP(i, n) IFOR(i,0,n)
// Base case: make `vec` a 1-D array of `len` copies of `val`.
template <typename T, typename V>
void ndarray(vector<T> &vec, const V &val, int len) {
    vec.assign(len, val);
}
// Recursive case: resize the outermost dimension to `len`, then shape every inner vector
// with the remaining extents `args...` and fill value `val`.
template <typename T, typename V, typename... Args>
void ndarray(vector<T> &vec, const V &val, int len, Args... args) {
    vec.resize(len);
    for (T &inner : vec) ndarray(inner, val, args...);
}
// Raises m to q when q is strictly greater; returns true iff m was changed.
template <typename T> bool chmax(T &m, const T q) {
    if (m < q) {
        m = q;
        return true;
    }
    return false;
}
// Lowers m to q when q is strictly smaller; returns true iff m was changed.
template <typename T> bool chmin(T &m, const T q) {
    if (m > q) {
        m = q;
        return true;
    }
    return false;
}
// Position of the highest set bit of x, i.e. floor(log2(x)), for x > 0; -1 for x <= 0.
int floor_lg(long long x) {
    if (x <= 0) return -1;
    return 63 - __builtin_clzll(x);
}
// Component-wise sum of two pairs of the same type.
template <typename T1, typename T2>
pair<T1, T2> operator+(const pair<T1, T2> &l, const pair<T1, T2> &r) {
    return pair<T1, T2>(l.first + r.first, l.second + r.second);
}
// Component-wise difference of two pairs of the same type.
template <typename T1, typename T2>
pair<T1, T2> operator-(const pair<T1, T2> &l, const pair<T1, T2> &r) {
    return pair<T1, T2>(l.first - r.first, l.second - r.second);
}
// Returns the distinct elements of `vec` in ascending order (works on a by-value copy).
template <typename T> vector<T> sort_unique(vector<T> vec) {
    sort(vec.begin(), vec.end());
    const auto new_end = unique(vec.begin(), vec.end());
    vec.erase(new_end, vec.end());
    return vec;
}
// Index of the first element of sorted `v` that is not less than x (v.size() if none).
template <typename T> int arglb(const std::vector<T> &v, const T &x) {
    const auto pos = std::lower_bound(v.begin(), v.end(), x);
    return pos - v.begin();
}
// Index of the first element of sorted `v` that is greater than x (v.size() if none).
template <typename T> int argub(const std::vector<T> &v, const T &x) {
    const auto pos = std::upper_bound(v.begin(), v.end(), x);
    return pos - v.begin();
}
// Reads vec.size() values from `is` into the already-sized vector, front to back.
template <typename T> istream &operator>>(istream &is, vector<T> &vec) {
    for (size_t idx = 0; idx < vec.size(); ++idx) is >> vec[idx];
    return is;
}
// Stream operators for standard containers (used for debug output).
//
// Every overload is declared before any of them is defined. The bodies print their elements
// with `os << v`; for an element type from namespace std, argument-dependent lookup only
// searches namespace std, so a global overload is usable inside a template only if it was
// declared before that template. Without these declarations, nested types such as
// vector<pair<int, int>>, vector<set<int>> or tuple<int, pair<int, int>> fail to compile.
template <typename T> ostream &operator<<(ostream &os, const vector<T> &vec);
template <typename T, size_t sz> ostream &operator<<(ostream &os, const array<T, sz> &arr);
#if __cplusplus >= 201703L
template <typename... T> ostream &operator<<(ostream &os, const tuple<T...> &tpl);
#endif
template <typename T> ostream &operator<<(ostream &os, const deque<T> &vec);
template <typename T> ostream &operator<<(ostream &os, const set<T> &vec);
template <typename T, typename TH> ostream &operator<<(ostream &os, const unordered_set<T, TH> &vec);
template <typename T> ostream &operator<<(ostream &os, const multiset<T> &vec);
template <typename T> ostream &operator<<(ostream &os, const unordered_multiset<T> &vec);
template <typename T1, typename T2> ostream &operator<<(ostream &os, const pair<T1, T2> &pa);
template <typename TK, typename TV> ostream &operator<<(ostream &os, const map<TK, TV> &mp);
template <typename TK, typename TV, typename TH> ostream &operator<<(ostream &os, const unordered_map<TK, TV, TH> &mp);

// vector: [a,b,c,]
template <typename T> ostream &operator<<(ostream &os, const vector<T> &vec) {
    os << '[';
    for (const auto &v : vec) os << v << ',';
    os << ']';
    return os;
}
// array: [a,b,c,]
template <typename T, size_t sz> ostream &operator<<(ostream &os, const array<T, sz> &arr) {
    os << '[';
    for (const auto &v : arr) os << v << ',';
    os << ']';
    return os;
}
#if __cplusplus >= 201703L
// tuple input: reads every element in order.
template <typename... T> istream &operator>>(istream &is, tuple<T...> &tpl) {
    std::apply([&is](auto &&... args) { ((is >> args), ...); }, tpl);
    return is;
}
// tuple output: (a,b,c,)
template <typename... T> ostream &operator<<(ostream &os, const tuple<T...> &tpl) {
    os << '(';
    std::apply([&os](auto &&... args) { ((os << args << ','), ...); }, tpl);
    return os << ')';
}
#endif
// deque: deq[a,b,c,]
template <typename T> ostream &operator<<(ostream &os, const deque<T> &vec) {
    os << "deq[";
    for (const auto &v : vec) os << v << ',';
    os << ']';
    return os;
}
// set: {a,b,c,}
template <typename T> ostream &operator<<(ostream &os, const set<T> &vec) {
    os << '{';
    for (const auto &v : vec) os << v << ',';
    os << '}';
    return os;
}
// unordered_set: {a,b,c,} in the container's iteration order
template <typename T, typename TH> ostream &operator<<(ostream &os, const unordered_set<T, TH> &vec) {
    os << '{';
    for (const auto &v : vec) os << v << ',';
    os << '}';
    return os;
}
// multiset: {a,a,b,}
template <typename T> ostream &operator<<(ostream &os, const multiset<T> &vec) {
    os << '{';
    for (const auto &v : vec) os << v << ',';
    os << '}';
    return os;
}
// unordered_multiset: {a,a,b,} in the container's iteration order
template <typename T> ostream &operator<<(ostream &os, const unordered_multiset<T> &vec) {
    os << '{';
    for (const auto &v : vec) os << v << ',';
    os << '}';
    return os;
}
// pair: (first,second)
template <typename T1, typename T2> ostream &operator<<(ostream &os, const pair<T1, T2> &pa) {
    os << '(' << pa.first << ',' << pa.second << ')';
    return os;
}
// map: {k=>v,k=>v,}
template <typename TK, typename TV> ostream &operator<<(ostream &os, const map<TK, TV> &mp) {
    os << '{';
    for (const auto &v : mp) os << v.first << "=>" << v.second << ',';
    os << '}';
    return os;
}
// unordered_map: {k=>v,k=>v,} in the container's iteration order
template <typename TK, typename TV, typename TH> ostream &operator<<(ostream &os, const unordered_map<TK, TV, TH> &mp) {
    os << '{';
    for (const auto &v : mp) os << v.first << "=>" << v.second << ',';
    os << '}';
    return os;
}
// Debug printing helpers.
// With HITONANODE_LOCAL defined:
//   dbg(x)         writes "<expression text> = <value> (L<line>) <file>" to cerr, with ANSI colours;
//   dbgif(cond, x) does the same only when cond is true (otherwise it evaluates to cerr).
// Without it, dbg(x) expands to just (x) (the expression is still evaluated) and dbgif(cond, x)
// expands to the literal 0 (neither argument is evaluated).
#ifdef HITONANODE_LOCAL
const string COLOR_RESET = "\033[0m", BRIGHT_GREEN = "\033[1;32m", BRIGHT_RED = "\033[1;31m", BRIGHT_CYAN = "\033[1;36m", NORMAL_CROSSED = "\033[0;9;37m", RED_BACKGROUND = "\033[1;41m", NORMAL_FAINT = "\033[0;2m";
#define dbg(x) cerr << BRIGHT_CYAN << #x << COLOR_RESET << " = " << (x) << NORMAL_FAINT << " (L" << __LINE__ << ") " << __FILE__ << COLOR_RESET << endl
#define dbgif(cond, x) ((cond) ? cerr << BRIGHT_CYAN << #x << COLOR_RESET << " = " << (x) << NORMAL_FAINT << " (L" << __LINE__ << ") " << __FILE__ << COLOR_RESET << endl : cerr)
#else
#define dbg(x) (x)
#define dbgif(cond, x) 0
#endif

// Linear sieve algorithm for fast prime factorization
// Complexity: O(N) time, O(N) space:
// - MAXN = 10^7:  ~44 MB,  80~100 ms (Codeforces / AtCoder GCC, C++17)
// - MAXN = 10^8: ~435 MB, 810~980 ms (Codeforces / AtCoder GCC, C++17)
// Reference:
// [1] D. Gries, J. Misra, "A Linear Sieve Algorithm for Finding Prime Numbers,"
//     Communications of the ACM, 21(12), 999-1003, 1978.
// - https://cp-algorithms.com/algebra/prime-sieve-linear.html
// - https://37zigen.com/linear-sieve/
struct Sieve {
    std::vector<int> min_factor; // min_factor[x] = smallest prime factor of x for 2 <= x <= MAXN (0 for x < 2)
    std::vector<int> primes;     // all primes <= MAXN, ascending

    // Builds both tables for the range [0, MAXN] with a linear sieve: every composite d * p
    // is written exactly once, by its smallest prime factor p.
    Sieve(int MAXN) : min_factor(MAXN + 1) {
        for (int d = 2; d <= MAXN; d++) {
            if (!min_factor[d]) {
                min_factor[d] = d;
                primes.emplace_back(d);
            }
            for (const auto &p : primes) {
                // The product is compared in 64 bits so the bound check itself cannot overflow.
                if (p > min_factor[d] or static_cast<long long>(d) * p > MAXN) break;
                min_factor[d * p] = p;
            }
        }
    }

    // Prime factorization for 1 <= x <= MAXN^2, returned as {prime => exponent}.
    // Complexity: O(log x)           (x <= MAXN)
    //             O(MAXN / log MAXN) (MAXN < x <= MAXN^2)
    template <class T> std::map<T, int> factorize(T x) const {
        std::map<T, int> ret;
        assert(x > 0 and
               x <= ((long long)min_factor.size() - 1) * ((long long)min_factor.size() - 1));
        // While x is beyond the table, strip sieved primes by trial division.
        for (const auto &p : primes) {
            if (x < T(min_factor.size())) break;
            while (!(x % p)) x /= p, ret[p]++;
        }
        // Whatever is still beyond the table has no prime factor <= MAXN, hence is itself prime.
        if (x >= T(min_factor.size())) ret[x]++, x = 1;
        // The remainder fits the table: peel off smallest prime factors.
        while (x > 1) ret[min_factor[x]]++, x /= min_factor[x];
        return ret;
    }

    // Enumerate divisors of 1 <= x <= MAXN^2
    // Be careful of highly composite numbers https://oeis.org/A002182/list
    // https://gist.github.com/dario2994/fb4713f252ca86c1254d#file-list-txt (n, (# of div. of n)):
    // 45360->100, 735134400(<1e9)->1344, 963761198400(<1e12)->6720
    template <class T> std::vector<T> divisors(T x) const {
        std::vector<T> ret{1};
        for (const auto &p : factorize(x)) {
            // Multiply every divisor found so far by p^1, ..., p^e.
            const int n = ret.size();
            for (int i = 0; i < n; i++) {
                for (T a = 1, d = 1; d <= p.second; d++) {
                    a *= p.first;
                    ret.push_back(ret[i] * a);
                }
            }
        }
        return ret; // NOT sorted
    }

    // Euler phi functions of divisors of given x, returned as {divisor => phi(divisor)}.
    // Verified: ABC212 G https://atcoder.jp/contests/abc212/tasks/abc212_g
    // Complexity: O(sqrt(x) + d(x))
    template <class T> std::map<T, T> euler_of_divisors(T x) const {
        assert(x >= 1);
        std::map<T, T> ret;
        ret[1] = 1;
        std::vector<T> divs{1}; // keys of `ret`, in insertion order
        for (const auto &p : factorize(x)) {
            const int n = divs.size();
            for (int i = 0; i < n; i++) {
                // phi(m * p) = phi(m) * (p - 1) when p does not divide m ...
                ret[divs[i] * p.first] = ret[divs[i]] * (p.first - 1);
                divs.push_back(divs[i] * p.first);
                // ... and phi(a * p) = phi(a) * p once p already divides a.
                for (T a = divs[i] * p.first, d = 1; d < p.second; a *= p.first, d++) {
                    ret[a * p.first] = ret[a] * p.first;
                    divs.push_back(a * p.first);
                }
            }
        }
        return ret;
    }

    // Moebius function Table, (-1)^{# of different prime factors} for square-free x
    // return: [0=>0, 1=>1, 2=>-1, 3=>-1, 4=>0, 5=>-1, 6=>1, 7=>-1, 8=>0, ...] https://oeis.org/A008683
    std::vector<int> GenerateMoebiusFunctionTable() const {
        const int sz = min_factor.size();
        std::vector<int> ret(sz);
        for (int i = 1; i < sz; i++) {
            if (i == 1) {
                ret[i] = 1;
            } else if ((i / min_factor[i]) % min_factor[i] == 0) {
                ret[i] = 0; // the smallest prime factor divides i twice
            } else {
                ret[i] = -ret[i / min_factor[i]];
            }
        }
        return ret;
    }

    // Calculate [0^K, 1^K, ..., nmax^K] in O(nmax)
    // Note: **0^0 == 1**
    // Requires 0 <= nmax <= MAXN and K >= 0. MODINT must be constructible from int and provide
    // operator* and pow(long long).
    template <class MODINT> std::vector<MODINT> enumerate_kth_pows(long long K, int nmax) const {
        assert(0 <= nmax and nmax < int(min_factor.size()));
        assert(K >= 0);
        if (K == 0) return std::vector<MODINT>(nmax + 1, 1);
        std::vector<MODINT> ret(nmax + 1);
        ret[0] = 0;
        if (nmax >= 1) ret[1] = 1; // with nmax == 0 the table has a single entry
        for (int n = 2; n <= nmax; n++) {
            if (min_factor[n] == n) {
                ret[n] = MODINT(n).pow(K); // prime: exponentiate directly
            } else {
                // n^K = (n / p)^K * p^K with p the smallest prime factor
                ret[n] = ret[n / min_factor[n]] * ret[min_factor[n]];
            }
        }
        return ret;
    }
};
// Global instance covering [0, 2^20].
Sieve sieve(1 << 20);


// Integer modulo the compile-time constant `md`, stored as `val` in [0, md).
// inv(), operator/ and the factorial helpers use Fermat's little theorem (x^(md-2)), so they
// assume md is prime.
template <int md> struct ModInt {
#if __cplusplus >= 201402L
#define MDCONST constexpr
#else
#define MDCONST
#endif
    using lint = long long;
    MDCONST static int mod() { return md; }

    // Smallest g in [1, md) whose powers (md-1)/q differ from 1 for every prime q dividing md-1;
    // -1 if no such g exists. The result is computed once and cached.
    static int get_primitive_root() {
        static int primitive_root = 0;
        if (!primitive_root) {
            primitive_root = [&]() {
                std::set<int> fac; // distinct prime factors of md - 1
                int v = md - 1;
                for (lint i = 2; i * i <= v; i++)
                    while (v % i == 0) fac.insert(i), v /= i;
                if (v > 1) fac.insert(v);
                for (int g = 1; g < md; g++) {
                    bool ok = true;
                    for (auto i : fac)
                        if (ModInt(g).pow((md - 1) / i) == 1) {
                            ok = false;
                            break;
                        }
                    if (ok) return g;
                }
                return -1;
            }();
        }
        return primitive_root;
    }

    int val;
    MDCONST ModInt() : val(0) {}
    // Stores v after at most one subtraction of md; the caller guarantees 0 <= v < 2 * md.
    MDCONST ModInt &_setval(lint v) { return val = (v >= md ? v - md : v), *this; }
    // Accepts any (also negative) v: v % md + md lies in (0, 2 * md).
    MDCONST ModInt(lint v) : val(0) { _setval(v % md + md); }
    MDCONST explicit operator bool() const { return val != 0; }
    MDCONST ModInt operator+(const ModInt &x) const { return ModInt()._setval((lint)val + x.val); }
    MDCONST ModInt operator-(const ModInt &x) const { return ModInt()._setval((lint)val - x.val + md); }
    MDCONST ModInt operator*(const ModInt &x) const { return ModInt()._setval((lint)val * x.val % md); }
    MDCONST ModInt operator/(const ModInt &x) const { return ModInt()._setval((lint)val * x.inv() % md); }
    MDCONST ModInt operator-() const { return ModInt()._setval(md - val); }
    MDCONST ModInt &operator+=(const ModInt &x) { return *this = *this + x; }
    MDCONST ModInt &operator-=(const ModInt &x) { return *this = *this - x; }
    MDCONST ModInt &operator*=(const ModInt &x) { return *this = *this * x; }
    MDCONST ModInt &operator/=(const ModInt &x) { return *this = *this / x; }
    // Mixed operators with a plain integer on the left. The integer is normalised through the
    // ModInt(lint) constructor first: `a % md` keeps the sign of `a`, so using it directly would
    // leave a negative `val` for negative `a`.
    friend MDCONST ModInt operator+(lint a, const ModInt &x) { return ModInt(a) + x; }
    friend MDCONST ModInt operator-(lint a, const ModInt &x) { return ModInt(a) - x; }
    friend MDCONST ModInt operator*(lint a, const ModInt &x) { return ModInt(a) * x; }
    friend MDCONST ModInt operator/(lint a, const ModInt &x) { return ModInt(a) / x; }
    MDCONST bool operator==(const ModInt &x) const { return val == x.val; }
    MDCONST bool operator!=(const ModInt &x) const { return val != x.val; }
    MDCONST bool operator<(const ModInt &x) const { return val < x.val; } // To use std::map<ModInt, T>
    friend std::istream &operator>>(std::istream &is, ModInt &x) {
        lint t;
        return is >> t, x = ModInt(t), is;
    }
    MDCONST friend std::ostream &operator<<(std::ostream &os, const ModInt &x) { return os << x.val; }

    // Binary exponentiation; n is expected to be non-negative. pow(0) is 1.
    MDCONST ModInt pow(lint n) const {
        ModInt ans = 1, tmp = *this;
        while (n) {
            if (n & 1) ans *= tmp;
            tmp *= tmp, n >>= 1;
        }
        return ans;
    }

    // Lazily grown tables: facs[i] = i!, facinvs[i] = 1 / i!, invs[i] = 1 / i (invs[0] = 0).
    static std::vector<ModInt> facs, facinvs, invs;
    // Extends the three tables to N entries (capped at md); does nothing if already that long.
    MDCONST static void _precalculation(int N) {
        int l0 = facs.size();
        if (N > md) N = md;
        if (N <= l0) return;
        facs.resize(N), facinvs.resize(N), invs.resize(N);
        for (int i = l0; i < N; i++) facs[i] = facs[i - 1] * i;
        facinvs[N - 1] = facs.back().pow(md - 2);
        for (int i = N - 2; i >= l0; i--) facinvs[i] = facinvs[i + 1] * (i + 1);
        for (int i = N - 1; i >= l0; i--) invs[i] = facinvs[i] * facs[i - 1];
    }
    // Multiplicative inverse as a raw integer. Values below min(md / 2, 2^21) are served from
    // the `invs` table (grown by doubling); larger values use x^(md-2). The inverse of 0 is 0.
    MDCONST lint inv() const {
        if (this->val < std::min(md >> 1, 1 << 21)) {
            while (this->val >= int(facs.size())) _precalculation(facs.size() * 2);
            return invs[this->val].val;
        } else {
            return this->pow(md - 2).val;
        }
    }
    // val! (table lookup, grown on demand)
    MDCONST ModInt fac() const {
        while (this->val >= int(facs.size())) _precalculation(facs.size() * 2);
        return facs[this->val];
    }
    // 1 / val! (table lookup, grown on demand)
    MDCONST ModInt facinv() const {
        while (this->val >= int(facs.size())) _precalculation(facs.size() * 2);
        return facinvs[this->val];
    }
    // Double factorial val!!, expressed through ordinary factorials and powers of two.
    MDCONST ModInt doublefac() const {
        lint k = (this->val + 1) / 2;
        return (this->val & 1) ? ModInt(k * 2).fac() / (ModInt(2).pow(k) * ModInt(k).fac())
                               : ModInt(k).fac() * ModInt(2).pow(k);
    }
    // Binomial coefficient C(val, r.val); 0 when val < r.val.
    MDCONST ModInt nCr(const ModInt &r) const {
        return (this->val < r.val) ? 0 : this->fac() * (*this - r).facinv() * r.facinv();
    }
    // Number of r-permutations val! / (val - r.val)!; 0 when val < r.val.
    MDCONST ModInt nPr(const ModInt &r) const {
        return (this->val < r.val) ? 0 : this->fac() * (*this - r).facinv();
    }

    // Modular square root. Returns 0 for val == 0 and also when Euler's criterion says that
    // no root exists; otherwise returns the smaller of the two roots x and md - x.
    ModInt sqrt() const {
        if (val == 0) return 0;
        if (md == 2) return val;
        if (pow((md - 1) / 2) != 1) return 0;
        ModInt b = 1;
        while (b.pow((md - 1) / 2) == 1) b += 1; // first b that fails Euler's criterion
        int e = 0, m = md - 1;
        while (m % 2 == 0) m >>= 1, e++; // md - 1 = m * 2^e with m odd
        ModInt x = pow((m - 1) / 2), y = (*this) * x * x;
        x *= (*this);
        ModInt z = b.pow(m);
        while (y != 1) {
            int j = 0;
            ModInt t = y;
            while (t != 1) j++, t *= t; // j = number of squarings until y becomes 1
            z = z.pow(1LL << (e - j - 1));
            x *= z, z *= z, y *= z;
            e = j;
        }
        return ModInt(std::min(x.val, md - x.val));
    }
};
template <int md> std::vector<ModInt<md>> ModInt<md>::facs = {1};
template <int md> std::vector<ModInt<md>> ModInt<md>::facinvs = {1};
template <int md> std::vector<ModInt<md>> ModInt<md>::invs = {0};
using mint = ModInt<998244353>;


struct CountPrimes {
    // Counts the primes <= x (\pi(x)) for every distinct value x = floor(N / i), i = 1, ..., N.
    // Learned this algorithm from https://old.yosupo.jp/submission/14650
    // Reference: https://min-25.hatenablog.com/entry/2018/11/11/172216
    // Note: the referenced implementation is O(N^(2/3)); the constructor below runs the plain
    // double loop over (prime, value) pairs and does not use `_fenwick`.
    using Int = long long;
    // n  = N,
    // n2 = floor(sqrt(N)) (corrected with integer arithmetic in the constructor),
    // n3, n6 = floating-point estimates of cbrt(N) and sqrt(n3); not used by this struct.
    Int n, n2, n3, n6;
    std::vector<int> is_prime; // [0, 0, 1, 1, 0, 1, 0, 1, ...], indices 0 .. n2 + 299
    std::vector<Int> primes;   // primes up to O(N^(1/2)), [2, 3, 5, 7, ...]

    int s;               // size of vs
    std::vector<Int> vs; // [N, ..., n2, n2 - 1, n2 - 2, ..., 3, 2, 1]
    std::vector<Int> pi; // pi[i] = (# of primes s.t. <= vs[i]) is finally obtained

    std::vector<int> _fenwick; // unused here

    // Returns the largest index i such that vs[i] >= a (expects 1 <= a <= n).
    int getidx(Int a) const { return a <= n2 ? s - a : n / a - 1; }

    // Precondition: n_ >= 0.
    CountPrimes(Int n_) : n(n_), n2((Int)sqrtl(n)), n3((Int)cbrtl(n)), n6((Int)sqrtl(n3)) {
        // sqrtl() is only as exact as long double on the target; nudge n2 to the true floor.
        // The comparisons are written with divisions so they cannot overflow.
        while (n2 > 0 and n2 > n / n2) --n2;
        while (n2 + 1 <= n / (n2 + 1)) ++n2;

        // Sieve of Eratosthenes somewhat past n2, so that `primes` always holds an element
        // greater than n2 and the prime loops below stop on it.
        is_prime.assign(n2 + 300, 1), is_prime[0] = is_prime[1] = 0; // `+ 300`: https://en.wikipedia.org/wiki/Prime_gap
        for (size_t p = 2; p < is_prime.size(); p++) {
            if (is_prime[p]) {
                primes.push_back(p);
                for (size_t j = p * 2; j < is_prime.size(); j += p) is_prime[j] = 0;
            }
        }
        for (Int now = n; now; now = n / (n / now + 1))
            vs.push_back(now); // [N, N / 2, ..., 1], Relevant integers (decreasing) length ~= 2sqrt(N)
        s = vs.size();

        // Invariant while sieving with the smallest prime factor p = 2, 3, 5, ...:
        // pi[i] = (# of integers 2 <= x <= vs[i] s.t. x is prime or all prime factors of x >= p)
        // pre = (# of primes less than p)
        pi.resize(s);
        for (int i = 0; i < s; i++) pi[i] = vs[i] - 1;
        int pre = 0;

        // Removes from pi[i] the composites whose smallest prime factor is exactly p.
        auto trans = [&](int i, Int p) { pi[i] -= pi[getidx(vs[i] / p)] - pre; };

        for (int ip = 0; primes[ip] <= n2; ip++, pre++) {
            const auto &p = primes[ip];
            // vs is decreasing, so only a prefix satisfies p * p <= vs[i]; vs ends with 1,
            // which stops the loop before i reaches s.
            for (int i = 0; p * p <= vs[i]; i++) trans(i, p);
        }
    }
};

int main() {
    lint N, M;
    cin >> N >> M;
    CountPrimes cp(M);

    vector<lint> xs = cp.vs;
    vector<lint> npr = cp.pi;
    dbgif(M <= 100, xs);
    dbgif(M <= 100, npr);

    const mint p2n = mint(2).pow(N);
    dbg(p2n);

    vector<mint> dp;
    // REP(i, npr.size()) dp.push_back(npr[i] * p2n + (xs[i] - npr[i]));
    REP(i, npr.size()) dp.push_back(npr[i] * p2n);  // 素数は 2^N,  1 は 1, 合成数は 0
    // REP(i, npr.size()) dp.push_back((xs[i] - 1) * p2n + 1);


    // dp.assign(xs.size(), 0);
    // for (auto &x : dp) x += 1;
    // dp.back() = 1;

    {
        int ip = 0;
        for (; cp.primes[ip] <= cp.n2; ip++) {}
        --ip;
        for (; ip >= 0; --ip) {
            const auto &p = cp.primes[ip];
            // 最小の素因数が p の合成数をなんとかする
            int i = 0;
            for (; p * p <= cp.vs[i]; ++i) {
            }
            --i;
            for (; i >= 0; --i) {
                int j = cp.getidx(cp.vs[i] / p);
                int k = cp.getidx(p);
                dp[i] += (dp[j] - dp[k + 1]) * p2n;
            }
            dbgif(M <= 100, p);
            dbgif(M <= 100, dp);
        }
    }

    // int pre = 0;
    // auto trans = [&](int i, long long p) {
    //     dp[i] += dp[cp.getidx(cp.vs[i] / p)] * (p2n - 1);
    // };
    // for (int ip = 0; cp.primes[ip] <= cp.n2; ip++, pre++) {
    //     const auto &p = cp.primes[ip];
    //     int i = 0;
    //     for (; p * p <= cp.vs[i]; ++i) {}
    //     --i;
    //     for (; i >= 0; --i) trans(i, p);
    // }

    for (auto &x : dp) x += 1;

    // vector<mint> gudp0;
    // for (auto x : xs) {
    //     mint sum = 0;
    //     FOR(i, 1, x + 1) {
    //         auto f = sieve.factorize(i);
    //         int nb = 0;
    //         for (auto [p, deg] : f) nb += deg;
    //         sum += mint(2).pow(nb);
    //     }
    //     gudp0.push_back(sum);
    // }

    // dbgif(M <= 100, gudp0);
    // dbgif(M <= 100, dp); // EQUAL
    // assert(gudp0 == dp);

    // auto primes_rev = cp.primes;
    // reverse(primes_rev.begin(), primes_rev.end());
    vector<mint> powNs(100);
    REP(i, powNs.size()) powNs[i] = mint(i).pow(N);

    for (lint p : cp.primes) {
        if (p * p > M) break;
        dbgif(M <= 100, p);
        for (int i = 0; p * p <= cp.vs[i]; ++i) {
            lint ppow = p * p;
            int deg = 2;
            // mint coeff = mint(2).pow(N) * mint(2).pow(N);
            mint coeff = powNs[2] * powNs[2];
            while (ppow <= cp.vs[i]) {
                int j = cp.getidx(cp.vs[i] / ppow);

                // mint nxtcoeff = mint(deg + 1).pow(N);
                mint nxtcoeff = powNs[deg + 1];
                dp[i] += dp[j] * (nxtcoeff - coeff);
                // coeff = nxtcoeff * mint(2).pow(N);
                coeff = nxtcoeff * powNs[2];

                ppow *= p;
                deg++;
            }
        }
        // int deg = 2;
        // lint ppow = p * p;
        // while (ppow <= M) {
        //     // mint coeff = (mint(deg + 1) / (deg * 2)).pow(N);
        //     mint coeff = (mint(deg + 1)).pow(N);
        //     int i = 0;

        //     auto trans = [&](int i) {
        //         dbgif(N == 3 and M == 4, ppow);
        //         dbgif(N == 3 and M == 4, cp.vs[i]);
        //         dbgif(N == 3 and M == 4, coeff);
        //         dbgif(N == 3 and M == 4, (coeff - mint(deg).pow(N) * p2n));
        //         dp[i] += dp[cp.getidx(cp.vs[i] / ppow)] * (coeff - mint(deg).pow(N) * p2n);
        //     };
        //     for (; ppow <= cp.vs[i]; ++i) {
        //         trans(i);
        //     }
        //     --i;
        //     for (; i >= 0; --i) {
        //         // trans(i);
        //     }
        //     dbgif(M <= 100, ppow);
        //     dbgif(M <= 100, dp);

        //     deg++;
        //     ppow *= p;
        // }
        dbgif(M <= 100, dp);
    }
    dbgif(M <= 100, dp);
    cout << dp[0] << endl;

    // vector<mint> gu;
    // for (auto x : cp.vs) {
    //     mint su = 0;
    //     FOR(i, 1, x + 1) {
    //         mint sigma0 = sieve.divisors(i).size();
    //         su += sigma0.pow(N);
    //     }
    //     gu.push_back(su);
    // }
    // dbgif(M <= 100, gu);
    // dbg(gu[0]);
}
0