#1013811 (C++23) No.713 素数の和

提出ソース
結果

問題	No.713 素数の和
ユーザー	miscalc
提出日時	2024-09-27 12:17:37
言語	C++23 (gcc 13.3.0 + boost 1.87.0)
結果	AC
実行時間	2 ms / 2,000 ms
コード長	47,063 bytes
コンパイル時間	4,273 ms
コンパイル使用メモリ	281,044 KB
実行使用メモリ	6,944 KB
最終ジャッジ日時	2024-09-27 12:17:43
合計ジャッジ時間	4,509 ms
ジャッジサーバーID （参考情報）	judge4 / judge3
このコードへのチャレンジ
（要ログイン）
ファイルパターン	結果
sample	AC * 3
other	AC * 6
権限があれば一括ダウンロードができます
ソースコード

raw source code
#include <bits/stdc++.h>
using namespace std;

//*
#define INCLUDE_MODINT
//*/
#ifdef INCLUDE_MODINT
#include <atcoder/modint>
using namespace atcoder;
using mint = modint998244353;
// using mint = modint1000000007;
// using mint = modint;
#endif

namespace mytemplate
{
  using ll = long long;
  using dbl = double;
  using ld = long double;
  using uint = unsigned int;
  using ull = unsigned long long;
  using pll = pair<ll, ll>;
  using tlll = tuple<ll, ll, ll>;
  using tllll = tuple<ll, ll, ll, ll>;
  template <class T> using vc = vector<T>;
  template <class T> using vvc = vector<vector<T>>;
  template <class T> using vvvc = vector<vector<vector<T>>>;
  using vb = vc<bool>;
  using vl = vc<ll>;
  using vpll = vc<pll>;
  using vtlll = vc<tlll>;
  using vtllll = vc<tllll>;
  using vstr = vc<string>;
  using vvb = vvc<bool>;
  using vvl = vvc<ll>;
  #ifdef __SIZEOF_INT128__
    using i128 = __int128_t;
    i128 stoi128(string s) { i128 res = 0; if (s.front() == '-') { for (int i = 1; i < (int)s.size(); i++) res = 10 * res + s[i] - '0'; res = -res; } else { for (auto c : s) res = 10 * res + c - '0'; } return res; }
    string i128tos(i128 x) { string sign = "", res = ""; if (x < 0) x = -x, sign = "-"; while (x > 0) { res += '0' + x % 10; x /= 10; } reverse(res.begin(), res.end()); if (res == "") return "0"; return sign + res; }
    istream &operator>>(istream &is, i128 &a) { string s; is >> s; a = stoi128(s); return is; }
    ostream &operator<<(ostream &os, const i128 &a) { os << i128tos(a); return os; }
  #endif
  #define cauto const auto

  #define overload4(_1, _2, _3, _4, name, ...) name
  #define rep1(i, n) for (ll i = 0, nnnnn = ll(n); i < nnnnn; i++)
  #define rep2(i, l, r) for (ll i = ll(l), rrrrr = ll(r); i < rrrrr; i++)
  #define rep3(i, l, r, d) for (ll i = ll(l), rrrrr = ll(r), ddddd = ll(d); ddddd > 0 ? i < rrrrr : i > rrrrr; i += d)
  #define rep(...) overload4(__VA_ARGS__, rep3, rep2, rep1)(__VA_ARGS__)
  #define repi1(i, n) for (int i = 0, nnnnn = int(n); i < nnnnn; i++)
  #define repi2(i, l, r) for (int i = int(l), rrrrr = int(r); i < rrrrr; i++)
  #define repi3(i, l, r, d) for (int i = int(l), rrrrr = int(r), ddddd = int(d); ddddd > 0 ? i < rrrrr : i > rrrrr; i += d)
  #define repi(...) overload4(__VA_ARGS__, repi3, repi2, repi1)(__VA_ARGS__)
  #define ALL(a) (a).begin(), (a).end()

  const ll INF = 4'000'000'000'000'000'037;

  bool chmin(auto &a, const auto &b) { return a > b ? a = b, true : false; }
  bool chmax(auto &a, const auto &b) { return a < b ? a = b, true : false; }
  template <class T1 = ll> T1 safemod(auto a, auto m) { T1 res = a % m; if (res < 0) res += m; return res; }
  template <class T1 = ll> T1 divfloor(auto a, auto b) { if (b < 0) a = -a, b = -b; return (a - safemod(a, b)) / b; }
  template <class T1 = ll> T1 divceil(auto a, auto b) { if (b < 0) a = -a, b = -b; return divfloor(a + b - 1, b); }
  template <class T1 = ll> T1 ipow(auto a, auto b) { if (a == 0) return b == 0 ? 1 : 0; if (a == 1) return a; if (a == -1) return b & 1 ? -1 : 1; ll res = 1; rep(_, b) res *= a; return res; }
  template <class T1 = ll> T1 mul_limited(auto a, auto b, T1 m = INF) { return b == 0 ? 0 : a > m / b ? m : a * b; }
  template <class T1 = ll> T1 pow_limited(auto a, auto b, T1 m = INF) { if (a == 0) return b == 0 ? 1 : 0; if (a == 1) return a; ll res = 1; rep(_, b) { if (res > m / a) return m; res *= a; } return res; }
    template <class T = ll>
  constexpr T iroot(cauto &a, cauto &k)
  {
    assert(a >= 0 && k >= 1);
    if (a <= 1 || k == 1)
      return a;
    if (k == 2)
      return sqrtl(a);

    auto isok = [&](T x) -> bool
    {
      if (x == 0)
        return true;
      T res = 1;
      for (T k2 = k;;)
      {
        if (k2 & 1)
        {
          if (res > T(a) / x)
            return false;
          res *= x;
        }
        k2 >>= 1;
        if (k2 == 0)
          break;
        if (x > T(a) / x)
          return false;
        x *= x;
      }
      return res <= T(a);
    };

    T ok = pow(a, 1.0 / k);
    while (!isok(ok))
      ok--;
    while (ok < numeric_limits<T>::max() && isok(ok + 1))
      ok++;
    return ok;
  }
  template <class T1 = ll> vector<T1> b_ary(T1 x, int b) { vector<T1> a; while (x > 0) { a.emplace_back(x % b); x /= b; } reverse(a.begin(), a.end()); return a; }
  template <class T1 = ll> vector<T1> b_ary(T1 x, int b, int n) { vector<T1> a(n); rep(i, n) { a[i] = x % b; x /= b; } reverse(a.begin(), a.end()); return a; }
  template <class T1 = ll> string b_ary_str(T1 x, int b) { auto a = b_ary(x, b); string s = ""; for (auto &&ai : a) s += (ai < 10 ? '0' + ai : 'A' + (ai - 10)); return s; }
  template <class T1 = ll> string b_ary_str(T1 x, int b, int n) { auto a = b_ary(x, b, n); string s = ""; for (auto &&ai : a) s += (ai < 10 ? '0' + ai : 'A' + (ai - 10)); return s; }
  template <class T>
  vector<vector<T>> iprod(const vector<T> &a)
  {
    vector<vector<T>> res;
    vector<T> tmp(a.size());
    auto dfs = [&](auto self, int i)
    {
      if (i == (int)a.size())
      {
        res.emplace_back(tmp);
        return;
      }
      rep(j, a[i])
      {
        tmp[i] = j;
        self(self, i + 1);
      }
    };
    dfs(dfs, 0);
    return res;
  }

  template <class T = ll> struct max_op { T operator()(const T &a, const T &b) const { return max(a, b); } };
  template <class T = ll> struct min_op { T operator()(const T &a, const T &b) const { return min(a, b); } };
  template <class T, const T val> struct const_fn { T operator()() const { return val; } };
  using max_e = const_fn<ll, -INF>;
  using min_e = const_fn<ll, INF>;
  using zero_fn = const_fn<ll, 0LL>;

  template <class T = ll> vector<T> digitvec(const string &s) { int n = s.size(); vector<T> a(n); rep(i, n) a[i] = s[i] - '0'; return a; }
  template <class T, size_t d, size_t i = 0> auto make_vec(const auto (&sz)[d], const T &init) { if constexpr (i < d) return vector(sz[i], make_vec<T, d, i + 1>(sz, init)); else return init; }
  template <class T = ll> vector<T> permid(int n, int base_index = 0) { vector<T> p(n); rep(i, n) p[i] = i + base_index; return p; }
  template <class T = ll> vector<T> perminv(const vector<T> &p) { vector<T> q(p.size()); rep(i, p.size()) q[p[i]] = i; return q; }
  template <class T = ll> vector<T> combid(int n, int k) { vector<T> p(n, 0); fill(p.rbegin(), p.rbegin() + k, 1); return p; }
  template <class F> auto gen_vec(int n, const F &f) { using T = decltype(f(0)); vector<T> res(n); rep(i, n) res[i] = f(i); return res; }
  // res[i] = op[0, i) for 0 <= i < n+1
  template <class T, class F = decltype(plus<>())> vector<T> cuml(vector<T> v, const F &op = plus<>(), const T &e = 0) { v.emplace_back(e); exclusive_scan(v.begin(), v.end(), v.begin(), e, op); return v; }
  // res[i] = op[i, n) for 0 <= i < n+1
  template <class T, class F = decltype(plus<>())> vector<T> cumr(vector<T> v, const F &op = plus<>(), const T &e = 0) { v.insert(v.begin(), e); exclusive_scan(v.rbegin(), v.rend(), v.rbegin(), e, op); return v; }
  // res[i] = v[i] - v[i-1] for 0 <= i < n+1
  template <class T> vector<T> adjd(vector<T> v) { v.emplace_back(0); adjacent_difference(v.begin(), v.end(), v.begin()); return v; }
  template <class T> vector<T> cumlmax(const vector<T> &v) { return cuml(v, max_op<T>(), max_e()()); }
  template <class T> vector<T> cumrmax(const vector<T> &v) { return cumr(v, max_op<T>(), max_e()()); }
  template <class T> vector<T> cumlmin(const vector<T> &v) { return cuml(v, min_op<T>(), min_e()()); }
  template <class T> vector<T> cumrmin(const vector<T> &v) { return cumr(v, min_op<T>(), min_e()()); }
  template <class T> vector<T> sorted(vector<T> v) { sort(v.begin(), v.end()); return v; }
  template <class T> vector<T> reversed(const vector<T> &v) { return {v.rbegin(), v.rend()}; }
  template <class T> void unique(vector<T> &v) { v.erase(unique(v.begin(), v.end()), v.end()); }
  template <class T> vector<T> uniqued(vector<T> v) { v.erase(unique(v.begin(), v.end()), v.end()); return v; }
  template <class T> void sortunique(vector<T> &v) { sort(v.begin(), v.end()); v.erase(unique(v.begin(), v.end()), v.end()); }
  template <class T> vector<T> sortuniqued(vector<T> v) { sort(v.begin(), v.end()); v.erase(unique(v.begin(), v.end()), v.end()); return v; }
  template <class T> void rotate(vector<T> &v, int k) { rotate(v.begin(), v.begin() + k, v.end()); }
  template <class T> vector<T> rotated(vector<T> v, int k) { rotate(v.begin(), v.begin() + k, v.end()); return v; }
  string sorted(string s) { sort(s.begin(), s.end()); return s; }
  string reversed(const string &s) { return {s.rbegin(), s.rend()}; }
  void unique(string &s) { s.erase(unique(s.begin(), s.end()), s.end()); }
  string uniqued(string s) { s.erase(unique(s.begin(), s.end()), s.end()); return s; }
  void sortunique(string &s) { sort(s.begin(), s.end()); s.erase(unique(s.begin(), s.end()), s.end()); }
  string sortuniqued(string s) { sort(s.begin(), s.end()); s.erase(unique(s.begin(), s.end()), s.end()); return s; }
  void rotate(string &s, int k) { rotate(s.begin(), s.begin() + k, s.end()); }
  string rotated(string s, int k) { rotate(s.begin(), s.begin() + k, s.end()); return s; }
  template <class T> vector<vector<T>> top(const vector<vector<T>> &a) { if (a.empty()) return {}; const size_t n = a.size(), m = a[0].size(); vector<vector<T>> b(m, vector<T>(n)); for (size_t i = 0; i < n; i++) for (size_t j = 0; j < m; j++) b[j][i] = a[i].at(j); return b; }
  vstr top(const vstr &a) { if (a.empty()) return {}; const size_t n = a.size(), m = a[0].size(); vstr b(m, string(n, 0)); for (size_t i = 0; i < n; i++) for (size_t j = 0; j < m; j++) b[j][i] = a[i].at(j); return b; }
  template <class T> vector<vector<T>> rot90(const vector<vector<T>> &a) { if (a.empty()) return {}; const size_t n = a.size(), m = a[0].size(); vector<vector<T>> b(m, vector<T>(n)); for (size_t i = 0; i < n; i++) for (size_t j = 0; j < m; j++) b[j][n - 1 - i] = a[i][j]; return b; }
  vstr rot90(const vstr &a) { if (a.empty()) return {}; const size_t n = a.size(), m = a[0].size(); vstr b(m, string(n, 0)); for (size_t i = 0; i < n; i++) for (size_t j = 0; j < m; j++) b[j][n - 1 - i] = a[i][j]; return b; }

  #if __cplusplus < 202002L
    ull bit_ceil(ull x) { ull y = 1; while (y < x) y <<= 1; return y; }
    ull bit_floor(ull x) { ull y = 1; while (y <= x) y <<= 1; return y >> 1; }
    ull bit_width(ull x) { ull y = 1, z = 0; while (y <= x) y <<= 1, z++; return z; }
    ull countr_zero(ull x) { return __builtin_ctzll(x); }
    ull popcount(ull x) { return __builtin_popcountll(x); }
    ull has_single_bit(ull x) { return popcount(x) == 1; }
  #endif
  ull lsb_pos(ull x) { assert(x != 0); return countr_zero(x); }
  ull msb_pos(ull x) { assert(x != 0); return bit_width(x) - 1; }
  ull lsb_mask(ull x) { assert(x != 0); return x & -x; }
  ull msb_mask(ull x) { assert(x != 0); return bit_floor(x); }
  bool btest(ull x, uint k) { return (x >> k) & 1; }
  template <class T> void bset(T &x, uint k, bool b = 1) { b ? x |= (1ULL << k) : x &= ~(1ULL << k); }
  template <class T> void bflip(T &x, uint k) { x ^= (1ULL << k); }
  bool bsubset(ull x, ull y) { return (x & y) == x; }
  template <class T> vector<pair<T, T>> bsubsets(T x) { vector<pair<T, T>> res; for (T y = x; y > 0; y = (y - 1) & x) res.emplace_back(make_pair(y, x & ~y)); res.emplace_back(make_pair(0, x)); return res; }

  template <class Tuple, size_t... I> Tuple tuple_add(Tuple &a, const Tuple &b, const index_sequence<I...>) { ((get<I>(a) += get<I>(b)), ...); return a; }
  template <class Tuple> Tuple operator+=(Tuple &a, const Tuple &b) { return tuple_add(a, b, make_index_sequence<tuple_size_v<Tuple>>{}); }
  template <class Tuple> Tuple operator+(Tuple a, const Tuple &b) { return a += b; }
  template <class T, class U> void offset(vector<T> &v, U add) { for (auto &vi : v) vi += add; }
  template <class T, class U> void offset(vector<vector<T>> &v, U add) { for (auto &vi : v) for (auto &vij : vi) vij += add; }
  template <class T, const size_t m> array<vector<T>, m> top(const vector<array<T, m>> &a) { const size_t n = a.size(); array<vector<T>, m> b; b.fill(vector<T>(n)); for (size_t i = 0; i < n; i++) for (size_t j = 0; j < m; j++) b[j][i] = a[i][j]; return b; }
  template <class T, const size_t n> vector<array<T, n>> top(const array<vector<T>, n> &a) { if (a.empty()) return {}; const size_t m = a[0].size(); vector<array<T, n>> b(m); for (size_t i = 0; i < n; i++) for (size_t j = 0; j < m; j++) b[j][i] = a[i].at(j); return b; }
  template <class T, class U> pair<vector<T>, vector<U>> top(const vector<pair<T, U>> &a) { const size_t n = a.size(); vector<T> b(n); vector<U> c(n); for (size_t i = 0; i < n; i++) tie(b[i], c[i]) = a[i]; return make_pair(b, c); }
  template <class T, class U> vector<pair<T, U>> top(const pair<vector<T>, vector<U>> &a) { const size_t n = a.first.size(); vector<pair<T, U>> b(n); for (size_t i = 0; i < n; i++) b[i] = make_pair(a.first[i], a.second.at(i)); return b; }
  template <class T1, class T2, class T3> tuple<vector<T1>, vector<T2>, vector<T3>> top(const vector<tuple<T1, T2, T3>> &a) { const size_t n = a.size(); vector<T1> b(n); vector<T2> c(n); vector<T3> d(n); for (size_t i = 0; i < n; i++) tie(b[i], c[i], d[i]) = a[i]; return make_tuple(b, c, d); }
  template <class T1, class T2, class T3> vector<tuple<T1, T2, T3>> top(const tuple<vector<T1>, vector<T2>, vector<T3>> &a) { const size_t n = get<0>(a).size(); vector<tuple<T1, T2, T3>> b(n); for (size_t i = 0; i < n; i++) b[i] = make_tuple(get<0>(a)[i], get<1>(a).at(i), get<2>(a).at(i)); return b; }
  template <class T1, class T2, class T3, class T4> tuple<vector<T1>, vector<T2>, vector<T3>, vector<T4>> top(const vector<tuple<T1, T2, T3, T4>> &a) { const size_t n = a.size(); vector<T1> b(n); vector<T2> c(n); vector<T3> d(n); vector<T4> e(n); for (size_t i = 0; i < n; i++) tie(b[i], c[i], d[i], e[i]) = a[i]; return make_tuple(b, c, d, e); }
  template <class T1, class T2, class T3, class T4> vector<tuple<T1, T2, T3, T4>> top(const tuple<vector<T1>, vector<T2>, vector<T3>, vector<T4>> &a) { const size_t n = get<0>(a).size(); vector<tuple<T1, T2, T3, T4>> b(n); for (size_t i = 0; i < n; i++) b[i] = make_tuple(get<0>(a)[i], get<1>(a).at(i), get<2>(a).at(i), get<3>(a).at(i)); return b; }

  #ifdef INCLUDE_MODINT
    using namespace atcoder;
    template <class T, internal::is_modint_t<T> * = nullptr> istream &operator>>(istream &is, T &a) { ll v; is >> v; a = v; return is; }
    template <class T, internal::is_modint_t<T> * = nullptr> ostream &operator<<(ostream &os, const T &a) { os << a.val(); return os; }
    #define MINT(...) mint __VA_ARGS__; INPUT(__VA_ARGS__)
  #endif
  
  template <class Tuple, enable_if_t<__is_tuple_like<Tuple>::value == true> * = nullptr> istream &operator>>(istream &is, Tuple &t) { apply([&](auto&... a){ (is >> ... >> a); }, t); return is; }
  template <class... T> void INPUT(T&... a) { (cin >> ... >> a); }
  template <class T> void INPUTVEC(int n, vector<T> &v) { v.resize(n); rep(i, n) cin >> v[i]; }
  template <class T, class... Ts> void INPUTVEC(int n, vector<T>& v, vector<Ts>&... vs) { INPUTVEC(n, v); INPUTVEC(n, vs...); }
  template <class T> void INPUTVEC2(int n, int m, vector<vector<T>> &v) { v.assign(n, vector<T>(m)); rep(i, n) rep(j, m) cin >> v[i][j]; }
  template <class T, class... Ts> void INPUTVEC2(int n, int m, vector<T>& v, vector<Ts>&... vs) { INPUTVEC2(n, m, v); INPUTVEC2(n, m, vs...); }
  #define INT(...) int __VA_ARGS__; INPUT(__VA_ARGS__)
  #define LL(...) ll __VA_ARGS__; INPUT(__VA_ARGS__)
  #define STR(...) string __VA_ARGS__; INPUT(__VA_ARGS__)
  #define ARR(T, n, ...) array<T, n> __VA_ARGS__; INPUT(__VA_ARGS__)
  #define VEC(T, n, ...) vector<T> __VA_ARGS__; INPUTVEC(n, __VA_ARGS__)
  #define VEC2(T, n, m, ...) vector<vector<T>> __VA_ARGS__; INPUTVEC2(n, m, __VA_ARGS__)
  template <class T> void PRINT(const T &a) { cout << a << '\n'; }
  template <class T, class... Ts> void PRINT(const T& a, const Ts&... b) { cout << a; (cout << ... << (cout << ' ', b)); cout << '\n'; }
  template <class T> void PRINTVEC(const vector<T> &v) { int n = v.size(); rep(i, n) cout << v[i] << (i == n - 1 ? "" : " "); cout << '\n'; }
  template <class T> void PRINTVECT(const vector<T> &v) { for (auto &vi : v) cout << vi << '\n';}
  template <class T> void PRINTVEC2(const vector<vector<T>> &v) { for (auto &vi : v) PRINTVEC(vi); }
  #define PRINTEXIT(...) do { PRINT(__VA_ARGS__); exit(0); } while (false)
  #define PRINTRETURN(...) do { PRINT(__VA_ARGS__); return; } while (false)
}
using namespace mytemplate;
#ifdef LOCAL
  #include <cpp-dump.hpp> // https://github.com/philip82148/cpp-dump
  namespace cpp_dump::_detail
  {
    inline string export_var(
        const i128 &x, const string &indent, size_t last_line_length,
        size_t current_depth, bool fail_on_newline, const export_command &command
    ) {
      return export_var(i128tos(x), indent, last_line_length, current_depth, fail_on_newline, command);
    }

    #ifdef INCLUDE_MODINT
      template <int m>
      inline std::string export_var(
          const atcoder::static_modint<m> &mint, const std::string &indent, std::size_t last_line_length,
          std::size_t current_depth, bool fail_on_newline, const export_command &command
      ) {
        return export_var(mint.val(), indent, last_line_length, current_depth, fail_on_newline, command);
      }

      template <int m>
      inline std::string export_var(
          const atcoder::dynamic_modint<m> &mint, const std::string &indent, std::size_t last_line_length,
          std::size_t current_depth, bool fail_on_newline, const export_command &command
      ) {
        return export_var(mint.val(), indent, last_line_length, current_depth, fail_on_newline, command);
      }
    #endif

  } // namespace cpp_dump::_detail
  #define dump(...) cpp_dump(__VA_ARGS__)
  namespace cp = cpp_dump;
  CPP_DUMP_SET_OPTION_GLOBAL(log_label_func, cp::log_label::line());
  CPP_DUMP_SET_OPTION_GLOBAL(max_iteration_count, 10000);
#else
  #define dump(...)
#endif

#define SINGLE_TESTCASE
// #define MULTI_TESTCASE
// #define AOJ_TESTCASE

#define FAST_IO

template <class P>
struct PrimePower
{
  P p;
  int e;
  P pe;

  PrimePower() : p(-1), e(-1), pe(-1) {}
  PrimePower(P p, int e = 1) : p(p), e(e), pe(ipow(p, e)) {}
  PrimePower(P p, int e, P pe) : p(p), e(e), pe(pe) {}

  void mulp() { e++, pe *= p; }
  void divp() { e--, pe /= p; }

  template <class P2>
  PrimePower(const PrimePower<P2> &pp) { p = pp.p, e = pp.e, pe = pp.pe; }
};
#ifdef LOCAL
CPP_DUMP_DEFINE_EXPORT_OBJECT(PrimePower<int>, p, e, pe);
CPP_DUMP_DEFINE_EXPORT_OBJECT(PrimePower<ll>, p, e, pe);
#endif

struct LinearSieve
{
private:
  static vector<int> prime_id;

public:
  static int n;
  static vector<PrimePower<int>> _lpf;
  static vector<int> primes;

  static void extend(int _n)
  {
    if (_n <= n)
      return;
    n = max(_n, 2 * n);
    prime_id.resize(n + 1, 0);
    _lpf.resize(n + 1);
    for (int d = 2; d <= n; d++)
    {
      if (_lpf[d].p == -1)
        _lpf[d] = PrimePower<int>(d, 1, d), primes.emplace_back(d);
      for (int &i = prime_id[d]; i < (int)primes.size(); i++)
      {
        int p = primes[i];
        if (p > n / d || p > _lpf[d].p)
          break;
        if (_lpf[d].p == p)
          _lpf[p * d] = PrimePower<int>(p, _lpf[d].e + 1, _lpf[d].pe * p);
        else
          _lpf[p * d] = PrimePower<int>(p, 1, p);
      }
    }
  }

  static PrimePower<int> lpf(int x)
  {
    assert(x >= 1 && "LinearSieve::lpf");
    extend(x);
    return _lpf[x];
  }

  static bool is_prime(int x)
  {
    if (x <= 1)
      return false;
    return lpf(x).p == x;
  }

  // 計算量: O(x の素因数の種類数) = O(log x / loglog x)
  static vector<PrimePower<int>> factorize(int x)
  {
    assert(x >= 1 && "LinearSieve::factorize");
    extend(x);
    vector<PrimePower<int>> res;
    while (x > 1)
    {
      res.emplace_back(_lpf[x]);
      x /= _lpf[x].pe;
    }
    return res;
  }
};
vector<int> LinearSieve::prime_id{};
vector<PrimePower<int>> LinearSieve::_lpf{};
int LinearSieve::n{};
vector<int> LinearSieve::primes{};
using sv = LinearSieve;

struct SegmentedSieve
{
  // pfs[x - l] は x の素因数 (x 以外)
  vector<vector<int>> pfs;
  ll l, r;

  SegmentedSieve() {}
  // 前計算の計算量: O( ( √r + (r-l) ) loglog r )
  SegmentedSieve(ll l, ll r) : l(l), r(r)
  {
    LinearSieve::extend(sqrtl(r) + 1);
    pfs.resize(r - l + 1);
    for (const ll p : LinearSieve::primes)
      for (ll x = max(2 * p, divceil(l, p) * p); x <= r; x += p)
        pfs[x - l].emplace_back(p);
  }

  bool is_prime(ll x)
  {
    if (l <= x && x <= r)
      return pfs[l - x].empty();
    assert(x <= INT32_MAX && "SegmentedSieve::is_prime");
    return LinearSieve::is_prime(x);
  }

  // 計算量: O(log x)
  vector<PrimePower<ll>> factorize(ll x)
  {
    if (l <= x && x <= r)
    {
      vector<PrimePower<ll>> res;
      for (const ll p : pfs[x - l])
      {
        int e = 0;
        ll pe = 1;
        while (x % p == 0)
          x /= p, e++, pe *= p;
        res.emplace_back(PrimePower<ll>(p, e, pe));
      }
      if (x != 1)
        res.emplace_back(PrimePower<ll>(x, 1, x));
      return res;
    }
    assert(x <= INT32_MAX && "SegmentedSieve::factorize");
    vector<PrimePower<int>> res = LinearSieve::factorize(x);
    return vector<PrimePower<ll>>(res.begin(), res.end());
  }
};

namespace zeta_mobius_small
{
  // ζa(n) = Σ[d | n] a(d)
  // a[0] は用いない
  template <class T>
  vector<T> zeta_divisor(const vector<T> &a)
  {
    const int n = (int)a.size() - 1;
    LinearSieve::extend(n);
    vector<T> b(a);
    for (const int &p : LinearSieve::primes)
    {
      if (p > n)
        break;
      for (int i = 1; i * p <= n; i++)
        b[i * p] += b[i];
    }
    return b;
  }

  // μ は ζ の逆変換
  // μa(n) = Σ{d | n} μ(n/d)a(d)  cf. メビウスの反転公式
  // a[0] は用いない
  template <class T>
  vector<T> mobius_divisor(const vector<T> &a)
  {
    const int n = (int)a.size() - 1;
    LinearSieve::extend(n);
    vector<T> b(a);
    for (const int &p : LinearSieve::primes)
    {
      if (p > n)
        break;
      for (int i = n / p; i >= 1; i--)
        b[i * p] -= b[i];
    }
    return b;
  }

  // ζ'a(n) = Σ{n | m} a(m)
  // a[0] は用いない
  template <class T>
  vector<T> zeta_multiple(const vector<T> &a)
  {
    const int n = (int)a.size() - 1;
    LinearSieve::extend(n);
    vector<T> b(a);
    for (const int &p : LinearSieve::primes)
    {
      if (p > n)
        break;
      for (int i = n / p; i >= 1; i--)
        b[i] += b[i * p];
    }
    return b;
  }

  // μ' は ζ' の逆変換
  // μ'a(n) = Σ{n | m} μ(m/n)g(m)
  // a[0] は用いない
  template <class T>
  vector<T> mobius_multiple(const vector<T> &a)
  {
    const int n = (int)a.size() - 1;
    LinearSieve::extend(n);
    vector<T> b(a);
    for (const int &p : LinearSieve::primes)
    {
      if (p > n)
        break;
      for (int i = 1; i * p <= n; i++)
        b[i] -= b[i * p];
    }
    return b;
  }

  // |a| = |b| を仮定
  // a[0], b[0] は用いない
  template <class T>
  vector<T> lcm_convolution(const vector<T> &a, const vector<T> &b)
  {
    assert(a.size() == b.size() && "lcm_convolution");
    vector<T> za = zeta_divisor(a), zb = zeta_divisor(b);
    vector<T> zc(a.size());
    for (int i = 1; i < (int)a.size(); i++)
      zc[i] = za[i] * zb[i];
    return mobius_divisor(zc);
  }

  // |a| = |b| を仮定
  // a[0], b[0] は用いない
  template <class T>
  vector<T> gcd_convolution(const vector<T> &a, const vector<T> &b)
  {
    assert(a.size() == b.size() && "gcd_convolution");
    vector<T> za = zeta_multiple(a), zb = zeta_multiple(b);
    vector<T> zc(a.size());
    for (int i = 1; i < (int)a.size(); i++)
      zc[i] = za[i] * zb[i];
    return mobius_multiple(zc);
  }
};
using namespace zeta_mobius_small;

namespace multiplicative
{
  // 完全乗法的関数: f(1) = 1, f(p1^e1 ... pk^ek) = f(p1)^e1 ... f(pk)^ek
  // f(p) が O(t) 時間で計算できるとき f(1), ..., f(n) を O(n + nt/log n) 時間で計算
  // f_primepower の引数は PrimePower 型 (なお f(p) 部分しか用いない)
  template <class T>
  vector<T> enumerate_completely_multiplicative(int n, const auto &f_primepower)
  {
    LinearSieve::extend(n);
    vector<T> res(n + 1);
    if (n >= 1)
      res[1] = 1;
    for (int d = 2; d <= n; d++)
    {
      int p = LinearSieve::_lpf[d].p;
      if (d == p)
        res[d] = f_primepower(PrimePower<int>(p, 1, p));
      else
        res[d] = res[p] * res[d / p];
    }
    return res;
  }

  // 乗法的関数: f(1) = 1, f(p1^e1 ... pk^ek) = f(p1^e1) ... f(pk^ek)
  // f(p^e) が O(1) 時間で計算できるとき f(1), ..., f(n) を O(n) 時間で列挙
  // f_primepower の引数は PrimePower 型
  template <class T>
  vector<T> enumerate_multiplicative(int n, const auto &f_primepower)
  {
    LinearSieve::extend(n);
    vector<T> res(n + 1);
    if (n >= 1)
      res[1] = 1;
    for (int d = 2; d <= n; d++)
    {
      const PrimePower<int> &lpf_d = LinearSieve::_lpf[d];
      int pe = lpf_d.pe;
      if (d == pe)
        res[d] = f_primepower(lpf_d);
      else
        res[d] = res[d / pe] * res[pe];
    }
    return res;
  }

  // ------ 完全乗法的 -----

  // 単位元: ε(1) = 1, otherwise 0
  template <class T, class P = ll>
  const auto e_primepower = [](const PrimePower<P> &q) -> T
  { return q.pe == 1 ? 1 : 0; };
  // ゼータ関数 ζ(s): 1(n) = 1
  template <class T, class P = ll>
  const auto zeta_primepower = [](const PrimePower<P> &q) -> T
  { return 1; };
  // 恒等写像 ζ(s-1): id(n) = n
  template <class T, class P = ll>
  const auto id_primepower = [](const PrimePower<P> &q) -> T
  { return q.pe; };

  // f(n) = n^k (k >= 0)
  // T は modint を想定
  // 1 から n までの列挙にかかる計算量: O(n log k / log n)
  template <class T, class P = ll>
  const auto pow_primepower = [](ll k)
  {
    return [&](const PrimePower<P> &q) -> T
    { return T(q.pe).pow(k); };
  };
  // f(n) = n^{-k} (k >= 0)
  // 逆元がないときは 0 とする
  // T は modint を想定
  // 1 から n までの列挙にかかる計算量: O(n log k)
  template <class T, class P = ll>
  const auto pow_inv_primepower = [](ll k)
  {
    return [&](const PrimePower<P> &q) -> T
    {
      if (internal::is_prime_constexpr(T::mod()))
        return T(q.pe).pow(safemod(-k, T::mod() - 1));
      else
        return T::mod() % q.p == 0 ? T(0) : T(q.pe).inv().pow(k);
    };
  };

  // ----- 乗法的 -----

  // メビウス関数: μ(s) = 1/ζ(s) = prod_{p} (1-p)
  // 重複する素因数があれば 0、相異なる素因数の積のとき、素因数が偶数個なら 1、奇数個なら -1
  template <class T, class P = ll>
  const auto mobius_primepower = [](const PrimePower<P> &q) -> T
  { return q.e == 0 ? 1 : q.e == 1 ? -1 : 0; };

  // 約数個数関数: σ_0(s) = ζ(s)ζ(s)
  template <class T, class P = ll>
  const auto divisor_count_primepower = [](const PrimePower<P> &q) -> T
  { return 1 + q.e; };
  // 約数総和関数: σ_1(s) = ζ(s)ζ(s-1)
  template <class T, class P = ll>
  const auto divisor_sum_primepower = [](const PrimePower<P> &q) -> T
  { return (q.pe * q.p - 1) / (q.p - 1); };
  // 約数の k 乗和: σ_k(s) = ζ(s)ζ(s-k)
  // f(p^e) = 1 + p^k + (p^k)^2 + ... + (p^k)^e
  // T は modint を想定
  // 素べき部分の計算量: O(log k + e)
  template <class T, class P = ll>
  const auto divisor_k_primepower = [](ll k)
  {
    return [&](const PrimePower<P> &q) -> T
    {
      T pk = T(q.p).pow(k);
      T r = 1, res = 0;
      for (int i = 0; i <= q.e; i++)
      {
        res += r;
        r *= pk;
      }
      return res;
    };
  };

  // オイラー関数 (トーシェント関数): φ(s) = ζ(s-1)/ζ(s)
  // n と互いに素な 1 以上 n 以下の整数の個数
  // n * prod_{p}(1 - 1/p)
  template <class T, class P = ll>
  const auto totient_primepower = [](const PrimePower<P> &q) -> T
  { return q.pe - q.pe / q.p; };

  // ----- 累積和が簡単に求まるもの -----

  // 単位元: ε(1) = 1, otherwise 0
  template <class T>
  const auto e_prefix_sum = [](ll n) -> T
  { return 1; };
  // ゼータ関数 ζ(s): 1(n) = 1
  template <class T>
  const auto zeta_prefix_sum = [](ll n) -> T
  { return n; };
  // 恒等写像 ζ(s-1): id(n) = n
  template <class T>
  const auto id_prefix_sum = [](ll n) -> T
  { return n % 2 == 0 ? T(n / 2) * T(n + 1) : T(n) * T((n + 1) / 2); };
};
using namespace multiplicative;

// 同じ数で何回も割るとき、modint 等で毎回 log がつかないようにしつつ、整数でも動くようにする
template <class T, const bool use_inv = numeric_limits<T>::is_integer>
struct Div
{
  T val, inv;
  Div() {}
  Div(const T &val) : val(val), inv(1 / val) {}

  T &divide(T &x) { return use_inv ? x *= inv : x /= val; }
  T divided(T x) { return x /= val; }
};

// prefix 形式
// f(1), ..., f(k) のテーブルを保持
template <class T>
struct DirichletP
{
public:
  int k;
  vector<T> f;
  bool is_multiplicative;

  DirichletP() {}
  // 乗法的関数の場合
  DirichletP(const int &k, const auto &f_primepower, const bool &is_completely_multiplicative = false)
  : k(k), is_multiplicative(true)
  {
    if (is_completely_multiplicative)
      f = multiplicative::enumerate_completely_multiplicative<T>(k, f_primepower);
    else
      f = multiplicative::enumerate_multiplicative<T>(k, f_primepower);
  }
  DirichletP(const vector<T> &f, const bool &is_multiplicative = false)
  : k((int)f.size() - 1), f(f), is_multiplicative(is_multiplicative)
  {}

private:
  static vector<T> prod_arbitrary(const vector<T> &a, const vector<T> &b)
  {
    assert(a.size() == b.size() && "DirichletP::prod_arbitrary");
    const int _k = (int)a.size() - 1;
    vector<T> c(_k + 1);
    for (int i = 1; i <= _k; i++)
      for (int j = 1; i * j <= _k; j++)
        c[i * j] += a[i] * b[j];
    return c;
  };
  static vector<T> prod_half_multiplicative(const vector<T> &a, const vector<T> &b) 
  {
    assert(a.size() == b.size() && "DirichletP::prod_half_multiplicative");
    const int _k = (int)a.size() - 1;
    LinearSieve::extend(_k);
    vector<T> c(b);
    for (const int &p : LinearSieve::primes)
    {
      if (p > _k)
        break;
      for (int i = _k / p; i > 0; i--)
      {
        int j = i * p;
        ll q = p;  // q = p^e
        int m = i; // j = p^e * m
        while (q <= _k)
        {
          c[j] += a[q] * c[m];
          if (m % p != 0)
            break;
          q *= p, m /= p;
        }
      }
    }
    return c;
  };
  static vector<T> prod_multiplicative(const vector<T> &a, const vector<T> &b)
  {
    assert(a.size() == b.size() && "DirichletP::prod_multiplicative");
    const int _k = (int)a.size() - 1;
    auto f_primepower = [&](const PrimePower<ll> &q)
    {
      T res = 0;
      for (int r = q.pe, s = 1; r >= 1; r /= q.p, s *= q.p)
        res += a[r] * b[s];
      return res;
    };
    return multiplicative::enumerate_multiplicative<T>(_k, f_primepower);
  };

  static vector<T> div_multiplicative(const vector<T> &c, const vector<T> &a)
  {
    assert(a.size() == c.size() && "DirichletP::div_multiplicative");
    const int _k = (int)a.size() - 1;
    if (_k == 0)
      return vector<T>(1);

    vector<T> b(_k + 1);
    b[1] = 1;
    for (const int &p : LinearSieve::primes)
    {
      for (ll pe = p; pe <= _k; pe *= p)
      {
        b[pe] = c[pe];
        for (int q = 1, r = pe; q < pe; q *= p, r /= p)
          b[pe] -= b[q] * a[r];
      }
    }
    return multiplicative::enumerate_multiplicative<T>(_k, [&](const PrimePower<ll> &q) -> T
                                                                 { return b[q.pe]; });
  }
  static vector<T> div_arbitrary(const vector<T> &c, const vector<T> &a)
  {
    assert(a.size() == c.size() && "DirichletP::div_arbitrary");
    const int _k = (int)a.size() - 1;
    if (_k == 0)
      return vector<T>(1);
    assert(a[1] != 0 && "div_arbitrary");

    vector<T> b(c.begin(), c.end());
    Div<T> div_a1(a[1]);
    for (int i = 1; i <= _k; i++)
    {
      div_a1.divide(b[i]);
      for (int j = 2; i * j <= _k; j++)
        b[i * j] -= a[j] * b[i];
    }
    return b;
  }

public:
  // 乗算
  // 両方が乗法的なら O(k)
  // 一方が乗法的なら O(k loglog k)
  // それ以外なら O(k log k)
  DirichletP operator*(const DirichletP<T> &other) const
  {
    vector<T> res_f;
    if (this->is_multiplicative && other.is_multiplicative)
      res_f = prod_multiplicative(this->f, other.f);
    else if (this->is_multiplicative)
      res_f = prod_half_multiplicative(this->f, other.f);
    else if (other.is_multiplicative)
      res_f = prod_half_multiplicative(other.f, this->f);
    else
      res_f = prod_arbitrary(this->f, other.f);

    bool res_is_multiplicative
      = this->is_multiplicative && other.is_multiplicative;

    return DirichletP(res_f, res_is_multiplicative);
  }
  DirichletP &operator*=(const DirichletP<T> &other) { return *this = *this * other; }

  // 逆元 
  // 乗法的なら O(k), そうでない場合 O(k log k)
  DirichletP inv() const
  {
    DirichletP e(k, multiplicative::e_primepower<T>, true);
    return e / *this;
  }

  // 除算 c/a
  // a, c ともに乗法的なら O(k)
  // a のみ乗法的なら O(k loglog k)
  // それ以外なら O(k log k)
  DirichletP operator/(const DirichletP<T> &other) const
  {
    vector<T> res_f;
    if (this->is_multiplicative && other.is_multiplicative)
      res_f = div_multiplicative(this->f, other.f);
    else if (other.is_multiplicative)
      res_f = prod_half_multiplicative(other.inv().f, this->f);
    else
      res_f = div_arbitrary(this->f, other.f);

    bool res_is_multiplicative
      = this->is_multiplicative && other.is_multiplicative;

    return DirichletP(res_f, res_is_multiplicative);
  }
  DirichletP &operator/=(const DirichletP<T> &other) { return *this = *this / other; }

  DirichletP pow(ll x) const
  {
    DirichletP res(k, multiplicative::e_primepower<T>, true);
    DirichletP tmp(*this);
    while (x > 0)
    {
      if (x & 1)
        res *= tmp;
      tmp *= tmp;
      x >>= 1;
    }
    return res;
  }
};

// prefix-quotient 形式
// f(1), ..., f(k) のテーブルに加え、
// F(1), ..., F(k) および F(⌊n/1⌋), ..., F(⌊n/l⌋) のテーブルも保持
// d に 2 以上を指定した場合は F(⌊(n/j)^{1/d}⌋) を保持
// k >= l および ⌊(n/(l+1))^{1/d}⌋ <= k を仮定
template <class T, const int d = 1>
struct DirichletPQ
{
public:
  DirichletP<T> pre;
  ll n;
  int l;
  vector<T> F, qF;

  // F(⌊(n/i)^{1/d}⌋) を得る
  T &getqF(ll i)
  {
    assert(1 <= i && i <= n);
    if (i <= l)
      return qF[i];
    else
      return F[iroot(n / i, d)];
  }

  DirichletPQ() {}
  DirichletPQ(const DirichletP<T> &pre, const ll &n, const int &l, const auto &getF)
  : pre(pre), n(n), l(l), F(pre.k + 1), qF(l + 1)
  {
    assert(pre.k >= l && "DirichletPQ");
    assert(iroot(n / (l + 1), d) <= pre.k && "DirichletPQ");
    for (int i = 1; i <= pre.k; i++)
      F[i] = F[i - 1] + pre.f[i];
    for (int i = 1; i <= l; i++)
      qF[i] = getF(iroot(n / i, d));
  }
  DirichletPQ(const DirichletP<T> &pre, const ll &n, const vector<T> &qF)
  : pre(pre), n(n), l((int)qF.size() - 1), F(pre.k + 1), qF(qF)
  {
    assert(pre.k >= l && "DirichletPQ");
    assert(iroot(n / (l + 1), d) <= pre.k && "DirichletPQ");
    for (int i = 1; i <= pre.k; i++)
      F[i] = F[i - 1] + pre.f[i];
  }

  // 乗算
  // - いずれも乗法的なら O(k + √(nl))
  //   - このとき k = O(n^{2/3}), l = O(n^{1/3}) が最適
  // - 一方のみ乗法的なら O(k loglog k + √(nl))
  //   - このとき k = O((n/loglog n)^{2/3}), l = O(n^{1/3}(loglog n)^{2/3}) が最適
  // - 乗法的でないなら O(k log k + √(nl))
  //   - このとき k = O((n/log n)^{2/3}), l = O(n^{1/3}(log n)^{2/3}) が最適
  // 一般の d では、O(k + (nl)^{1/2d}) 等で、k = O(n^{2/(2d+1)}), l = O(n^{1/(2d+1)})
  DirichletPQ operator*(const DirichletPQ &other) const
  {
    assert(n == other.n && pre.k == other.pre.k && l == other.l && "DirichletPQ::operator*");
    DirichletP<T> res_pre = pre * other.pre;
    vector<T> res_qF(l + 1);
    for (ll j = 1; j <= l; j++)
    {
      const int m = iroot(n / j, 2 * d);
      for (ll i = 1; i <= m; i++)
        res_qF[j] += pre.f[i] * other.getqF(ipow(i, d) * j) + other.pre.f[i] * (getqF(ipow(i, d) * j) - F[m]);
    }
    return DirichletPQ(res_pre, n, res_qF);
  }
  DirichletPQ &operator*=(const DirichletPQ &other) { return *this = *this * other; }

  // 除算
  // - いずれも乗法的なら O(k + √(nl))
  //   - このとき k = O(n^{2/3}), l = O(n^{1/3}) が最適
  // - 一方のみ乗法的なら O(k loglog k + √(nl))
  //   - このとき k = O((n/loglog n)^{2/3}), l = O(n^{1/3}(loglog n)^{2/3}) が最適
  // - 乗法的でないなら O(k log k + √(nl))
  //   - このとき k = O((n/log n)^{2/3}), l = O(n^{1/3}(log n)^{2/3}) が最適
  // 一般の d では、O(k + (nl)^{1/2d}) 等で、k = O(n^{2/(2d+1)}), l = O(n^{1/(2d+1)})
  DirichletPQ operator/(const DirichletPQ &other) const
  {
    assert(n == other.n && pre.k == other.pre.k && l == other.l && "DirichletPQ::operator/");
    if (pre.k == 0)
      return DirichletPQ(DirichletP<T>(vector<T>(1)), n, vector<T>(l + 1));
    assert(other.pre.f[1] != 0 && "DirichletPQ::operator/");
    DirichletPQ res(pre / other.pre, n, qF);
    Div<T> div_a1(other.pre.f[1]);
    for (ll j = l; j >= 1; j--)
    {
      const int m = iroot(n / j, 2 * d);
      for (ll i = 1; i <= m; i++)
        res.qF[j] -= res.pre.f[i] * (other.getqF(ipow(i, d) * j) - other.F[m]);
      for (ll i = 2; i <= m; i++)
        res.qF[j] -= other.pre.f[i] * res.getqF(ipow(i, d) * j);
      div_a1.divide(res.qF[j]);
    }
    return res;
  }
  DirichletPQ &operator/=(const DirichletPQ<T> &other) { return *this = *this / other; }

  // 逆元
  DirichletPQ inv() const
  {
    DirichletPQ e(DirichletP<T>(pre.k, multiplicative::e_primepower<T>, true), n, l, multiplicative::e_prefix_sum<T>);
    return e / *this;
  }

  DirichletPQ pow(ll x) const
  {
    DirichletPQ res(DirichletP<T>(pre.k, multiplicative::e_primepower<T>, true), n, l, multiplicative::e_prefix_sum<T>);
    DirichletPQ tmp(*this);
    while (x > 0)
    {
      if (x & 1)
        res *= tmp;
      tmp *= tmp;
      x >>= 1;
    }
    return res;
  }
};

// 積 c = a * b について C(n) ひとつを求める
// 計算量: O(k + l)
// k = l = ⌊√n⌋ がオーダー最適で O(√n)
template <class T>
T prodFn(const DirichletPQ<T> &a, const DirichletPQ<T> &b)
{
  assert(a.n == b.n && a.pre.k == b.pre.k && a.l == b.l && "prodFn");
  T ans = 0;
  for (int i = 1; i <= a.l; i++)
    ans += a.pre.f[i] * b.getqF(i);
  for (int j = 1; j <= a.pre.k; j++)
    ans += b.pre.f[j] * (a.getqF(j) - a.F[a.l]);
  return ans;
}

// 入力: **完全**乗法的関数 f について、PQ 形式および素数部分の値 f(p)
// 出力: f の素数部分のみを取り出した関数の PQ 形式
// 計算量: k = l = ⌊√n⌋ とするとき O(n^{3/4} / log n)
template <class T>
DirichletPQ<T> lucy_dp(const DirichletPQ<T> &f_pq, const auto &f_primepower)
{
  // dp_i(v) := 2 以上 v 以下の整数のうち i 以下の素数でふるわれない整数に対する f の和
  // i が素数でないか、v < i^2 の場合、dp_i(v) = dp_{i-1}(v)
  // それ以外の場合、dp_i(v) = dp_{i-1}(v) - f(i) ( dp_{i-1}(⌊v/i⌋) - dp_{i-1}(i-1) )

  vector<T> res_qF(f_pq.l + 1);
  for (int j = 1; j <= f_pq.l; j++)
    res_qF[j] = f_pq.qF[j] - 1;
  DirichletPQ<T> res(f_pq.pre, f_pq.n, res_qF);
  for (int v = 1; v <= f_pq.pre.k; v++)
    res.F[v] -= 1;
  
  for (ll i = 2; i * i <= f_pq.n; i++)
  {
    // この時点で dp_{i-1} が入っている
    if (!LinearSieve::is_prime(i))
      continue;
    T fi = f_primepower(PrimePower<ll>(i, 1, i));
    T dp_i_minus_1 = res.F[i - 1];
    for (int j = 1; j <= f_pq.l; j++)
    {
      dump(i, j, f_pq.n / j, res.getqF(j));
      // dp(⌊n/j⌋) を更新
      if (f_pq.n / j < i * i)
        break;
      res.getqF(j) -= fi * (res.getqF(i * j) - dp_i_minus_1);
    }
    for (int v = f_pq.pre.k; v >= 1; v--)
    {
      // dp(v) を更新
      if (v < i * i)
        break;
      res.F[v] -= fi * (res.F[v / i] - dp_i_minus_1);
    }
  }
  for (int i = 2; i <= f_pq.pre.k; i++)
  {
    if (!LinearSieve::is_prime(i))
      res.pre.f[i] = 0;
  }
  return res;
}

void init() {}

void main2()
{
  /* segmented sieve https://atcoder.jp/contests/abc227/tasks/abc227_g
  LL(N, K);
  SegmentedSieve ssv(N - K + 1, N);
  vl cnt(max(K, (ll)sqrtl(N)) + 1, 0);
  mint ans = 1;
  rep(x, 1, K + 1)
  {
    auto facs = ssv.factorize(x);
    dump(x, facs);
    for (cauto &fac : facs)
      cnt.at(fac.p) -= fac.e;
  }
  rep(x, N - K + 1, N + 1)
  {
    auto facs = ssv.factorize(x);
    dump(x, facs);
    for (cauto &fac : facs)
    {
      if (fac.p < (int)cnt.size())
        cnt.at(fac.p) += fac.e;
      else
        ans *= 2;
    }
  }
  rep(x, cnt.size()) ans *= 1 + cnt.at(x);
  PRINT(ans);
  //*/

  /* gcd convolution https://judge.yosupo.jp/problem/gcd_convolution
  INT(N);
  VEC(mint, N, A, B);
  A.insert(A.begin(), 0), B.insert(B.begin(), 0);
  auto C = gcd_convolution(A, B);
  C.erase(C.begin());
  PRINTVEC(C);
  //*/

  /* lcm convolution https://judge.yosupo.jp/problem/lcm_convolution
  INT(N);
  VEC(mint, N, A, B);
  A.insert(A.begin(), 0), B.insert(B.begin(), 0);
  auto C = lcm_convolution(A, B);
  C.erase(C.begin());
  PRINTVEC(C);
  //*/

  /* enumerate multiplicative
  LL(N);
  dump(enumerate_completely_multiplicative<mint>(N, e_primepower<mint>) | cp::index());
  dump(enumerate_completely_multiplicative<mint>(N, zeta_primepower<mint>) | cp::index());
  dump(enumerate_completely_multiplicative<mint>(N, id_primepower<mint>) | cp::index());
  dump(enumerate_completely_multiplicative<static_modint<10000>>(N, pow_primepower<static_modint<10000>>(3)) | cp::index());
  dump(enumerate_completely_multiplicative<static_modint<10000>>(N, pow_inv_primepower<static_modint<10000>>(3)) | cp::index());

  dump(enumerate_multiplicative<mint>(N, mobius_primepower<mint>) | cp::index());
  dump(enumerate_multiplicative<mint>(N, divisor_count_primepower<mint>) | cp::index());
  dump(enumerate_multiplicative<mint>(N, divisor_sum_primepower<mint>) | cp::index());
  dump(enumerate_multiplicative<mint>(N, divisor_k_primepower<mint>(2)) | cp::index());
  dump(enumerate_multiplicative<mint>(N, totient_primepower<mint>) | cp::index());
  //*/

  /* dirichletP
  // declared as multiplicative
  DirichletP<mint> mobius_p(100, mobius_primepower<mint>);
  DirichletP<mint> totient_p(100, totient_primepower<mint>);
  DirichletP<mint> zeta_p(100, zeta_primepower<mint>);
  DirichletP<mint> id_p(100, id_primepower<mint>);

  // declared as non-multiplicative
  DirichletP<mint> mobius_p_nm(mobius_p.f);
  DirichletP<mint> totient_p_nm(totient_p.f);
  DirichletP<mint> zeta_p_nm(zeta_p.f);
  DirichletP<mint> id_p_nm(id_p.f);

  // totient * zeta = id
  dump(id_p.f | cp::index());
  for (cauto &T : {totient_p, totient_p_nm})
  {
    for (cauto &Z : {zeta_p, zeta_p_nm})
    {
      dump(T.f | cp::index(), Z.f | cp::index(), (T * Z).f | cp::index(), T.is_multiplicative, Z.is_multiplicative);
      assert((T * Z).f == id_p.f);
    }
  }
  for (cauto &T : {totient_p, totient_p_nm})
  {
    for (cauto &M : {mobius_p, mobius_p_nm})
    {
      dump(T.f | cp::index(), M.f | cp::index(), (T * M).f | cp::index(), T.is_multiplicative, M.is_multiplicative);
      assert((T / M).f == id_p.f);
    }
  }
  exit(0);
  //*/

  /* https://atcoder.jp/contests/abc172/tasks/abc172_d
  // prodFn
  // sum[ij <= N] ij
  LL(N);
  ll M = sqrtl(N);
  DirichletPQ<ll> id(DirichletP<ll>(M, id_primepower<ll>), N, M, id_prefix_sum<ll>);
  ll ans = prodFn(id, id);
  PRINT(ans);
  //*/

  /* https://atcoder.jp/contests/arc116/tasks/arc116_c
  // pow
  // sum((zeta)^N)[M]
  LL(N, M);
  ll L = pow(M, 1.0 / 3.0), K = divceil(M, L);
  DirichletPQ<mint> zeta(DirichletP<mint>(K, zeta_primepower<mint>), M, L, zeta_prefix_sum<mint>);
  mint ans = zeta.pow(N).getqF(1);
  PRINT(ans);
  //*/

  /* sum of totient https://judge.yosupo.jp/problem/sum_of_totient_function
  // zeta * phi = id
  LL(N);
  ll L = pow(N, 1.0 / 3.0), K = divceil(N, L);
  DirichletPQ<mint> zeta(DirichletP<mint>(K, zeta_primepower<mint>), N, L, zeta_prefix_sum<mint>);
  DirichletPQ<mint> id(DirichletP<mint>(K, id_primepower<mint>), N, L, id_prefix_sum<mint>);
  DirichletPQ<mint> phi = id / zeta;
  PRINT(phi.getqF(1));
  //*/

  /* counting square-frees https://judge.yosupo.jp/problem/counting_squarefrees
  // ζ(s)/ζ(2s) の累積和なので sum[ij <= N, j は平方数] μ(√j)
  // = sum[k^2 <= N] μ(k) ⌊N/k^2⌋
  // = sum[x: ∃k, x = ⌊N/k^2⌋] x( M(⌊√(N/x)⌋) - M(⌊√(N/(x+1))⌋) )
  LL(N);
  ll L = 3 * pow(N, 1.0 / 5.0), K = max(L, (ll)sqrtl(divceil(N, L)));
  dump(N, K, L);
  DirichletPQ<ll, 2> zeta(DirichletP<ll>(K, zeta_primepower<ll>), N, L, zeta_prefix_sum<ll>);
  auto mobius = zeta.inv();
  ll ans = 0;
  for (ll k = 1; k * k <= N;)
  {
    ll x = N / (k * k);
    dump(k, x);
    ans += x * (mobius.getqF(x) - (x == N ? 0 : mobius.getqF(x + 1)));
    k = sqrtl(N / x) + 1;
  }
  PRINT(ans);
  //*/

  /* counting primes https://judge.yosupo.jp/problem/counting_primes
  LL(N);
  ll M = sqrtl(N);
  DirichletPQ<ll> zeta(DirichletP<ll>(M, zeta_primepower<ll>), N, M, zeta_prefix_sum<ll>);
  auto zeta_prime = lucy_dp(zeta, zeta_primepower<ll>);
  PRINT(zeta_prime.getqF(1));
  //*/

  //* sum of primes https://yukicoder.me/problems/no/713
  LL(N);
  ll M = sqrtl(N);
  DirichletPQ<ll> id(DirichletP<ll>(M, id_primepower<ll>), N, M, id_prefix_sum<ll>);
  auto id_prime = lucy_dp(id, id_primepower<ll>);
  PRINT(id_prime.getqF(1));
  //*/
}

void test()
{
  /*
  #ifdef LOCAL
  rep(t, 100000)
  {
    dump(t);

    // ----- generate cases -----
    ll N = 1 + rand() % 5;
    ll K = -10 + rand() % 21;
    vl A(N);
    rep(i, N) A.at(i) = -10 + rand() % 21;
    // --------------------------

    // ------ check output ------
    auto god = naive(K, A);
    auto ans = solve(K, A);
    if (god != ans)
    {
      dump(N, K, A);
      dump(god, ans);
      exit(0);
    }
    // --------------------------
  }
  dump("ok");
  #endif
  //*/
}

int main()
{
  cauto CERR = [](cauto &val)
  {
    #ifndef BOJ
      cerr << val;
    #endif
  };

  #if defined FAST_IO and not defined LOCAL
    CERR("[FAST_IO]\n\n");
    cin.tie(0);
    ios::sync_with_stdio(false);
  #endif
  cout << fixed << setprecision(20);

  test();
  init();

  #if defined AOJ_TESTCASE or (defined LOCAL and defined SINGLE_TESTCASE)
    CERR("[AOJ_TESTCASE]\n\n");
    while (true) main2();
  #elif defined SINGLE_TESTCASE
    CERR("[SINGLE_TESTCASE]\n\n");
    main2();
  #elif defined MULTI_TESTCASE
    CERR("[MULTI_TESTCASE]\n\n");
    int T;
    cin >> T;
    while (T--) main2();
  #endif
}