#include using namespace std; //* #define INCLUDE_MODINT //*/ #ifdef INCLUDE_MODINT #include using namespace atcoder; using mint = modint998244353; // using mint = modint1000000007; // using mint = modint; #endif namespace mytemplate { using ll = long long; using dbl = double; using ld = long double; using uint = unsigned int; using ull = unsigned long long; using pll = pair; using tlll = tuple; using tllll = tuple; template using vc = vector; template using vvc = vector>; template using vvvc = vector>>; using vb = vc; using vl = vc; using vpll = vc; using vtlll = vc; using vtllll = vc; using vstr = vc; using vvb = vvc; using vvl = vvc; #ifdef __SIZEOF_INT128__ using i128 = __int128_t; i128 stoi128(string s) { i128 res = 0; if (s.front() == '-') { for (int i = 1; i < (int)s.size(); i++) res = 10 * res + s[i] - '0'; res = -res; } else { for (auto c : s) res = 10 * res + c - '0'; } return res; } string i128tos(i128 x) { string sign = "", res = ""; if (x < 0) x = -x, sign = "-"; while (x > 0) { res += '0' + x % 10; x /= 10; } reverse(res.begin(), res.end()); if (res == "") return "0"; return sign + res; } istream &operator>>(istream &is, i128 &a) { string s; is >> s; a = stoi128(s); return is; } ostream &operator<<(ostream &os, const i128 &a) { os << i128tos(a); return os; } #endif #define cauto const auto #define overload4(_1, _2, _3, _4, name, ...) name #define rep1(i, n) for (ll i = 0, nnnnn = ll(n); i < nnnnn; i++) #define rep2(i, l, r) for (ll i = ll(l), rrrrr = ll(r); i < rrrrr; i++) #define rep3(i, l, r, d) for (ll i = ll(l), rrrrr = ll(r), ddddd = ll(d); ddddd > 0 ? i < rrrrr : i > rrrrr; i += d) #define rep(...) overload4(__VA_ARGS__, rep3, rep2, rep1)(__VA_ARGS__) #define repi1(i, n) for (int i = 0, nnnnn = int(n); i < nnnnn; i++) #define repi2(i, l, r) for (int i = int(l), rrrrr = int(r); i < rrrrr; i++) #define repi3(i, l, r, d) for (int i = int(l), rrrrr = int(r), ddddd = int(d); ddddd > 0 ? i < rrrrr : i > rrrrr; i += d) #define repi(...) overload4(__VA_ARGS__, repi3, repi2, repi1)(__VA_ARGS__) #define ALL(a) (a).begin(), (a).end() const ll INF = 4'000'000'000'000'000'037; bool chmin(auto &a, const auto &b) { return a > b ? a = b, true : false; } bool chmax(auto &a, const auto &b) { return a < b ? a = b, true : false; } template T1 safemod(auto a, auto m) { T1 res = a % m; if (res < 0) res += m; return res; } template T1 divfloor(auto a, auto b) { if (b < 0) a = -a, b = -b; return (a - safemod(a, b)) / b; } template T1 divceil(auto a, auto b) { if (b < 0) a = -a, b = -b; return divfloor(a + b - 1, b); } template T1 ipow(auto a, auto b) { if (a == 0) return b == 0 ? 1 : 0; if (a == 1) return a; if (a == -1) return b & 1 ? -1 : 1; ll res = 1; rep(_, b) res *= a; return res; } template T1 mul_limited(auto a, auto b, T1 m = INF) { return b == 0 ? 0 : a > m / b ? m : a * b; } template T1 pow_limited(auto a, auto b, T1 m = INF) { if (a == 0) return b == 0 ? 1 : 0; if (a == 1) return a; ll res = 1; rep(_, b) { if (res > m / a) return m; res *= a; } return res; } template constexpr T iroot(cauto &a, cauto &k) { assert(a >= 0 && k >= 1); if (a <= 1 || k == 1) return a; if (k == 2) return sqrtl(a); auto isok = [&](T x) -> bool { if (x == 0) return true; T res = 1; for (T k2 = k;;) { if (k2 & 1) { if (res > T(a) / x) return false; res *= x; } k2 >>= 1; if (k2 == 0) break; if (x > T(a) / x) return false; x *= x; } return res <= T(a); }; T ok = pow(a, 1.0 / k); while (!isok(ok)) ok--; while (ok < numeric_limits::max() && isok(ok + 1)) ok++; return ok; } template vector b_ary(T1 x, int b) { vector a; while (x > 0) { a.emplace_back(x % b); x /= b; } reverse(a.begin(), a.end()); return a; } template vector b_ary(T1 x, int b, int n) { vector a(n); rep(i, n) { a[i] = x % b; x /= b; } reverse(a.begin(), a.end()); return a; } template string b_ary_str(T1 x, int b) { auto a = b_ary(x, b); string s = ""; for (auto &&ai : a) s += (ai < 10 ? '0' + ai : 'A' + (ai - 10)); return s; } template string b_ary_str(T1 x, int b, int n) { auto a = b_ary(x, b, n); string s = ""; for (auto &&ai : a) s += (ai < 10 ? '0' + ai : 'A' + (ai - 10)); return s; } template vector> iprod(const vector &a) { vector> res; vector tmp(a.size()); auto dfs = [&](auto self, int i) { if (i == (int)a.size()) { res.emplace_back(tmp); return; } rep(j, a[i]) { tmp[i] = j; self(self, i + 1); } }; dfs(dfs, 0); return res; } template struct max_op { T operator()(const T &a, const T &b) const { return max(a, b); } }; template struct min_op { T operator()(const T &a, const T &b) const { return min(a, b); } }; template struct const_fn { T operator()() const { return val; } }; using max_e = const_fn; using min_e = const_fn; using zero_fn = const_fn; template vector digitvec(const string &s) { int n = s.size(); vector a(n); rep(i, n) a[i] = s[i] - '0'; return a; } template auto make_vec(const auto (&sz)[d], const T &init) { if constexpr (i < d) return vector(sz[i], make_vec(sz, init)); else return init; } template vector permid(int n, int base_index = 0) { vector p(n); rep(i, n) p[i] = i + base_index; return p; } template vector perminv(const vector &p) { vector q(p.size()); rep(i, p.size()) q[p[i]] = i; return q; } template vector combid(int n, int k) { vector p(n, 0); fill(p.rbegin(), p.rbegin() + k, 1); return p; } template auto gen_vec(int n, const F &f) { using T = decltype(f(0)); vector res(n); rep(i, n) res[i] = f(i); return res; } // res[i] = op[0, i) for 0 <= i < n+1 template ())> vector cuml(vector v, const F &op = plus<>(), const T &e = 0) { v.emplace_back(e); exclusive_scan(v.begin(), v.end(), v.begin(), e, op); return v; } // res[i] = op[i, n) for 0 <= i < n+1 template ())> vector cumr(vector v, const F &op = plus<>(), const T &e = 0) { v.insert(v.begin(), e); exclusive_scan(v.rbegin(), v.rend(), v.rbegin(), e, op); return v; } // res[i] = v[i] - v[i-1] for 0 <= i < n+1 template vector adjd(vector v) { v.emplace_back(0); adjacent_difference(v.begin(), v.end(), v.begin()); return v; } template vector cumlmax(const vector &v) { return cuml(v, max_op(), max_e()()); } template vector cumrmax(const vector &v) { return cumr(v, max_op(), max_e()()); } template vector cumlmin(const vector &v) { return cuml(v, min_op(), min_e()()); } template vector cumrmin(const vector &v) { return cumr(v, min_op(), min_e()()); } template vector sorted(vector v) { sort(v.begin(), v.end()); return v; } template vector reversed(const vector &v) { return {v.rbegin(), v.rend()}; } template void unique(vector &v) { v.erase(unique(v.begin(), v.end()), v.end()); } template vector uniqued(vector v) { v.erase(unique(v.begin(), v.end()), v.end()); return v; } template void sortunique(vector &v) { sort(v.begin(), v.end()); v.erase(unique(v.begin(), v.end()), v.end()); } template vector sortuniqued(vector v) { sort(v.begin(), v.end()); v.erase(unique(v.begin(), v.end()), v.end()); return v; } template void rotate(vector &v, int k) { rotate(v.begin(), v.begin() + k, v.end()); } template vector rotated(vector v, int k) { rotate(v.begin(), v.begin() + k, v.end()); return v; } string sorted(string s) { sort(s.begin(), s.end()); return s; } string reversed(const string &s) { return {s.rbegin(), s.rend()}; } void unique(string &s) { s.erase(unique(s.begin(), s.end()), s.end()); } string uniqued(string s) { s.erase(unique(s.begin(), s.end()), s.end()); return s; } void sortunique(string &s) { sort(s.begin(), s.end()); s.erase(unique(s.begin(), s.end()), s.end()); } string sortuniqued(string s) { sort(s.begin(), s.end()); s.erase(unique(s.begin(), s.end()), s.end()); return s; } void rotate(string &s, int k) { rotate(s.begin(), s.begin() + k, s.end()); } string rotated(string s, int k) { rotate(s.begin(), s.begin() + k, s.end()); return s; } template vector> top(const vector> &a) { if (a.empty()) return {}; const size_t n = a.size(), m = a[0].size(); vector> b(m, vector(n)); for (size_t i = 0; i < n; i++) for (size_t j = 0; j < m; j++) b[j][i] = a[i].at(j); return b; } vstr top(const vstr &a) { if (a.empty()) return {}; const size_t n = a.size(), m = a[0].size(); vstr b(m, string(n, 0)); for (size_t i = 0; i < n; i++) for (size_t j = 0; j < m; j++) b[j][i] = a[i].at(j); return b; } template vector> rot90(const vector> &a) { if (a.empty()) return {}; const size_t n = a.size(), m = a[0].size(); vector> b(m, vector(n)); for (size_t i = 0; i < n; i++) for (size_t j = 0; j < m; j++) b[j][n - 1 - i] = a[i][j]; return b; } vstr rot90(const vstr &a) { if (a.empty()) return {}; const size_t n = a.size(), m = a[0].size(); vstr b(m, string(n, 0)); for (size_t i = 0; i < n; i++) for (size_t j = 0; j < m; j++) b[j][n - 1 - i] = a[i][j]; return b; } #if __cplusplus < 202002L ull bit_ceil(ull x) { ull y = 1; while (y < x) y <<= 1; return y; } ull bit_floor(ull x) { ull y = 1; while (y <= x) y <<= 1; return y >> 1; } ull bit_width(ull x) { ull y = 1, z = 0; while (y <= x) y <<= 1, z++; return z; } ull countr_zero(ull x) { return __builtin_ctzll(x); } ull popcount(ull x) { return __builtin_popcountll(x); } ull has_single_bit(ull x) { return popcount(x) == 1; } #endif ull lsb_pos(ull x) { assert(x != 0); return countr_zero(x); } ull msb_pos(ull x) { assert(x != 0); return bit_width(x) - 1; } ull lsb_mask(ull x) { assert(x != 0); return x & -x; } ull msb_mask(ull x) { assert(x != 0); return bit_floor(x); } bool btest(ull x, uint k) { return (x >> k) & 1; } template void bset(T &x, uint k, bool b = 1) { b ? x |= (1ULL << k) : x &= ~(1ULL << k); } template void bflip(T &x, uint k) { x ^= (1ULL << k); } bool bsubset(ull x, ull y) { return (x & y) == x; } template vector> bsubsets(T x) { vector> res; for (T y = x; y > 0; y = (y - 1) & x) res.emplace_back(make_pair(y, x & ~y)); res.emplace_back(make_pair(0, x)); return res; } template Tuple tuple_add(Tuple &a, const Tuple &b, const index_sequence) { ((get(a) += get(b)), ...); return a; } template Tuple operator+=(Tuple &a, const Tuple &b) { return tuple_add(a, b, make_index_sequence>{}); } template Tuple operator+(Tuple a, const Tuple &b) { return a += b; } template void offset(vector &v, U add) { for (auto &vi : v) vi += add; } template void offset(vector> &v, U add) { for (auto &vi : v) for (auto &vij : vi) vij += add; } template array, m> top(const vector> &a) { const size_t n = a.size(); array, m> b; b.fill(vector(n)); for (size_t i = 0; i < n; i++) for (size_t j = 0; j < m; j++) b[j][i] = a[i][j]; return b; } template vector> top(const array, n> &a) { if (a.empty()) return {}; const size_t m = a[0].size(); vector> b(m); for (size_t i = 0; i < n; i++) for (size_t j = 0; j < m; j++) b[j][i] = a[i].at(j); return b; } template pair, vector> top(const vector> &a) { const size_t n = a.size(); vector b(n); vector c(n); for (size_t i = 0; i < n; i++) tie(b[i], c[i]) = a[i]; return make_pair(b, c); } template vector> top(const pair, vector> &a) { const size_t n = a.first.size(); vector> b(n); for (size_t i = 0; i < n; i++) b[i] = make_pair(a.first[i], a.second.at(i)); return b; } template tuple, vector, vector> top(const vector> &a) { const size_t n = a.size(); vector b(n); vector c(n); vector d(n); for (size_t i = 0; i < n; i++) tie(b[i], c[i], d[i]) = a[i]; return make_tuple(b, c, d); } template vector> top(const tuple, vector, vector> &a) { const size_t n = get<0>(a).size(); vector> b(n); for (size_t i = 0; i < n; i++) b[i] = make_tuple(get<0>(a)[i], get<1>(a).at(i), get<2>(a).at(i)); return b; } template tuple, vector, vector, vector> top(const vector> &a) { const size_t n = a.size(); vector b(n); vector c(n); vector d(n); vector e(n); for (size_t i = 0; i < n; i++) tie(b[i], c[i], d[i], e[i]) = a[i]; return make_tuple(b, c, d, e); } template vector> top(const tuple, vector, vector, vector> &a) { const size_t n = get<0>(a).size(); vector> b(n); for (size_t i = 0; i < n; i++) b[i] = make_tuple(get<0>(a)[i], get<1>(a).at(i), get<2>(a).at(i), get<3>(a).at(i)); return b; } #ifdef INCLUDE_MODINT using namespace atcoder; template * = nullptr> istream &operator>>(istream &is, T &a) { ll v; is >> v; a = v; return is; } template * = nullptr> ostream &operator<<(ostream &os, const T &a) { os << a.val(); return os; } #define MINT(...) mint __VA_ARGS__; INPUT(__VA_ARGS__) #endif template ::value == true> * = nullptr> istream &operator>>(istream &is, Tuple &t) { apply([&](auto&... a){ (is >> ... >> a); }, t); return is; } template void INPUT(T&... a) { (cin >> ... >> a); } template void INPUTVEC(int n, vector &v) { v.resize(n); rep(i, n) cin >> v[i]; } template void INPUTVEC(int n, vector& v, vector&... vs) { INPUTVEC(n, v); INPUTVEC(n, vs...); } template void INPUTVEC2(int n, int m, vector> &v) { v.assign(n, vector(m)); rep(i, n) rep(j, m) cin >> v[i][j]; } template void INPUTVEC2(int n, int m, vector& v, vector&... vs) { INPUTVEC2(n, m, v); INPUTVEC2(n, m, vs...); } #define INT(...) int __VA_ARGS__; INPUT(__VA_ARGS__) #define LL(...) ll __VA_ARGS__; INPUT(__VA_ARGS__) #define STR(...) string __VA_ARGS__; INPUT(__VA_ARGS__) #define ARR(T, n, ...) array __VA_ARGS__; INPUT(__VA_ARGS__) #define VEC(T, n, ...) vector __VA_ARGS__; INPUTVEC(n, __VA_ARGS__) #define VEC2(T, n, m, ...) vector> __VA_ARGS__; INPUTVEC2(n, m, __VA_ARGS__) template void PRINT(const T &a) { cout << a << '\n'; } template void PRINT(const T& a, const Ts&... b) { cout << a; (cout << ... << (cout << ' ', b)); cout << '\n'; } template void PRINTVEC(const vector &v) { int n = v.size(); rep(i, n) cout << v[i] << (i == n - 1 ? "" : " "); cout << '\n'; } template void PRINTVECT(const vector &v) { for (auto &vi : v) cout << vi << '\n';} template void PRINTVEC2(const vector> &v) { for (auto &vi : v) PRINTVEC(vi); } #define PRINTEXIT(...) do { PRINT(__VA_ARGS__); exit(0); } while (false) #define PRINTRETURN(...) do { PRINT(__VA_ARGS__); return; } while (false) } using namespace mytemplate; #ifdef LOCAL #include // https://github.com/philip82148/cpp-dump namespace cpp_dump::_detail { inline string export_var( const i128 &x, const string &indent, size_t last_line_length, size_t current_depth, bool fail_on_newline, const export_command &command ) { return export_var(i128tos(x), indent, last_line_length, current_depth, fail_on_newline, command); } #ifdef INCLUDE_MODINT template inline std::string export_var( const atcoder::static_modint &mint, const std::string &indent, std::size_t last_line_length, std::size_t current_depth, bool fail_on_newline, const export_command &command ) { return export_var(mint.val(), indent, last_line_length, current_depth, fail_on_newline, command); } template inline std::string export_var( const atcoder::dynamic_modint &mint, const std::string &indent, std::size_t last_line_length, std::size_t current_depth, bool fail_on_newline, const export_command &command ) { return export_var(mint.val(), indent, last_line_length, current_depth, fail_on_newline, command); } #endif } // namespace cpp_dump::_detail #define dump(...) cpp_dump(__VA_ARGS__) namespace cp = cpp_dump; CPP_DUMP_SET_OPTION_GLOBAL(log_label_func, cp::log_label::line()); CPP_DUMP_SET_OPTION_GLOBAL(max_iteration_count, 10000); #else #define dump(...) #endif #define SINGLE_TESTCASE // #define MULTI_TESTCASE // #define AOJ_TESTCASE #define FAST_IO template struct PrimePower { P p; int e; P pe; PrimePower() : p(-1), e(-1), pe(-1) {} PrimePower(P p, int e = 1) : p(p), e(e), pe(ipow(p, e)) {} PrimePower(P p, int e, P pe) : p(p), e(e), pe(pe) {} void mulp() { e++, pe *= p; } void divp() { e--, pe /= p; } template PrimePower(const PrimePower &pp) { p = pp.p, e = pp.e, pe = pp.pe; } }; #ifdef LOCAL CPP_DUMP_DEFINE_EXPORT_OBJECT(PrimePower, p, e, pe); CPP_DUMP_DEFINE_EXPORT_OBJECT(PrimePower, p, e, pe); #endif struct LinearSieve { private: static vector prime_id; public: static int n; static vector> _lpf; static vector primes; static void extend(int _n) { if (_n <= n) return; n = max(_n, 2 * n); prime_id.resize(n + 1, 0); _lpf.resize(n + 1); for (int d = 2; d <= n; d++) { if (_lpf[d].p == -1) _lpf[d] = PrimePower(d, 1, d), primes.emplace_back(d); for (int &i = prime_id[d]; i < (int)primes.size(); i++) { int p = primes[i]; if (p > n / d || p > _lpf[d].p) break; if (_lpf[d].p == p) _lpf[p * d] = PrimePower(p, _lpf[d].e + 1, _lpf[d].pe * p); else _lpf[p * d] = PrimePower(p, 1, p); } } } static PrimePower lpf(int x) { assert(x >= 1 && "LinearSieve::lpf"); extend(x); return _lpf[x]; } static bool is_prime(int x) { if (x <= 1) return false; return lpf(x).p == x; } // 計算量: O(x の素因数の種類数) = O(log x / loglog x) static vector> factorize(int x) { assert(x >= 1 && "LinearSieve::factorize"); extend(x); vector> res; while (x > 1) { res.emplace_back(_lpf[x]); x /= _lpf[x].pe; } return res; } }; vector LinearSieve::prime_id{}; vector> LinearSieve::_lpf{}; int LinearSieve::n{}; vector LinearSieve::primes{}; using sv = LinearSieve; struct SegmentedSieve { // pfs[x - l] は x の素因数 (x 以外) vector> pfs; ll l, r; SegmentedSieve() {} // 前計算の計算量: O( ( √r + (r-l) ) loglog r ) SegmentedSieve(ll l, ll r) : l(l), r(r) { LinearSieve::extend(sqrtl(r) + 1); pfs.resize(r - l + 1); for (const ll p : LinearSieve::primes) for (ll x = max(2 * p, divceil(l, p) * p); x <= r; x += p) pfs[x - l].emplace_back(p); } bool is_prime(ll x) { if (l <= x && x <= r) return pfs[l - x].empty(); assert(x <= INT32_MAX && "SegmentedSieve::is_prime"); return LinearSieve::is_prime(x); } // 計算量: O(log x) vector> factorize(ll x) { if (l <= x && x <= r) { vector> res; for (const ll p : pfs[x - l]) { int e = 0; ll pe = 1; while (x % p == 0) x /= p, e++, pe *= p; res.emplace_back(PrimePower(p, e, pe)); } if (x != 1) res.emplace_back(PrimePower(x, 1, x)); return res; } assert(x <= INT32_MAX && "SegmentedSieve::factorize"); vector> res = LinearSieve::factorize(x); return vector>(res.begin(), res.end()); } }; namespace zeta_mobius_small { // ζa(n) = Σ[d | n] a(d) // a[0] は用いない template vector zeta_divisor(const vector &a) { const int n = (int)a.size() - 1; LinearSieve::extend(n); vector b(a); for (const int &p : LinearSieve::primes) { if (p > n) break; for (int i = 1; i * p <= n; i++) b[i * p] += b[i]; } return b; } // μ は ζ の逆変換 // μa(n) = Σ{d | n} μ(n/d)a(d) cf. メビウスの反転公式 // a[0] は用いない template vector mobius_divisor(const vector &a) { const int n = (int)a.size() - 1; LinearSieve::extend(n); vector b(a); for (const int &p : LinearSieve::primes) { if (p > n) break; for (int i = n / p; i >= 1; i--) b[i * p] -= b[i]; } return b; } // ζ'a(n) = Σ{n | m} a(m) // a[0] は用いない template vector zeta_multiple(const vector &a) { const int n = (int)a.size() - 1; LinearSieve::extend(n); vector b(a); for (const int &p : LinearSieve::primes) { if (p > n) break; for (int i = n / p; i >= 1; i--) b[i] += b[i * p]; } return b; } // μ' は ζ' の逆変換 // μ'a(n) = Σ{n | m} μ(m/n)g(m) // a[0] は用いない template vector mobius_multiple(const vector &a) { const int n = (int)a.size() - 1; LinearSieve::extend(n); vector b(a); for (const int &p : LinearSieve::primes) { if (p > n) break; for (int i = 1; i * p <= n; i++) b[i] -= b[i * p]; } return b; } // |a| = |b| を仮定 // a[0], b[0] は用いない template vector lcm_convolution(const vector &a, const vector &b) { assert(a.size() == b.size() && "lcm_convolution"); vector za = zeta_divisor(a), zb = zeta_divisor(b); vector zc(a.size()); for (int i = 1; i < (int)a.size(); i++) zc[i] = za[i] * zb[i]; return mobius_divisor(zc); } // |a| = |b| を仮定 // a[0], b[0] は用いない template vector gcd_convolution(const vector &a, const vector &b) { assert(a.size() == b.size() && "gcd_convolution"); vector za = zeta_multiple(a), zb = zeta_multiple(b); vector zc(a.size()); for (int i = 1; i < (int)a.size(); i++) zc[i] = za[i] * zb[i]; return mobius_multiple(zc); } }; using namespace zeta_mobius_small; namespace multiplicative { // 完全乗法的関数: f(1) = 1, f(p1^e1 ... pk^ek) = f(p1)^e1 ... f(pk)^ek // f(p) が O(t) 時間で計算できるとき f(1), ..., f(n) を O(n + nt/log n) 時間で計算 // f_primepower の引数は PrimePower 型 (なお f(p) 部分しか用いない) template vector enumerate_completely_multiplicative(int n, const auto &f_primepower) { LinearSieve::extend(n); vector res(n + 1); if (n >= 1) res[1] = 1; for (int d = 2; d <= n; d++) { int p = LinearSieve::_lpf[d].p; if (d == p) res[d] = f_primepower(PrimePower(p, 1, p)); else res[d] = res[p] * res[d / p]; } return res; } // 乗法的関数: f(1) = 1, f(p1^e1 ... pk^ek) = f(p1^e1) ... f(pk^ek) // f(p^e) が O(1) 時間で計算できるとき f(1), ..., f(n) を O(n) 時間で列挙 // f_primepower の引数は PrimePower 型 template vector enumerate_multiplicative(int n, const auto &f_primepower) { LinearSieve::extend(n); vector res(n + 1); if (n >= 1) res[1] = 1; for (int d = 2; d <= n; d++) { const PrimePower &lpf_d = LinearSieve::_lpf[d]; int pe = lpf_d.pe; if (d == pe) res[d] = f_primepower(lpf_d); else res[d] = res[d / pe] * res[pe]; } return res; } // ------ 完全乗法的 ----- // 単位元: ε(1) = 1, otherwise 0 template const auto e_primepower = [](const PrimePower

&q) -> T { return q.pe == 1 ? 1 : 0; }; // ゼータ関数 ζ(s): 1(n) = 1 template const auto zeta_primepower = [](const PrimePower

&q) -> T { return 1; }; // 恒等写像 ζ(s-1): id(n) = n template const auto id_primepower = [](const PrimePower

&q) -> T { return q.pe; }; // f(n) = n^k (k >= 0) // T は modint を想定 // 1 から n までの列挙にかかる計算量: O(n log k / log n) template const auto pow_primepower = [](ll k) { return [&](const PrimePower

&q) -> T { return T(q.pe).pow(k); }; }; // f(n) = n^{-k} (k >= 0) // 逆元がないときは 0 とする // T は modint を想定 // 1 から n までの列挙にかかる計算量: O(n log k) template const auto pow_inv_primepower = [](ll k) { return [&](const PrimePower

&q) -> T { if (internal::is_prime_constexpr(T::mod())) return T(q.pe).pow(safemod(-k, T::mod() - 1)); else return T::mod() % q.p == 0 ? T(0) : T(q.pe).inv().pow(k); }; }; // ----- 乗法的 ----- // メビウス関数: μ(s) = 1/ζ(s) = prod_{p} (1-p) // 重複する素因数があれば 0、相異なる素因数の積のとき、素因数が偶数個なら 1、奇数個なら -1 template const auto mobius_primepower = [](const PrimePower

&q) -> T { return q.e == 0 ? 1 : q.e == 1 ? -1 : 0; }; // 約数個数関数: σ_0(s) = ζ(s)ζ(s) template const auto divisor_count_primepower = [](const PrimePower

&q) -> T { return 1 + q.e; }; // 約数総和関数: σ_1(s) = ζ(s)ζ(s-1) template const auto divisor_sum_primepower = [](const PrimePower

&q) -> T { return (q.pe * q.p - 1) / (q.p - 1); }; // 約数の k 乗和: σ_k(s) = ζ(s)ζ(s-k) // f(p^e) = 1 + p^k + (p^k)^2 + ... + (p^k)^e // T は modint を想定 // 素べき部分の計算量: O(log k + e) template const auto divisor_k_primepower = [](ll k) { return [&](const PrimePower

&q) -> T { T pk = T(q.p).pow(k); T r = 1, res = 0; for (int i = 0; i <= q.e; i++) { res += r; r *= pk; } return res; }; }; // オイラー関数 (トーシェント関数): φ(s) = ζ(s-1)/ζ(s) // n と互いに素な 1 以上 n 以下の整数の個数 // n * prod_{p}(1 - 1/p) template const auto totient_primepower = [](const PrimePower

&q) -> T { return q.pe - q.pe / q.p; }; // ----- 累積和が簡単に求まるもの ----- // 単位元: ε(1) = 1, otherwise 0 template const auto e_prefix_sum = [](ll n) -> T { return 1; }; // ゼータ関数 ζ(s): 1(n) = 1 template const auto zeta_prefix_sum = [](ll n) -> T { return n; }; // 恒等写像 ζ(s-1): id(n) = n template const auto id_prefix_sum = [](ll n) -> T { return n % 2 == 0 ? T(n / 2) * T(n + 1) : T(n) * T((n + 1) / 2); }; }; using namespace multiplicative; // 同じ数で何回も割るとき、modint 等で毎回 log がつかないようにしつつ、整数でも動くようにする template ::is_integer> struct Div { T val, inv; Div() {} Div(const T &val) : val(val), inv(1 / val) {} T ÷(T &x) { return use_inv ? x *= inv : x /= val; } T divided(T x) { return x /= val; } }; // prefix 形式 // f(1), ..., f(k) のテーブルを保持 template struct DirichletP { public: int k; vector f; bool is_multiplicative; DirichletP() {} // 乗法的関数の場合 DirichletP(const int &k, const auto &f_primepower, const bool &is_completely_multiplicative = false) : k(k), is_multiplicative(true) { if (is_completely_multiplicative) f = multiplicative::enumerate_completely_multiplicative(k, f_primepower); else f = multiplicative::enumerate_multiplicative(k, f_primepower); } DirichletP(const vector &f, const bool &is_multiplicative = false) : k((int)f.size() - 1), f(f), is_multiplicative(is_multiplicative) {} private: static vector prod_arbitrary(const vector &a, const vector &b) { assert(a.size() == b.size() && "DirichletP::prod_arbitrary"); const int _k = (int)a.size() - 1; vector c(_k + 1); for (int i = 1; i <= _k; i++) for (int j = 1; i * j <= _k; j++) c[i * j] += a[i] * b[j]; return c; }; static vector prod_half_multiplicative(const vector &a, const vector &b) { assert(a.size() == b.size() && "DirichletP::prod_half_multiplicative"); const int _k = (int)a.size() - 1; LinearSieve::extend(_k); vector c(b); for (const int &p : LinearSieve::primes) { if (p > _k) break; for (int i = _k / p; i > 0; i--) { int j = i * p; ll q = p; // q = p^e int m = i; // j = p^e * m while (q <= _k) { c[j] += a[q] * c[m]; if (m % p != 0) break; q *= p, m /= p; } } } return c; }; static vector prod_multiplicative(const vector &a, const vector &b) { assert(a.size() == b.size() && "DirichletP::prod_multiplicative"); const int _k = (int)a.size() - 1; auto f_primepower = [&](const PrimePower &q) { T res = 0; for (int r = q.pe, s = 1; r >= 1; r /= q.p, s *= q.p) res += a[r] * b[s]; return res; }; return multiplicative::enumerate_multiplicative(_k, f_primepower); }; static vector div_multiplicative(const vector &c, const vector &a) { assert(a.size() == c.size() && "DirichletP::div_multiplicative"); const int _k = (int)a.size() - 1; if (_k == 0) return vector(1); vector b(_k + 1); b[1] = 1; for (const int &p : LinearSieve::primes) { for (ll pe = p; pe <= _k; pe *= p) { b[pe] = c[pe]; for (int q = 1, r = pe; q < pe; q *= p, r /= p) b[pe] -= b[q] * a[r]; } } return multiplicative::enumerate_multiplicative(_k, [&](const PrimePower &q) -> T { return b[q.pe]; }); } static vector div_arbitrary(const vector &c, const vector &a) { assert(a.size() == c.size() && "DirichletP::div_arbitrary"); const int _k = (int)a.size() - 1; if (_k == 0) return vector(1); assert(a[1] != 0 && "div_arbitrary"); vector b(c.begin(), c.end()); Div div_a1(a[1]); for (int i = 1; i <= _k; i++) { div_a1.divide(b[i]); for (int j = 2; i * j <= _k; j++) b[i * j] -= a[j] * b[i]; } return b; } public: // 乗算 // 両方が乗法的なら O(k) // 一方が乗法的なら O(k loglog k) // それ以外なら O(k log k) DirichletP operator*(const DirichletP &other) const { vector res_f; if (this->is_multiplicative && other.is_multiplicative) res_f = prod_multiplicative(this->f, other.f); else if (this->is_multiplicative) res_f = prod_half_multiplicative(this->f, other.f); else if (other.is_multiplicative) res_f = prod_half_multiplicative(other.f, this->f); else res_f = prod_arbitrary(this->f, other.f); bool res_is_multiplicative = this->is_multiplicative && other.is_multiplicative; return DirichletP(res_f, res_is_multiplicative); } DirichletP &operator*=(const DirichletP &other) { return *this = *this * other; } // 逆元 // 乗法的なら O(k), そうでない場合 O(k log k) DirichletP inv() const { DirichletP e(k, multiplicative::e_primepower, true); return e / *this; } // 除算 c/a // a, c ともに乗法的なら O(k) // a のみ乗法的なら O(k loglog k) // それ以外なら O(k log k) DirichletP operator/(const DirichletP &other) const { vector res_f; if (this->is_multiplicative && other.is_multiplicative) res_f = div_multiplicative(this->f, other.f); else if (other.is_multiplicative) res_f = prod_half_multiplicative(other.inv().f, this->f); else res_f = div_arbitrary(this->f, other.f); bool res_is_multiplicative = this->is_multiplicative && other.is_multiplicative; return DirichletP(res_f, res_is_multiplicative); } DirichletP &operator/=(const DirichletP &other) { return *this = *this / other; } DirichletP pow(ll x) const { DirichletP res(k, multiplicative::e_primepower, true); DirichletP tmp(*this); while (x > 0) { if (x & 1) res *= tmp; tmp *= tmp; x >>= 1; } return res; } }; // prefix-quotient 形式 // f(1), ..., f(k) のテーブルに加え、 // F(1), ..., F(k) および F(⌊n/1⌋), ..., F(⌊n/l⌋) のテーブルも保持 // d に 2 以上を指定した場合は F(⌊(n/j)^{1/d}⌋) を保持 // k >= l および ⌊(n/(l+1))^{1/d}⌋ <= k を仮定 template struct DirichletPQ { public: DirichletP pre; ll n; int l; vector F, qF; // F(⌊(n/i)^{1/d}⌋) を得る T &getqF(ll i) { assert(1 <= i && i <= n); if (i <= l) return qF[i]; else return F[iroot(n / i, d)]; } DirichletPQ() {} DirichletPQ(const DirichletP &pre, const ll &n, const int &l, const auto &getF) : pre(pre), n(n), l(l), F(pre.k + 1), qF(l + 1) { assert(pre.k >= l && "DirichletPQ"); assert(iroot(n / (l + 1), d) <= pre.k && "DirichletPQ"); for (int i = 1; i <= pre.k; i++) F[i] = F[i - 1] + pre.f[i]; for (int i = 1; i <= l; i++) qF[i] = getF(iroot(n / i, d)); } DirichletPQ(const DirichletP &pre, const ll &n, const vector &qF) : pre(pre), n(n), l((int)qF.size() - 1), F(pre.k + 1), qF(qF) { assert(pre.k >= l && "DirichletPQ"); assert(iroot(n / (l + 1), d) <= pre.k && "DirichletPQ"); for (int i = 1; i <= pre.k; i++) F[i] = F[i - 1] + pre.f[i]; } // 乗算 // - いずれも乗法的なら O(k + √(nl)) // - このとき k = O(n^{2/3}), l = O(n^{1/3}) が最適 // - 一方のみ乗法的なら O(k loglog k + √(nl)) // - このとき k = O((n/loglog n)^{2/3}), l = O(n^{1/3}(loglog n)^{2/3}) が最適 // - 乗法的でないなら O(k log k + √(nl)) // - このとき k = O((n/log n)^{2/3}), l = O(n^{1/3}(log n)^{2/3}) が最適 // 一般の d では、O(k + (nl)^{1/2d}) 等で、k = O(n^{2/(2d+1)}), l = O(n^{1/(2d+1)}) DirichletPQ operator*(const DirichletPQ &other) const { assert(n == other.n && pre.k == other.pre.k && l == other.l && "DirichletPQ::operator*"); DirichletP res_pre = pre * other.pre; vector res_qF(l + 1); for (ll j = 1; j <= l; j++) { const int m = iroot(n / j, 2 * d); for (ll i = 1; i <= m; i++) res_qF[j] += pre.f[i] * other.getqF(ipow(i, d) * j) + other.pre.f[i] * (getqF(ipow(i, d) * j) - F[m]); } return DirichletPQ(res_pre, n, res_qF); } DirichletPQ &operator*=(const DirichletPQ &other) { return *this = *this * other; } // 除算 // - いずれも乗法的なら O(k + √(nl)) // - このとき k = O(n^{2/3}), l = O(n^{1/3}) が最適 // - 一方のみ乗法的なら O(k loglog k + √(nl)) // - このとき k = O((n/loglog n)^{2/3}), l = O(n^{1/3}(loglog n)^{2/3}) が最適 // - 乗法的でないなら O(k log k + √(nl)) // - このとき k = O((n/log n)^{2/3}), l = O(n^{1/3}(log n)^{2/3}) が最適 // 一般の d では、O(k + (nl)^{1/2d}) 等で、k = O(n^{2/(2d+1)}), l = O(n^{1/(2d+1)}) DirichletPQ operator/(const DirichletPQ &other) const { assert(n == other.n && pre.k == other.pre.k && l == other.l && "DirichletPQ::operator/"); if (pre.k == 0) return DirichletPQ(DirichletP(vector(1)), n, vector(l + 1)); assert(other.pre.f[1] != 0 && "DirichletPQ::operator/"); DirichletPQ res(pre / other.pre, n, qF); Div div_a1(other.pre.f[1]); for (ll j = l; j >= 1; j--) { const int m = iroot(n / j, 2 * d); for (ll i = 1; i <= m; i++) res.qF[j] -= res.pre.f[i] * (other.getqF(ipow(i, d) * j) - other.F[m]); for (ll i = 2; i <= m; i++) res.qF[j] -= other.pre.f[i] * res.getqF(ipow(i, d) * j); div_a1.divide(res.qF[j]); } return res; } DirichletPQ &operator/=(const DirichletPQ &other) { return *this = *this / other; } // 逆元 DirichletPQ inv() const { DirichletPQ e(DirichletP(pre.k, multiplicative::e_primepower, true), n, l, multiplicative::e_prefix_sum); return e / *this; } DirichletPQ pow(ll x) const { DirichletPQ res(DirichletP(pre.k, multiplicative::e_primepower, true), n, l, multiplicative::e_prefix_sum); DirichletPQ tmp(*this); while (x > 0) { if (x & 1) res *= tmp; tmp *= tmp; x >>= 1; } return res; } }; // 積 c = a * b について C(n) ひとつを求める // 計算量: O(k + l) // k = l = ⌊√n⌋ がオーダー最適で O(√n) template T prodFn(const DirichletPQ &a, const DirichletPQ &b) { assert(a.n == b.n && a.pre.k == b.pre.k && a.l == b.l && "prodFn"); T ans = 0; for (int i = 1; i <= a.l; i++) ans += a.pre.f[i] * b.getqF(i); for (int j = 1; j <= a.pre.k; j++) ans += b.pre.f[j] * (a.getqF(j) - a.F[a.l]); return ans; } // 入力: **完全**乗法的関数 f について、PQ 形式および素数部分の値 f(p) // 出力: f の素数部分のみを取り出した関数の PQ 形式 // 計算量: k = l = ⌊√n⌋ とするとき O(n^{3/4} / log n) template DirichletPQ lucy_dp(const DirichletPQ &f_pq, const auto &f_primepower) { // dp_i(v) := 2 以上 v 以下の整数のうち i 以下の素数でふるわれない整数に対する f の和 // i が素数でないか、v < i^2 の場合、dp_i(v) = dp_{i-1}(v) // それ以外の場合、dp_i(v) = dp_{i-1}(v) - f(i) ( dp_{i-1}(⌊v/i⌋) - dp_{i-1}(i-1) ) vector res_qF(f_pq.l + 1); for (int j = 1; j <= f_pq.l; j++) res_qF[j] = f_pq.qF[j] - 1; DirichletPQ res(f_pq.pre, f_pq.n, res_qF); for (int v = 1; v <= f_pq.pre.k; v++) res.F[v] -= 1; for (ll i = 2; i * i <= f_pq.n; i++) { // この時点で dp_{i-1} が入っている if (!LinearSieve::is_prime(i)) continue; T fi = f_primepower(PrimePower(i, 1, i)); T dp_i_minus_1 = res.F[i - 1]; for (int j = 1; j <= f_pq.l; j++) { dump(i, j, f_pq.n / j, res.getqF(j)); // dp(⌊n/j⌋) を更新 if (f_pq.n / j < i * i) break; res.getqF(j) -= fi * (res.getqF(i * j) - dp_i_minus_1); } for (int v = f_pq.pre.k; v >= 1; v--) { // dp(v) を更新 if (v < i * i) break; res.F[v] -= fi * (res.F[v / i] - dp_i_minus_1); } } for (int i = 2; i <= f_pq.pre.k; i++) { if (!LinearSieve::is_prime(i)) res.pre.f[i] = 0; } return res; } void init() {} void main2() { /* segmented sieve https://atcoder.jp/contests/abc227/tasks/abc227_g LL(N, K); SegmentedSieve ssv(N - K + 1, N); vl cnt(max(K, (ll)sqrtl(N)) + 1, 0); mint ans = 1; rep(x, 1, K + 1) { auto facs = ssv.factorize(x); dump(x, facs); for (cauto &fac : facs) cnt.at(fac.p) -= fac.e; } rep(x, N - K + 1, N + 1) { auto facs = ssv.factorize(x); dump(x, facs); for (cauto &fac : facs) { if (fac.p < (int)cnt.size()) cnt.at(fac.p) += fac.e; else ans *= 2; } } rep(x, cnt.size()) ans *= 1 + cnt.at(x); PRINT(ans); //*/ /* gcd convolution https://judge.yosupo.jp/problem/gcd_convolution INT(N); VEC(mint, N, A, B); A.insert(A.begin(), 0), B.insert(B.begin(), 0); auto C = gcd_convolution(A, B); C.erase(C.begin()); PRINTVEC(C); //*/ /* lcm convolution https://judge.yosupo.jp/problem/lcm_convolution INT(N); VEC(mint, N, A, B); A.insert(A.begin(), 0), B.insert(B.begin(), 0); auto C = lcm_convolution(A, B); C.erase(C.begin()); PRINTVEC(C); //*/ /* enumerate multiplicative LL(N); dump(enumerate_completely_multiplicative(N, e_primepower) | cp::index()); dump(enumerate_completely_multiplicative(N, zeta_primepower) | cp::index()); dump(enumerate_completely_multiplicative(N, id_primepower) | cp::index()); dump(enumerate_completely_multiplicative>(N, pow_primepower>(3)) | cp::index()); dump(enumerate_completely_multiplicative>(N, pow_inv_primepower>(3)) | cp::index()); dump(enumerate_multiplicative(N, mobius_primepower) | cp::index()); dump(enumerate_multiplicative(N, divisor_count_primepower) | cp::index()); dump(enumerate_multiplicative(N, divisor_sum_primepower) | cp::index()); dump(enumerate_multiplicative(N, divisor_k_primepower(2)) | cp::index()); dump(enumerate_multiplicative(N, totient_primepower) | cp::index()); //*/ /* dirichletP // declared as multiplicative DirichletP mobius_p(100, mobius_primepower); DirichletP totient_p(100, totient_primepower); DirichletP zeta_p(100, zeta_primepower); DirichletP id_p(100, id_primepower); // declared as non-multiplicative DirichletP mobius_p_nm(mobius_p.f); DirichletP totient_p_nm(totient_p.f); DirichletP zeta_p_nm(zeta_p.f); DirichletP id_p_nm(id_p.f); // totient * zeta = id dump(id_p.f | cp::index()); for (cauto &T : {totient_p, totient_p_nm}) { for (cauto &Z : {zeta_p, zeta_p_nm}) { dump(T.f | cp::index(), Z.f | cp::index(), (T * Z).f | cp::index(), T.is_multiplicative, Z.is_multiplicative); assert((T * Z).f == id_p.f); } } for (cauto &T : {totient_p, totient_p_nm}) { for (cauto &M : {mobius_p, mobius_p_nm}) { dump(T.f | cp::index(), M.f | cp::index(), (T * M).f | cp::index(), T.is_multiplicative, M.is_multiplicative); assert((T / M).f == id_p.f); } } exit(0); //*/ /* https://atcoder.jp/contests/abc172/tasks/abc172_d // prodFn // sum[ij <= N] ij LL(N); ll M = sqrtl(N); DirichletPQ id(DirichletP(M, id_primepower), N, M, id_prefix_sum); ll ans = prodFn(id, id); PRINT(ans); //*/ /* https://atcoder.jp/contests/arc116/tasks/arc116_c // pow // sum((zeta)^N)[M] LL(N, M); ll L = pow(M, 1.0 / 3.0), K = divceil(M, L); DirichletPQ zeta(DirichletP(K, zeta_primepower), M, L, zeta_prefix_sum); mint ans = zeta.pow(N).getqF(1); PRINT(ans); //*/ /* sum of totient https://judge.yosupo.jp/problem/sum_of_totient_function // zeta * phi = id LL(N); ll L = pow(N, 1.0 / 3.0), K = divceil(N, L); DirichletPQ zeta(DirichletP(K, zeta_primepower), N, L, zeta_prefix_sum); DirichletPQ id(DirichletP(K, id_primepower), N, L, id_prefix_sum); DirichletPQ phi = id / zeta; PRINT(phi.getqF(1)); //*/ /* counting square-frees https://judge.yosupo.jp/problem/counting_squarefrees // ζ(s)/ζ(2s) の累積和なので sum[ij <= N, j は平方数] μ(√j) // = sum[k^2 <= N] μ(k) ⌊N/k^2⌋ // = sum[x: ∃k, x = ⌊N/k^2⌋] x( M(⌊√(N/x)⌋) - M(⌊√(N/(x+1))⌋) ) LL(N); ll L = 3 * pow(N, 1.0 / 5.0), K = max(L, (ll)sqrtl(divceil(N, L))); dump(N, K, L); DirichletPQ zeta(DirichletP(K, zeta_primepower), N, L, zeta_prefix_sum); auto mobius = zeta.inv(); ll ans = 0; for (ll k = 1; k * k <= N;) { ll x = N / (k * k); dump(k, x); ans += x * (mobius.getqF(x) - (x == N ? 0 : mobius.getqF(x + 1))); k = sqrtl(N / x) + 1; } PRINT(ans); //*/ /* counting primes https://judge.yosupo.jp/problem/counting_primes LL(N); ll M = sqrtl(N); DirichletPQ zeta(DirichletP(M, zeta_primepower), N, M, zeta_prefix_sum); auto zeta_prime = lucy_dp(zeta, zeta_primepower); PRINT(zeta_prime.getqF(1)); //*/ //* sum of primes https://yukicoder.me/problems/no/713 LL(N); ll M = sqrtl(N); DirichletPQ id(DirichletP(M, id_primepower), N, M, id_prefix_sum); auto id_prime = lucy_dp(id, id_primepower); PRINT(id_prime.getqF(1)); //*/ } void test() { /* #ifdef LOCAL rep(t, 100000) { dump(t); // ----- generate cases ----- ll N = 1 + rand() % 5; ll K = -10 + rand() % 21; vl A(N); rep(i, N) A.at(i) = -10 + rand() % 21; // -------------------------- // ------ check output ------ auto god = naive(K, A); auto ans = solve(K, A); if (god != ans) { dump(N, K, A); dump(god, ans); exit(0); } // -------------------------- } dump("ok"); #endif //*/ } int main() { cauto CERR = [](cauto &val) { #ifndef BOJ cerr << val; #endif }; #if defined FAST_IO and not defined LOCAL CERR("[FAST_IO]\n\n"); cin.tie(0); ios::sync_with_stdio(false); #endif cout << fixed << setprecision(20); test(); init(); #if defined AOJ_TESTCASE or (defined LOCAL and defined SINGLE_TESTCASE) CERR("[AOJ_TESTCASE]\n\n"); while (true) main2(); #elif defined SINGLE_TESTCASE CERR("[SINGLE_TESTCASE]\n\n"); main2(); #elif defined MULTI_TESTCASE CERR("[MULTI_TESTCASE]\n\n"); int T; cin >> T; while (T--) main2(); #endif }