#pragma region Macros
#pragma GCC optimize("O3,unroll-loops")
#pragma GCC target("sse,sse2,sse3,ssse3,sse4,fma,abm,mmx,avx,avx2")
// NOTE(review): the header names were lost in the collapsed source; these two
// are reconstructed from what the file uses (std:: everywhere, __gnu_pbds) — confirm.
#include <bits/stdc++.h>
#include <ext/pb_ds/assoc_container.hpp>
// #include <atcoder/all>
// using namespace atcoder;
using namespace std;
using namespace __gnu_pbds;
// #include <boost/multiprecision/cpp_int.hpp>
// #include <boost/multiprecision/cpp_dec_float.hpp>
// namespace mp = boost::multiprecision;
// using Bint = mp::cpp_int;
// using Bdouble = mp::number<mp::cpp_dec_float</* digits lost in source */>>;

#define pb emplace_back
// From here on every `int` in this file is a 64-bit `long long`.
// Use `signed` where a real 32-bit int is required (e.g. `signed main`).
#define int ll
#define endl '\n'
#define sqrt __builtin_sqrt
#define cbrt __builtin_cbrt
#define hypot __builtin_hypot

using ll = long long;
using ld = long double;

const ld PI = acosl(-1);
const int INF = 1 << 30;
const ll INFL = 1LL << 61;
const int MOD = 998244353;
// const int MOD = 1000000007;
const ld EPS = 1e-10;

// True when a and b differ by less than EPS.
bool equals(ld a, ld b) { return fabs((a) - (b)) < EPS; }

// 8-neighbourhood offsets; the first four entries are the 4-neighbourhood.
const vector<int> dx = {0, 1, 0, -1, 1, 1, -1, -1}; // → ↓ ← ↑ ↘ ↙ ↖ ↗
const vector<int> dy = {1, 0, -1, 0, 1, -1, -1, 1};

// Weighted directed edge. `from` stays -1 when the two-argument constructor
// is used (it was left uninitialized before).
struct Edge {
    int from = -1, to;
    ll cost;
    Edge(int to, ll cost) : to(to), cost(cost) {}
    Edge(int from, int to, ll cost) : from(from), to(to), cost(cost) {}
};

chrono::system_clock::time_point start, now;

// Runs before main(): switches iostreams to fast mode, fixes the floating
// point output format to 10 decimals and records the start time.
// NOTE(review): relies on the iostream objects already being initialized when
// constructor-attribute functions run — confirm on the target toolchain.
__attribute__((constructor)) void constructor() {
    ios::sync_with_stdio(false);
    cin.tie(nullptr);
    cout << fixed << setprecision(10);
    start = chrono::system_clock::now();
}

// Integer power x^n (n >= 0) in 128-bit arithmetic.
// x == 1 and x == -1 are answered directly; any other base must satisfy
// x < INFL at every recursion step (asserted) to keep x * x in range.
__int128_t POW(__int128_t x, int n) {
    assert(n >= 0);
    if (x == 1 or n == 0) return 1;
    if (x == -1) return (n % 2 == 0) ? 1 : -1;
    assert(x < INFL);
    if (n % 2 == 0) return POW(x * x, n / 2);
    return x * POW(x, n - 1);
}

// Quotient q of the division x = q*y + r with 0 <= r < |y| (y != 0).
int per(int x, int y) {
    assert(y != 0);
    if (x >= 0 && y > 0) return x / y;
    if (x >= 0 && y < 0) return x / y - (x % y < 0);
    if (x < 0 && y < 0) return x / y + (x % y < 0);
    return x / y - (x % y < 0); // (x < 0 && y > 0)
}

// int perl(ld x, ld y) { // perld(4.5, 2.1) = 2 // TODO
//     if (-EPS < x && x < 0 or 0 < x && x < EPS) x = 0;
//     if (-EPS < y && y < 0 or 0 < x && x < EPS) y = 0;
assert(!equals(y, 0)); // if (x >= 0 && y > 0) return floor(x / y)+EPS; // if (x >= 0 && y < 0) return floor(x / y) - (x - floor(x/y)*y < -EPS); // if (x < 0 && y < 0) return floor(x / y) + (x - floor(x/y)*y < -EPS); // return floor(x / y) - (x - floor(x/y)*y < -EPS); // (x < 0 && y > 0) // } int mod(int x, int y) { // x = qy + r (0 <= r < y) を満たすr assert(y != 0); if (x >= 0) return x % y; __int128_t ret = x % y; // (x < 0) ret += (__int128_t)abs(y) * INFL; ret %= abs(y); return ret; } // ld modl(ld x, ld y) { // TODO // assert(!equals(y, 0)); // if (x >= -EPS) return (x - floor(x/y)*y); // ld ret = x - floor(x/y)*y; // (x < 0) // ret += abs(y) * INFL; // TODO : オーバーフローする? // ret = x - floor(x/abs(y))*abs(y); // return ret; // } // int floor(int x, int y) { // TODO // assert(y != 0); // if (b < 0) a = -a, b = -b; // return a >= 0 ? a / b : (a + 1) / b - 1; // } // int ceil(int x, int y) { // TODO // assert(y != 0); // if (b < 0) a = -a, b = -b; // return a > 0 ? (a - 1) / b + 1 : a / b; // } // int floorl(ld x, ld y) { return 0; } // TODO // int ceill(ld x, ld y) { return 0; } // TODO // int gauss(int x, int y) { // assert(y != 0); // return x / y; // } // 整数部分(未verify) // int gauss(ld x, ld y) { return 0; } // TODO pair max(const pair &a, const pair &b) { if (a.first > b.first or a.first == b.first && a.second > b.second) { return a; } return b; } pair min(const pair &a, const pair &b) { if (a.first < b.first or a.first == b.first && a.second < b.second) { return a; } return b; } template bool chmax(T &a, const T& b) { if (a < b) { a = b; return true; } return false; } template bool chmin(T &a, const T& b) { if (a > b) { a = b; return true; } return false; } template T mid(T a, T b, T c) { return a + b + c - max({a, b, c}) - min({a, b, c}); } template void sort(T &a, T &b, T &c, bool rev = false) { if (rev == false) { if (a > b) swap(a, b); if (a > c) swap(a, c); if (b > c) swap(b, c); } else { if (c > b) swap(c, b); if (c > a) swap(c, a); if (b > a) swap(b, a); } 
} template void sort(T &a, T &b, T &c, T &d, bool rev = false) { if (rev == false) { if (a > b) swap(a, b); if (a > c) swap(a, c); if (a > d) swap(a, d); if (b > c) swap(b, c); if (b > d) swap(b, d); if (c > d) swap(c, d); } else { if (d > c) swap(d, c); if (d > b) swap(d, b); if (d > a) swap(d, a); if (c > b) swap(c, b); if (c > a) swap(c, a); if (b > a) swap(b, a); } } int countl_zero(int x) { return __builtin_clzll(x); } int countl_one(int x) { int ret = 0; while (x % 2) { x /= 2; ret++; } return ret; } int countr_zero(int x) { return __builtin_ctzll(x); } int countr_one(int x) { int ret = 0, k = 63 - __builtin_clzll(x); while (k != -1 && (x & (1LL << k))) { k--; ret++; } return ret; } int popcount(int x) { return __builtin_popcountll(x); } int unpopcount(int x) { return 64 - __builtin_clzll(x) - __builtin_popcountll(x); } int top_bit(int x) { return 63 - __builtin_clzll(x);} // 2^kの位 int bot_bit(int x) { return __builtin_ctz(x);} // 2^kの位 int MSB(int x) { return 1 << (63 - __builtin_clzll(x)); } // mask int LSB(int x) { return (x & -x); } // mask int bit_width(int x) { return 64 - __builtin_clzll(x); } // 桁数 int ceil_log2(int x) { return 63 - __builtin_clzll(x); } int bit_floor(int x) { return 1 << (63 - __builtin_clzll(x)); } int floor_log2(int x) { return 64 - __builtin_clzll(x-1); } int bit_ceil(int x) { return 1 << (64 - __builtin_clzll(x-1)) - (x==1); } int hamming(int a, int b) { return popcount(a ^ b); } int compcnt(int x) { return (popcount(x^(x >> 1)) + (x&1)) / 2; } class UnionFind { public: UnionFind() = default; UnionFind(int N) : par(N), sz(N, 1) { iota(par.begin(), par.end(), 0); } int root(int x) { if (par[x] == x) return x; return (par[x] = root(par[x])); } bool unite(int x, int y) { int rx = root(x); int ry = root(y); if (rx == ry) return false; if (sz[rx] < sz[ry]) swap(rx, ry); sz[rx] += sz[ry]; par[ry] = rx; return true; } bool issame(int x, int y) { return (root(x) == root(y)); } int size(int x) { return sz[root(x)]; } vector> groups(int N) 
{ vector> G(N); for (int x = 0; x < N; x++) { G[root(x)].push_back(x); } G.erase( remove_if(G.begin(), G.end(), [&](const vector& V) { return V.empty(); }), G.end()); return G; } private: vector par; vector sz; }; template class Modint{ public: int val = 0; Modint(int x = 0) { while (x < 0) x += mod; val = x % mod; } Modint(const Modint &r) { val = r.val; } Modint operator -() { return Modint(-val); } // 単項 Modint operator +(const Modint &r) { return Modint(*this) += r; } Modint operator +(const int &q) { Modint r(q); return Modint(*this) += r; } Modint operator -(const Modint &r) { return Modint(*this) -= r; } Modint operator -(const int &q) { Modint r(q); return Modint(*this) -= r; } Modint operator *(const Modint &r) { return Modint(*this) *= r; } Modint operator *(const int &q) { Modint r(q); return Modint(*this) *= r; } Modint operator /(const Modint &r) { return Modint(*this) /= r; } Modint operator /(const int &q) { Modint r(q); return Modint(*this) /= r; } Modint& operator ++() { val++; if (val >= mod) val -= mod; return *this; } // 前置 Modint operator ++(signed) { ++*this; return *this; } // 後置 Modint& operator --() { val--; if (val < 0) val += mod; return *this; } Modint operator --(signed) { --*this; return *this; } Modint &operator +=(const Modint &r) { val += r.val; if (val >= mod) val -= mod; return *this; } Modint &operator +=(const int &q) { Modint r(q); val += r.val; if (val >= mod) val -= mod; return *this; } Modint &operator -=(const Modint &r) { if (val < r.val) val += mod; val -= r.val; return *this; } Modint &operator -=(const int &q) { Modint r(q); if (val < r.val) val += mod; val -= r.val; return *this; } Modint &operator *=(const Modint &r) { val = val * r.val % mod; return *this; } Modint &operator *=(const int &q) { Modint r(q); val = val * r.val % mod; return *this; } Modint &operator /=(const Modint &r) { int a = r.val, b = mod, u = 1, v = 0; while (b) {int t = a / b; a -= t * b; swap(a, b); u -= t * v; swap(u, v);} val = val * u % mod; 
if (val < 0) val += mod; return *this; } Modint &operator /=(const int &q) { Modint r(q); int a = r.val, b = mod, u = 1, v = 0; while (b) {int t = a / b; a -= t * b; swap(a, b); u -= t * v; swap(u, v);} val = val * u % mod; if (val < 0) val += mod; return *this; } bool operator ==(const Modint& r) { return this -> val == r.val; } bool operator <(const Modint& r) { return this -> val < r.val; } bool operator >(const Modint& r) { return this -> val > r.val; } bool operator !=(const Modint& r) { return this -> val != r.val; } }; using mint = Modint; // using Mint = modint998244353; istream &operator >>(istream &is, mint& x) { int t; is >> t; x = t; return (is); } ostream &operator <<(ostream &os, const mint& x) { return os << x.val; } mint modpow(const mint &x, int n) { assert(n >= 0); // TODO: n <= -1 if (n == 0) return 1; mint t = modpow(x, n / 2); t = t * t; if (n & 1) t = t * x; return t; } int modpow(__int128_t x, int n, int mod) { assert(n >= 0 && mod > 0); // TODO: n <= -1 __int128_t ret = 1; while (n > 0) { if (n % 2 == 1) ret = ret * x % mod; x = x * x % mod; n /= 2; } return ret; } int modinv(__int128_t x, int mod) { assert(mod > 0 && x > 0); if (x == 1) return 1; return mod - modinv(mod % x, mod) * (mod / x) % mod; } istream &operator >>(istream &is, __int128_t& x) { string S; is >> S; __int128_t ret = 0; int f = 1; if (S[0] == '-') f = -1; for (int i = 0; i < S.length(); i++) if ('0' <= S[i] && S[i] <= '9') ret = ret * 10 + S[i] - '0'; x = ret * f; return (is); } ostream &operator <<(ostream &os, __int128_t x) { ostream::sentry s(os); if (s) { __uint128_t tmp = x < 0 ? 
-x : x; char buffer[128]; char *d = end(buffer); do { --d; *d = "0123456789"[tmp % 10]; tmp /= 10; } while (tmp != 0); if (x < 0) { --d; *d = '-'; } int len = end(buffer) - d; if (os.rdbuf()->sputn(d, len) != len) { os.setstate(ios_base::badbit); } } return os; } __int128_t stoll(string &S) { __int128_t ret = 0; int f = 1; if (S[0] == '-') f = -1; for (int i = 0; i < S.length(); i++) if ('0' <= S[i] && S[i] <= '9') ret = ret * 10 + S[i] - '0'; return ret * f; } __int128_t gcd(__int128_t a, __int128_t b) { return b ? gcd(b, a % b) : a; } __int128_t lcm(__int128_t a, __int128_t b) { return a / gcd(a, b) * b; // lcmが__int128_tに収まる必要あり } string to_string(ld x, int k) { // xの小数第k位までをstring化する assert(k >= 0); stringstream ss; ss << setprecision(k + 2) << x; string s = ss.str(); if (s.find('.') == string::npos) s += '.'; int pos = s.find('.'); for (int i = 0; k >= (int)s.size() - 1 - pos; i++) s += '0'; s.pop_back(); if (s.back() == '.') s.pop_back(); return s; // stringstream ss; // 第k+1位を四捨五入して第k位まで返す // ss << setprecision(k + 1) << x; // string s = ss.str(); // if (s.find('.') == string::npos) s += '.'; // int pos = s.find('.'); // for (int i = 0; k > (int)s.size() - 1 - pos; i++) s += '0'; // if (s.back() == '.') s.pop_back(); // return s; } string to_string(__int128_t x) { string ret = ""; if (x < 0) { ret += "-"; x *= -1; } while (x) { ret += (char)('0' + x % 10); x /= 10; } reverse(ret.begin(), ret.end()); return ret; } string to_string(char c) { string s = ""; s += c; return s; } struct SXor128 { uint64_t x = 88172645463325252LL; unsigned Int() { x = x ^ (x << 7); return x = x ^ (x >> 9); } unsigned Int(unsigned mod) { x = x ^ (x << 7); x = x ^ (x >> 9); return x % mod; } unsigned Int(unsigned l, unsigned r) { x = x ^ (x << 7); x = x ^ (x >> 9); return x % (r - l + 1) + l; } double Double() { return double(Int()) / UINT_MAX; } } rnd; struct custom_hash { static uint64_t splitmix64(uint64_t x) { x += 0x9e3779b97f4a7c15; x = (x ^ (x >> 30)) * 0xbf58476d1ce4e5b9; x = 
(x ^ (x >> 27)) * 0x94d049bb133111eb; return x ^ (x >> 31); } size_t operator()(uint64_t x) const { static const uint64_t FIXED_RANDOM = chrono::steady_clock::now().time_since_epoch().count(); return splitmix64(x + FIXED_RANDOM); } }; template size_t HashCombine(const size_t seed,const T &v){ return seed^(hash()(v)+0x9e3779b9+(seed<<6)+(seed>>2)); } template struct hash>{ size_t operator()(const pair &keyval) const noexcept { return HashCombine(hash()(keyval.first), keyval.second); } }; template struct hash>{ size_t operator()(const vector &keyval) const noexcept { size_t s=0; for (auto&& v: keyval) s=HashCombine(s,v); return s; } }; template struct HashTupleCore{ template size_t operator()(const Tuple &keyval) const noexcept{ size_t s=HashTupleCore()(keyval); return HashCombine(s,get(keyval)); } }; template <> struct HashTupleCore<0>{ template size_t operator()(const Tuple &keyval) const noexcept{ return 0; } }; template struct hash>{ size_t operator()(const tuple &keyval) const noexcept { return HashTupleCore>::value>()(keyval); } }; vector _fac, _finv, _inv; void COMinit(int N) { _fac.resize(N + 1); _finv.resize(N + 1); _inv.resize(N + 1); _fac[0] = _fac[1] = 1; _finv[0] = _finv[1] = 1; _inv[1] = 1; for (int i = 2; i <= N; i++) { _fac[i] = _fac[i-1] * mint(i); _inv[i] = -_inv[MOD % i] * mint(MOD / i); _finv[i] = _finv[i - 1] * _inv[i]; } } mint FAC(int N) { if (N < 0) return 0; return _fac[N]; } mint COM(int N, int K) { if (N < K) return 0; if (N < 0 or K < 0) return 0; return _fac[N] * _finv[K] * _finv[N - K]; } mint PERM(int N, int K) { if (N < K) return 0; if (N < 0 or K < 0) return 0; return _fac[N] * _finv[N - K]; } mint NHK(int N, int K) { if (N == 0 && K == 0) return 1; return COM(N + K - 1, K); } #pragma endregion enum { NOTFOUND = 0xFFFFFFFFFFFFFFFFLLU }; class SuccinctBitVector { private: const uint64_t size; // ビットベクトルのサイズ static const uint64_t blockBitNum = 16; static const uint64_t LEVEL_L = 512; static const uint64_t LEVEL_S = 16; vector L; // 
大ブロック vector S; // 小ブロック vector B; // ビットベクトル uint64_t numOne = 0; // 1bitの数 public: explicit SuccinctBitVector(const uint64_t n) : size(n) { const uint64_t s = (n + blockBitNum - 1) / blockBitNum + 1; // ceil(n, blockSize) this->B.assign(s, 0); this->L.assign(n / LEVEL_L + 1, 0); this->S.assign(n / LEVEL_S + 1, 0); } // B[pos] = bit void setBit(const uint64_t bit, const uint64_t pos) { // assert(bit == 0 or bit == 1); // assert(pos < this->size); const uint64_t blockPos = pos / blockBitNum; const uint64_t offset = pos % blockBitNum; if (bit == 1) { B[blockPos] |= (1LLU << offset); } else { B[blockPos] &= (~(1LLU << offset)); } } // B[pos] uint64_t access(const uint64_t pos) { // assert(pos < this->size); const uint64_t blockPos = pos / blockBitNum; const uint64_t offset = pos % blockBitNum; return ((B[blockPos] >> offset) & 1); } void build() { uint64_t num = 0; for (uint64_t i = 0; i <= size; i++) { if (i % LEVEL_L == 0) { L[i / LEVEL_L] = num; } if (i % LEVEL_S == 0) { S[i / LEVEL_S] = num - L[i / LEVEL_L]; } if (i != size and i % blockBitNum == 0) { num += this->popCount(this->B[i / blockBitNum]); } } this-> numOne = num; } // B[0, pos)のbitの数 uint64_t rank(const uint64_t bit, const uint64_t pos) { // assert(bit == 0 or bit == 1); // assert(pos <= this->size); if (bit) { return L[pos / LEVEL_L] + S[pos / LEVEL_S] + popCount(B[pos / blockBitNum] & ((1 << (pos % blockBitNum)) - 1)); } else { return pos - rank(1, pos); } } // rank番目のbitの位置 + 1(rankは1-origin) uint64_t select(const uint64_t bit, const uint64_t rank) { // assert(bit == 0 or bit == 1); // assert(rank > 0); if (bit == 0 and rank > this->size - this-> numOne) { return NOTFOUND; } if (bit == 1 and rank > this-> numOne) { return NOTFOUND; } // 大ブロックL内を検索 uint64_t large_idx = 0; { uint64_t left = 0; uint64_t right = L.size(); while (right - left > 1) { uint64_t mid = (left + right) / 2; uint64_t r = L[mid]; r = (bit) ? 
r : mid * LEVEL_L - L[mid]; if (r < rank) { left = mid; large_idx = mid; } else { right = mid; } } } // 小ブロックS内を検索 uint64_t small_idx = (large_idx * LEVEL_L) / LEVEL_S; { uint64_t left = (large_idx * LEVEL_L) / LEVEL_S; uint64_t right = min(((large_idx + 1) * LEVEL_L) / LEVEL_S, (uint64_t)S.size()); while (right - left > 1) { uint64_t mid = (left + right) / 2; uint64_t r = L[large_idx] + S[mid]; r = (bit) ? r :mid * LEVEL_S - r; if (r < rank) { left = mid; small_idx = mid; } else { right = mid; } } } // Bをブロック単位で順番に探索 uint64_t rank_pos = 0; { const uint64_t begin_block_idx = (small_idx * LEVEL_S) / blockBitNum; uint64_t total_bit = L[large_idx] + S[small_idx]; if (bit == 0) { total_bit = small_idx * LEVEL_S - total_bit; } for (uint64_t i = 0;; ++i) { uint64_t b = popCount(B[begin_block_idx + i]); if (bit == 0) { b = blockBitNum - b; } if (total_bit + b >= rank) { uint64_t block = (bit) ? B[begin_block_idx + i] : ~B[begin_block_idx + i]; rank_pos = (begin_block_idx + i) * blockBitNum + selectInBlock(block, rank - total_bit); break; } total_bit += b; } } return rank_pos + 1; } uint64_t getNumOne() const { return numOne; } void debug() { cout << "LEVEL_L(" << L.size() << ")" << endl; for (uint64_t i = 0 ; i < L.size(); ++i) { cout << L[i] << ", "; } cout << endl; cout << "LEVEL_S(" << S.size() << ")" << endl; for (uint64_t i = 0 ; i < S.size(); ++i) { cout << S[i] << ", "; } cout << endl; } private: uint64_t popCount(uint64_t x) { x = (x & 0x5555555555555555ULL) + ((x >> 1) & 0x5555555555555555ULL); x = (x & 0x3333333333333333ULL) + ((x >> 2) & 0x3333333333333333ULL); x = (x + (x >> 4)) & 0x0f0f0f0f0f0f0f0fULL; x = x + (x >> 8); x = x + (x >> 16); x = x + (x >> 32); return x & 0x7FLLU; } uint64_t selectInBlock(uint64_t x, uint64_t rank) { uint64_t x1 = x - ((x & 0xAAAAAAAAAAAAAAAALLU) >> 1); uint64_t x2 = (x1 & 0x3333333333333333LLU) + ((x1 >> 2) & 0x3333333333333333LLU); uint64_t x3 = (x2 + (x2 >> 4)) & 0x0F0F0F0F0F0F0F0FLLU; uint64_t pos = 0; for (;; pos += 8) { 
uint64_t rank_next = (x3 >> pos) & 0xFFLLU; if (rank <= rank_next) break; rank -= rank_next; } uint64_t v2 = (x2 >> pos) & 0xFLLU; if (rank > v2) { rank -= v2; pos += 4; } uint64_t v1 = (x1 >> pos) & 0x3LLU; if (rank > v1) { rank -= v1; pos += 2; } uint64_t v0 = (x >> pos) & 0x1LLU; if (v0 < rank) { rank -= v0; pos += 1; } return pos; } }; class WaveletMatrix { private: vector bit_arrays; vector begin_one; // 各bitに着目したときの1の開始位置 unordered_map begin_alphabet; // 最後のソートされた配列で各文字の開始位置 vector> cumulative_sum; // 各bitに着目したときの累積和 uint64_t size; // 与えられた配列のサイズ uint64_t maximum_element; // 文字数 uint64_t bit_size; // 文字を表すのに必要なbit数 public: WaveletMatrix (const vector &array) { // assert(array.size() > 0); size = array.size(); maximum_element = *max_element(array.begin(), array.end()) + 1; bit_size = get_num_of_bit(maximum_element); if (bit_size == 0) { bit_size = 1; } for (uint64_t i = 0; i < bit_size; ++i) { SuccinctBitVector sv(size); bit_arrays.push_back(sv); } this->begin_one.resize(bit_size); this->cumulative_sum.resize(bit_size + 1, vector(size + 1, 0)); for (uint64_t j = 0; j < array.size(); ++j) { this->cumulative_sum[0][j + 1] = this->cumulative_sum[0][j] + array[j]; } vector v(array); for (uint64_t i = 0; i < bit_size; ++i) { vector temp; // 0をtempにいれてく for (uint64_t j = 0; j < v.size(); ++j) { uint64_t c = v[j]; uint64_t bit = (c >> (bit_size - i - 1)) & 1; // 上からi番目のbit if (bit == 0) { temp.push_back(c); bit_arrays[i].setBit(0, j); } } this->begin_one[i] = temp.size(); // 1をtempにいれてく for (uint64_t j = 0; j < v.size(); ++j) { uint64_t c = v[j]; uint64_t bit = (c >> (bit_size - i - 1)) & 1; // 上からi番目のbit if (bit == 1) { temp.push_back(c); bit_arrays[i].setBit(1, j); } } for (uint64_t j = 0; j < temp.size(); ++j) { this->cumulative_sum[i + 1][j + 1] = this->cumulative_sum[i + 1][j] + temp[j]; } bit_arrays[i].build(); v = temp; } // ソートされた配列内での各文字の位置を取得 for (int i = v.size() - 1; i >= 0; --i) { this->begin_alphabet[v[i]] = i; } } // v[pos] uint64_t access(uint64_t 
pos) { if (pos >= this->size) { return NOTFOUND; } uint64_t c = 0; for (uint64_t i = 0; i < bit_arrays.size(); ++i) { uint64_t bit = bit_arrays[i].access(pos); // もとの数値のi番目のbit c = (c <<= 1) | bit; pos = bit_arrays[i].rank(bit, pos); if (bit) { pos += this->begin_one[i]; } } return c; } // i番目のcの位置 + 1を返す。rankは1-origin uint64_t select(uint64_t c, uint64_t rank) { // assert(rank > 0); if (c >= maximum_element) { return NOTFOUND; } if (this->begin_alphabet.find(c) == this->begin_alphabet.end()) { return NOTFOUND; } uint64_t index = this->begin_alphabet[c] + rank; for (uint64_t i = 0; i < bit_arrays.size(); ++i) { uint64_t bit = ((c >> i) & 1); // 下からi番目のbit if (bit == 1) { index -= this->begin_one[bit_size - i - 1]; } index = this->bit_arrays[bit_size - i - 1].select(bit, index); } return index; } // v[begin_pos, end_pos)で最大値のindexを返す uint64_t maxRange(uint64_t begin_pos, uint64_t end_pos) { return quantileRange(begin_pos, end_pos, end_pos - begin_pos - 1); } // v[begin_pos, end_pos)で最小値のindexを返す uint64_t minRange(uint64_t begin_pos, uint64_t end_pos) { return quantileRange(begin_pos, end_pos, 0); } // v[begin_pos, end_pos)でk番目に小さい数値のindexを返す(kは0-origin) // つまり小さい順に並べてk番目の値 uint64_t quantileRange(uint64_t begin_pos, uint64_t end_pos, uint64_t k) { if ((end_pos > size || begin_pos >= end_pos) || (k >= end_pos - begin_pos)) { return NOTFOUND; } uint64_t val = 0; for (uint64_t i = 0; i < bit_size; ++i) { const uint64_t num_of_zero_begin = bit_arrays[i].rank(0, begin_pos); const uint64_t num_of_zero_end = bit_arrays[i].rank(0, end_pos); const uint64_t num_of_zero = num_of_zero_end - num_of_zero_begin; // beginからendまでにある0の数 const uint64_t bit = (k < num_of_zero) ? 
0 : 1; // k番目の値の上からi番目のbitが0か1か if (bit) { k -= num_of_zero; begin_pos = this->begin_one[i] + begin_pos - num_of_zero_begin; end_pos = this->begin_one[i] + end_pos - num_of_zero_end; } else { begin_pos = num_of_zero_begin; end_pos = num_of_zero_begin + num_of_zero; } val = ((val << 1) | bit); } uint64_t left = 0; for (uint64_t i = 0; i < bit_size; ++i) { const uint64_t bit = (val >> (bit_size - i - 1)) & 1; // 上からi番目のbit left = bit_arrays[i].rank(bit, left); // cのi番目のbitと同じ数値の数 if (bit) { left += this->begin_one[i]; } } const uint64_t rank = begin_pos + k - left + 1; return select(val, rank) - 1; } // v[0, pos)のcの数 uint64_t rank(uint64_t c, uint64_t pos) { // assert(pos < size); if (c >= maximum_element) { return 0; } if (this->begin_alphabet.find(c) == this->begin_alphabet.end()) { return 0; } for (uint64_t i = 0; i < bit_size; ++i) { uint64_t bit = (c >> (bit_size - i - 1)) & 1; // 上からi番目のbit pos = bit_arrays[i].rank(bit, pos); // cのi番目のbitと同じ数値の数 if (bit) { pos += this->begin_one[i]; } } uint64_t begin_pos = this->begin_alphabet[c]; return pos - begin_pos; } // v[begin_pos, end_pos)で[min, max)に入る値の個数 uint64_t rangeFreq(uint64_t begin_pos, uint64_t end_pos, uint64_t min_c, uint64_t max_c) { if ((end_pos > size || begin_pos >= end_pos) || (min_c >= max_c) || min_c >= maximum_element) { return 0; } const auto maxi_t = rankAll(max_c, begin_pos, end_pos); const auto mini_t = rankAll(min_c, begin_pos, end_pos); return get<1>(maxi_t) - get<1>(mini_t); } // v[0, pos)でcより小さい文字の数 uint64_t rankLessThan(uint64_t c, uint64_t begin, uint64_t end) { auto t = rankAll(c, begin, end); return get<1>(t); } // v[0, pos)でcより大きい文字の数 uint64_t rankMoreThan(uint64_t c, uint64_t begin, uint64_t end) { auto t = rankAll(c, begin, end); return get<2>(t); } // v[begin, end)で(cと同じ値の数、cより小さい値の数、cより大きい値の数)を求める tuple rankAll(const uint64_t c, uint64_t begin, uint64_t end) { // assert(end <= size); const uint64_t num = end - begin; if (begin >= end) { return make_tuple(0, 0, 0); } if (c >= 
maximum_element || end == 0) { return make_tuple(0, num, 0); } uint64_t rank_less_than = 0, rank_more_than = 0; for (size_t i = 0; i < bit_size && begin < end; ++i) { const uint64_t bit = (c >> (bit_size - i - 1)) & 1; const uint64_t rank0_begin = this->bit_arrays[i].rank(0, begin); const uint64_t rank0_end = this->bit_arrays[i].rank(0, end); const uint64_t rank1_begin = begin - rank0_begin; const uint64_t rank1_end = end - rank0_end; if (bit) { rank_less_than += (rank0_end - rank0_begin); // i番目のbitが0のものは除外される begin = this->begin_one[i] + rank1_begin; end = this->begin_one[i] + rank1_end; } else { rank_more_than += (rank1_end - rank1_begin); // i番目のbitが1のものは除外される begin = rank0_begin; end = rank0_end; } } const uint64_t rank = num - rank_less_than - rank_more_than; return make_tuple(rank, rank_less_than, rank_more_than); } // T[s, e)で出現回数が多い順にk個の(値,頻度)を返す // 頻度が同じ場合は値が小さいものが優先される vector> topk(uint64_t s, uint64_t e, uint64_t k) { // assert(s < e); vector> result; // (頻度,深さ,値)の順でソート auto c = [](const tuple &l, const tuple &r) { // width if (get<0>(l) != get<0>(r)) { return get<0>(l) < get<0>(r); } // depth if (get<3>(l) != get<3>(r)) { return get<3>(l) > get<3>(r); } // value if (get<4>(l) != get<4>(r)) { return get<4>(l) > get<4>(r); } return true; }; std::priority_queue, vector>, decltype(c)> que(c); // width, left, right, depth, value que.push(make_tuple(e - s, s, e, 0, 0)); while (not que.empty()) { auto element = que.top(); que.pop(); uint64_t width, left, right, depth, value; tie(width, left, right, depth, value) = element; if (depth >= this->bit_size) { result.emplace_back(make_pair(value, right - left)); if (result.size() >= k) { break; } continue; } // 0 const uint64_t left0 = this->bit_arrays[depth].rank(0, left); const uint64_t right0 = this->bit_arrays[depth].rank(0, right); if (left0 < right0) { que.push(make_tuple(right0 - left0, left0, right0, depth + 1, value)); } // 1 const uint64_t left1 = this->begin_one[depth] + this->bit_arrays[depth].rank(1, 
left); const uint64_t right1 = this->begin_one[depth] + this->bit_arrays[depth].rank(1, right); if (left1 < right1) { que.push(make_tuple(right1 - left1, left1, right1, depth + 1, value | (1 << (bit_size - depth - 1)))); } } return result; }; // T[begin_pos, end_pos)でx <= c < yを満たすcの和を返す uint64_t rangeSum(const uint64_t begin, const uint64_t end, const uint64_t x, const uint64_t y) { return rangeSum(begin, end, 0, 0, x, y); } // T[begin_pos, end_pos)でx <= c < yを満たす最大のcを返す uint64_t prev_value(const uint64_t begin_pos, const uint64_t end_pos, const uint64_t x, uint64_t y) { // assert(end_pos <= size); const uint64_t num = end_pos - begin_pos; if (x >= y or y == 0) { return NOTFOUND; } if (y > maximum_element) { y = maximum_element; } if (begin_pos >= end_pos) { return NOTFOUND; } if (x >= maximum_element || end_pos == 0) { return NOTFOUND; } y--; // x <= c <= yにする stack> s; // (begin, end, depth, c, tight) s.emplace(make_tuple(begin_pos, end_pos, 0, 0, true)); while (not s.empty()) { uint64_t b, e, depth, c; bool tight; tie(b, e, depth, c, tight) = s.top(); s.pop(); if (depth == bit_size) { if (c >= x) { return c; } continue; } const uint64_t bit = (y >> (bit_size - depth - 1)) & 1; const uint64_t rank0_begin = this->bit_arrays[depth].rank(0, b); const uint64_t rank0_end = this->bit_arrays[depth].rank(0, e); const uint64_t rank1_begin = b - rank0_begin; const uint64_t rank1_end = e - rank0_end; // d番目のbitが0のものを使う const uint64_t b0 = rank0_begin; const uint64_t e0 = rank0_end; if (b0 != e0) { // 範囲がつぶれてない const uint64_t c0 = ((c << 1) | 0); s.emplace(make_tuple(b0, e0, depth + 1, c0, tight and bit == 0)); } // d番目のbitが1のものを使う const uint64_t b1 = this->begin_one[depth] + rank1_begin; const uint64_t e1 = this->begin_one[depth] + rank1_end; if (b1 != e1) { if (not tight or bit == 1) { const auto c1 = ((c << 1) | 1); s.emplace(make_tuple(b1, e1, depth + 1, c1, tight)); } } } return NOTFOUND; } // T[begin_pos, end_pos)でx <= c < yを満たす最小のcを返す uint64_t next_value(const 
uint64_t begin_pos, const uint64_t end_pos, const uint64_t x, const uint64_t y) { // assert(end_pos <= size); const uint64_t num = end_pos - begin_pos; if (x >= y or y == 0) { return NOTFOUND; } if (begin_pos >= end_pos) { return NOTFOUND; } if (x >= maximum_element || end_pos == 0) { return NOTFOUND; } stack> s; // (begin, end, depth, c, tight) s.emplace(make_tuple(begin_pos, end_pos, 0, 0, true)); while (not s.empty()) { uint64_t b, e, depth, c; bool tight; tie(b, e, depth, c, tight) = s.top(); s.pop(); if (depth == bit_size) { if (c < y) { return c; } continue; } const uint64_t bit = (x >> (bit_size - depth - 1)) & 1; const uint64_t rank0_begin = this->bit_arrays[depth].rank(0, b); const uint64_t rank0_end = this->bit_arrays[depth].rank(0, e); const uint64_t rank1_begin = b - rank0_begin; const uint64_t rank1_end = e - rank0_end; // d番目のbitが1のものを使う const uint64_t b1 = this->begin_one[depth] + rank1_begin; const uint64_t e1 = this->begin_one[depth] + rank1_end; if (b1 != e1) { const auto c1 = ((c << 1) | 1); s.emplace(make_tuple(b1, e1, depth + 1, c1, tight and bit == 1)); } // d番目のbitが0のものを使う const uint64_t b0 = rank0_begin; const uint64_t e0 = rank0_end; if (b0 != e0) { if (not tight or bit == 0) { const uint64_t c0 = ((c << 1) | 0); s.emplace(make_tuple(b0, e0, depth + 1, c0, tight)); } } } return NOTFOUND; } // T[s1, e1)とT[s2, e2)に共通して出現する要素を求める vector> intersect(uint64_t _s1, uint64_t _e1, uint64_t _s2, uint64_t _e2) { // assert(_s1 < _e1); // assert(_s2 < _e2); vector> intersection; queue> que; // s1, e1, s2, e2, depth, value que.push(make_tuple(_s1, _e1, _s2, _e2, 0, 0)); while (not que.empty()) { auto e = que.front(); que.pop(); uint64_t s1, e1, s2, e2, depth, value; tie(s1, e1, s2, e2, depth, value) = e; if (depth >= this->bit_size) { intersection.emplace_back(make_tuple(value, e1 - s1, e2 - s2)); continue; } // 0 uint64_t s1_0 = this->bit_arrays[depth].rank(0, s1); uint64_t e1_0 = this->bit_arrays[depth].rank(0, e1); uint64_t s2_0 = 
this->bit_arrays[depth].rank(0, s2); uint64_t e2_0 = this->bit_arrays[depth].rank(0, e2); if (s1_0 != e1_0 and s2_0 != e2_0) { que.push(make_tuple(s1_0, e1_0, s2_0, e2_0, depth + 1, value)); } // 1 uint64_t s1_1 = this->begin_one[depth] + this->bit_arrays[depth].rank(1, s1); uint64_t e1_1 = this->begin_one[depth] + this->bit_arrays[depth].rank(1, e1); uint64_t s2_1 = this->begin_one[depth] + this->bit_arrays[depth].rank(1, s2); uint64_t e2_1 = this->begin_one[depth] + this->bit_arrays[depth].rank(1, e2); if (s1_1 != e1_1 and s2_1 != e2_1) { que.push(make_tuple(s1_1, e1_1, s2_1, e2_1, depth + 1, value | (1 << bit_size - depth - 1))); } } return intersection; }; private: uint64_t get_num_of_bit(uint64_t x) { if (x == 0) return 0; x--; uint64_t bit_num = 0; while (x >> bit_num) { ++bit_num; } return bit_num; } uint64_t rangeSum(const uint64_t begin, const uint64_t end, const uint64_t depth, const uint64_t c, const uint64_t x, const uint64_t y) { if (begin == end) { return 0; } if (depth == bit_size) { if (x <= c and c < y) { return c * (end - begin); // 値 * 頻度 } return 0; } const uint64_t next_c = ((uint64_t)1 << (bit_size - depth - 1)) | c; // 上からdepth番目のbitを立てる const uint64_t all_one_c = (((uint64_t)1 << (bit_size - depth - 1)) - 1) | next_c; // depth以降のbitをたてる(これ以降全部1を選んだときの値) if(all_one_c < x or y <= c) { return 0; } // [begin, pos)のすべての要素は[x, y) if (x <= c and all_one_c < y) { return this->cumulative_sum[depth][end] - this->cumulative_sum[depth][begin]; } const uint64_t rank0_begin = this->bit_arrays[depth].rank(0, begin); const uint64_t rank0_end = this->bit_arrays[depth].rank(0, end); const uint64_t rank1_begin = begin - rank0_begin; const uint64_t rank1_end = end - rank0_end; return rangeSum(rank0_begin, rank0_end, depth + 1, c, x, y) + rangeSum(this->begin_one[depth] + rank1_begin, this->begin_one[depth] + rank1_end, depth + 1, next_c, x, y); } }; uint64_t uabs(uint64_t a, uint64_t b) { return max(a, b) - min(a, b); } signed main() { int N, K; cin >> N >> K; 
vector A(N); // uint64_t e10 = 3e9+EPS; for (int i = 0; i < N; i++) { cin >> A[i]; // A[i] = e10 + A[i]; } WaveletMatrix WM(A); // 長さNの数列による初期化を行う // uint64_t inf = e10 + 1e9+1; // uint64_t zero = e10 - 1e9 - 1; uint64_t e9 = 1e9+EPS; uint64_t ans = INFL; for (int i = 0; i + K <= N; i++) { uint64_t l = i, r = i + K - 1; uint64_t m1 = WM.quantileRange(l, r + 1, (r - l) / 2); // index uint64_t vm1 = WM.access(m1); // value uint64_t large1 = WM.rangeFreq(l, r + 1, vm1, e9); // value uint64_t small1 = WM.rangeFreq(l, r + 1, 0, vm1); // value uint64_t sumr = WM.rangeSum(l, r + 1, vm1, e9); // value uint64_t suml = WM.rangeSum(l, r + 1, 0, vm1); // value // cout << m1 << " " << vm1 << endl; // cout << ma1 << " " << vma1 << " " << large1 << endl; // cout << mi1 << " " << vmi1 << " " << small1 << endl; // cout << suml << " " << sumr << endl; uint64_t s1 = uabs(sumr , vm1 * large1) + uabs(suml , vm1 * small1); ans = min(ans, s1); uint64_t m2 = WM.quantileRange(l, r + 1, (r - l + 1) / 2); // index if (m2 != NOTFOUND) { uint64_t vm2 = WM.access(m2); // value uint64_t large2 = WM.rangeFreq(l, r + 1, vm2, e9); // value uint64_t small2 = WM.rangeFreq(l, r + 1, 0, vm2 + 1); // value uint64_t sumr2 = WM.rangeSum(l, r + 1, vm2, e9); // value uint64_t suml2 = WM.rangeSum(l, r + 1, 0, vm2 + 1); // value // cout << m2 << " " << vm2 << endl; // cout << ma2 << " " << vma2 << " " << large2 << endl; // cout << mi2 << " " << vmi2 << " " << small2 << endl; // cout << suml2 << " " << sumr2 << endl; uint64_t s2 = uabs(sumr2 , vm2 * large2) + uabs(suml2 , vm2 * small2); ans = min(ans, s2); } } cout << ans << endl; }