結果

問題 No.738 平らな農地
ユーザー T101010101T101010101
提出日時 2024-03-19 12:24:05
言語 C++17
(gcc 12.3.0 + boost 1.83.0)
結果
AC  
実行時間 1,388 ms / 2,000 ms
コード長 43,764 bytes
コンパイル時間 7,461 ms
コンパイル使用メモリ 420,980 KB
実行使用メモリ 34,448 KB
最終ジャッジ日時 2024-09-30 05:24:45
合計ジャッジ時間 55,070 ms
ジャッジサーバーID
(参考情報)
judge5 / judge3
このコードへのチャレンジ
(要ログイン)

テストケース

テストケース表示
入力 結果 実行時間
実行使用メモリ
testcase_00 AC 2 ms
6,816 KB
testcase_01 AC 2 ms
6,820 KB
testcase_02 AC 2 ms
6,820 KB
testcase_03 AC 2 ms
6,816 KB
testcase_04 AC 2 ms
6,820 KB
testcase_05 AC 24 ms
6,820 KB
testcase_06 AC 28 ms
6,820 KB
testcase_07 AC 17 ms
6,816 KB
testcase_08 AC 10 ms
6,816 KB
testcase_09 AC 6 ms
6,820 KB
testcase_10 AC 3 ms
6,820 KB
testcase_11 AC 9 ms
6,820 KB
testcase_12 AC 5 ms
6,820 KB
testcase_13 AC 20 ms
6,820 KB
testcase_14 AC 8 ms
6,816 KB
testcase_15 AC 743 ms
29,832 KB
testcase_16 AC 752 ms
30,492 KB
testcase_17 AC 950 ms
30,360 KB
testcase_18 AC 923 ms
29,692 KB
testcase_19 AC 1,165 ms
31,864 KB
testcase_20 AC 768 ms
30,500 KB
testcase_21 AC 1,064 ms
32,120 KB
testcase_22 AC 806 ms
30,636 KB
testcase_23 AC 1,048 ms
31,176 KB
testcase_24 AC 1,070 ms
30,644 KB
testcase_25 AC 6 ms
6,816 KB
testcase_26 AC 6 ms
6,816 KB
testcase_27 AC 7 ms
6,820 KB
testcase_28 AC 6 ms
6,820 KB
testcase_29 AC 7 ms
6,820 KB
testcase_30 AC 6 ms
6,820 KB
testcase_31 AC 7 ms
6,820 KB
testcase_32 AC 7 ms
6,820 KB
testcase_33 AC 7 ms
6,820 KB
testcase_34 AC 6 ms
6,816 KB
testcase_35 AC 6 ms
6,816 KB
testcase_36 AC 6 ms
6,816 KB
testcase_37 AC 6 ms
6,820 KB
testcase_38 AC 6 ms
6,816 KB
testcase_39 AC 6 ms
6,820 KB
testcase_40 AC 7 ms
6,816 KB
testcase_41 AC 7 ms
6,816 KB
testcase_42 AC 6 ms
6,816 KB
testcase_43 AC 6 ms
6,820 KB
testcase_44 AC 6 ms
6,816 KB
testcase_45 AC 679 ms
32,660 KB
testcase_46 AC 716 ms
30,088 KB
testcase_47 AC 674 ms
31,784 KB
testcase_48 AC 565 ms
30,764 KB
testcase_49 AC 563 ms
30,364 KB
testcase_50 AC 540 ms
31,708 KB
testcase_51 AC 732 ms
31,984 KB
testcase_52 AC 648 ms
30,492 KB
testcase_53 AC 681 ms
31,168 KB
testcase_54 AC 729 ms
31,984 KB
testcase_55 AC 760 ms
31,856 KB
testcase_56 AC 723 ms
31,568 KB
testcase_57 AC 648 ms
30,076 KB
testcase_58 AC 648 ms
31,224 KB
testcase_59 AC 632 ms
32,660 KB
testcase_60 AC 592 ms
32,388 KB
testcase_61 AC 616 ms
32,120 KB
testcase_62 AC 624 ms
30,096 KB
testcase_63 AC 760 ms
32,656 KB
testcase_64 AC 702 ms
32,120 KB
testcase_65 AC 1,234 ms
30,364 KB
testcase_66 AC 1,287 ms
31,452 KB
testcase_67 AC 744 ms
32,268 KB
testcase_68 AC 714 ms
33,076 KB
testcase_69 AC 1,224 ms
33,368 KB
testcase_70 AC 916 ms
34,448 KB
testcase_71 AC 47 ms
8,076 KB
testcase_72 AC 710 ms
29,248 KB
testcase_73 AC 599 ms
27,652 KB
testcase_74 AC 942 ms
30,444 KB
testcase_75 AC 1,091 ms
32,004 KB
testcase_76 AC 834 ms
33,084 KB
testcase_77 AC 1,272 ms
32,000 KB
testcase_78 AC 1,350 ms
34,304 KB
testcase_79 AC 1,264 ms
34,448 KB
testcase_80 AC 897 ms
33,360 KB
testcase_81 AC 1,106 ms
32,452 KB
testcase_82 AC 1,278 ms
33,620 KB
testcase_83 AC 884 ms
31,756 KB
testcase_84 AC 728 ms
30,836 KB
testcase_85 AC 1,388 ms
34,184 KB
testcase_86 AC 1,191 ms
32,000 KB
testcase_87 AC 114 ms
33,324 KB
testcase_88 AC 111 ms
32,000 KB
testcase_89 AC 2 ms
6,816 KB
testcase_90 AC 2 ms
6,816 KB
testcase_91 AC 2 ms
6,816 KB
権限があれば一括ダウンロードができます

ソースコード

diff #

#pragma region Macros

#pragma GCC optimize("O3,unroll-loops")
#pragma GCC target("sse,sse2,sse3,ssse3,sse4,fma,abm,mmx,avx,avx2")

#include <bits/extc++.h>
#include <immintrin.h>
// #include <atcoder/all>
// using namespace atcoder;
using namespace std;
using namespace __gnu_pbds;

// #include <boost/multiprecision/cpp_dec_float.hpp>
// #include <boost/multiprecision/cpp_int.hpp>
// namespace mp = boost::multiprecision;
// using Bint = mp::cpp_int;
// using Bdouble = mp::number<mp::cpp_dec_float<256>>;

// Shorthand: `pb` expands to emplace_back.
#define pb emplace_back
// Every later `int` token in this file is rewritten to `ll` (long long, aliased below).
// Integer literals are not affected by this macro.
#define int ll
// `endl` becomes the newline character, so writing it does not flush the stream.
#define endl '\n'

// Route these math names directly to the compiler builtins.
#define sqrt __builtin_sqrt
#define cbrt __builtin_cbrt
#define hypot __builtin_hypot

using ll = long long;
using ld = long double;
const ld PI = acosl(-1);
const int INF = 1 << 30;      // 2^30
const ll INFL = 1LL << 61;    // 2^61
const int MOD = 998244353;
// const int MOD = 1000000007;

// Tolerance used by equals() for floating-point comparison.
const ld EPS = 1e-10;
// Approximate equality: true when |a - b| is strictly smaller than EPS.
const bool equals(ld a, ld b) {
    const ld gap = fabs(a - b);
    return gap < EPS;
}

// Paired offsets for 8 directions: entry i is the step (dx[i], dy[i]).
// Entries 0-3 change exactly one coordinate; entries 4-7 change both (diagonals).
// The arrows name the direction of each entry (presumably dx is the row step and
// dy the column step -- confirm against the caller).
const vector<int> dx = {0, 1, 0, -1, 1, 1, -1, -1}; // → ↓ ← ↑ ↘ ↙ ↖ ↗
const vector<int> dy = {1, 0, -1, 0, 1, -1, -1, 1};

// Edge record with endpoints `from`/`to` and a `cost`.
struct Edge {
    // Defaults to -1 so that edges built with the two-argument constructor
    // never expose an indeterminate `from` value.
    int from = -1;
    int to;
    ll cost;
    // Edge with only the destination known; `from` keeps its -1 default.
    Edge(int to, ll cost) : to(to), cost(cost) {}
    // Fully specified edge.
    Edge(int from, int to, ll cost) : from(from), to(to), cost(cost) {}
};

// Wall-clock timestamps. `start` is assigned by constructor() below; `now` is
// only declared here.
chrono::system_clock::time_point  start, now;
// Start-up hook: the GCC `constructor` attribute requests that this function
// run automatically before main().
__attribute__((constructor))
void constructor() {
    ios::sync_with_stdio(false);          // decouple C++ streams from C stdio
    cin.tie(nullptr);                     // do not flush cout before each read from cin
    cout << fixed << setprecision(10);    // fixed notation, 10 digits after the decimal point
    start = chrono::system_clock::now();  // remember the time at start-up
}

// Integer power x^n computed recursively in 128-bit arithmetic.
// Requires n >= 0. The bases 1 and -1 are answered directly; for any other
// base the function asserts x < INFL before each multiplication step.
__int128_t POW(__int128_t x, int n) {
    assert(n >= 0);
    if (n == 0 || x == 1) return 1;
    if (x == -1) return (n % 2 == 0) ? 1 : -1;

    assert(x < INFL);
    if (n % 2 != 0) {
        // Odd exponent: peel off one factor.
        return x * POW(x, n - 1);
    }
    // Even exponent: square the base and halve the exponent.
    return POW(x * x, n / 2);
}
// Quotient q of x = q*y + r where the remainder r is never negative.
// Requires y != 0.
int per(int x, int y) {
    assert(y != 0);
    int quotient = x / y;
    // The built-in division truncates toward zero; a negative built-in
    // remainder means the quotient has to move one step away from zero.
    if (x % y < 0) {
        quotient += (y < 0) ? 1 : -1;
    }
    return quotient;
}
// int perl(ld x, ld y) { // perld(4.5, 2.1) = 2  // TODO
//     if (-EPS < x && x < 0 or 0 < x && x < EPS) x = 0;
//     if (-EPS < y && y < 0 or 0 < x && x < EPS) y = 0;
//     assert(!equals(y, 0));
//     if (x >= 0 && y > 0) return floor(x / y)+EPS;
//     if (x >= 0 && y < 0) return floor(x / y) - (x - floor(x/y)*y < -EPS);
//     if (x < 0 && y < 0) return floor(x / y) + (x - floor(x/y)*y < -EPS);
//     return floor(x / y) - (x - floor(x/y)*y < -EPS); //  (x < 0 && y > 0) 
// }
// Remainder r of x = q*y + r with 0 <= r < |y|. Requires y != 0.
int mod(int x, int y) {
    assert(y != 0);
    int r = x % y;
    // The built-in remainder takes the sign of x; lift a negative one by |y|.
    if (r < 0) {
        r += (y < 0) ? -y : y;
    }
    return r;
}
// ld modl(ld x, ld y) { // TODO
//     assert(!equals(y, 0));
//     if (x >= -EPS) return (x - floor(x/y)*y);
//     ld ret = x - floor(x/y)*y; // (x < 0)
//     ret += abs(y) * INFL; // TODO : オーバーフローする?
//     ret = x - floor(x/abs(y))*abs(y);
//     return ret;
// }
// int floor(int x, int y) { // TODO
//     assert(y != 0);
//     if (b < 0) a = -a, b = -b;
//     return a >= 0 ? a / b : (a + 1) / b - 1;
// }
// int ceil(int x, int y) { // TODO
// assert(y != 0);
//     if (b < 0) a = -a, b = -b;
//     return a > 0 ? (a - 1) / b + 1 : a / b;
// }
// int floorl(ld x, ld y) { return 0; } // TODO
// int ceill(ld x, ld y) { return 0; } // TODO
// int gauss(int x, int y) {
//     assert(y != 0);
//     return x / y;
// } // 整数部分(未verify)
// int gauss(ld x, ld y) { return 0; } // TODO

// Larger of two pairs in lexicographic order (first, then second).
// When the pairs compare equal, b is returned.
pair<int, int> max(const pair<int, int> &a, const pair<int, int> &b) {
    return (b < a) ? a : b;
}
// Smaller of two pairs in lexicographic order (first, then second).
// When the pairs compare equal, b is returned.
pair<int, int> min(const pair<int, int> &a, const pair<int, int> &b) {
    return (a < b) ? a : b;
}

// Raises a to b when b is strictly larger; reports whether a was changed.
template <class T> bool chmax(T &a, const T& b) {
    const bool improved = a < b;
    if (improved) a = b;
    return improved;
}
// Lowers a to b when b is strictly smaller; reports whether a was changed.
template <class T> bool chmin(T &a, const T& b) {
    const bool improved = a > b;
    if (improved) a = b;
    return improved;
}
// Median of three values.
// Uses comparisons only, so it cannot overflow for large integers and returns
// one of the inputs exactly for floating-point arguments (the arithmetic form
// a + b + c - max - min could overflow or accumulate rounding error).
template <class T> T mid(T a, T b, T c) {
    const T lo = std::min(a, b);
    const T hi = std::max(a, b);
    // The median is c clamped into [lo, hi].
    return std::max(lo, std::min(hi, c));
}
// Sorts three variables in place: ascending by default, descending when rev
// is true.
template <class T> void sort(T &a, T &b, T &c, bool rev = false) {
    // Compare-exchange: afterwards lo <= hi.
    auto order = [](T &lo, T &hi) {
        if (lo > hi) swap(lo, hi);
    };
    if (!rev) {
        order(a, b);
        order(a, c);
        order(b, c);
    } else {
        // Same network with the roles of the variables mirrored.
        order(c, b);
        order(c, a);
        order(b, a);
    }
}
// Sorts four variables in place: ascending by default, descending when rev
// is true.
template <class T> void sort(T &a, T &b, T &c, T &d, bool rev = false) {
    // Compare-exchange: afterwards lo <= hi.
    auto order = [](T &lo, T &hi) {
        if (lo > hi) swap(lo, hi);
    };
    if (!rev) {
        order(a, b); order(a, c); order(a, d);
        order(b, c); order(b, d);
        order(c, d);
    } else {
        // Same network with the roles of the variables mirrored.
        order(d, c); order(d, b); order(d, a);
        order(c, b); order(c, a);
        order(b, a);
    }
}

// Number of zero bits above the highest set bit of the 64-bit value x.
// The builtin's result is undefined for x == 0.
int countl_zero(int x) {
    return __builtin_clzll(x);
}
// Counts consecutive 1 bits starting from the least-significant bit.
// NOTE(review): the name suggests "leading" ones; the body scans from the low
// end -- confirm which one callers expect.
int countl_one(int x) {
    int ret = 0;
    for (; x % 2; x /= 2) ret++;
    return ret;
}
// Number of zero bits below the lowest set bit of the 64-bit value x.
// The builtin's result is undefined for x == 0.
int countr_zero(int x) {
    return __builtin_ctzll(x);
}
// Counts consecutive 1 bits starting at the highest set bit and moving down.
// NOTE(review): the name suggests "trailing" ones; the body scans from the
// top set bit -- confirm which one callers expect.
int countr_one(int x) {
    int ret = 0;
    for (int k = 63 - __builtin_clzll(x); k != -1 && (x & (1LL << k)); k--) {
        ret++;
    }
    return ret;
}
// Number of set bits in the 64-bit value x.
int popcount(int x) {
    return __builtin_popcountll(x);
}
// Number of zero bits at or below the highest set bit of x.
int unpopcount(int x) {
    const int width = 64 - __builtin_clzll(x);
    return width - __builtin_popcountll(x);
}

// Index k of the highest set bit (value 2^k). x must be non-zero.
int top_bit(int x) { return 63 - __builtin_clzll(x); }
// Index k of the lowest set bit (value 2^k). x must be non-zero.
// Uses the 64-bit builtin: `int` is a 64-bit type in this file, and the 32-bit
// __builtin_ctz would only inspect the low half of the value.
int bot_bit(int x) { return __builtin_ctzll(x); }
// Mask containing only the highest set bit of x. x must be non-zero.
// The shift is done on a 64-bit 1 so that bit indices of 31 and above work.
int MSB(int x) { return 1LL << (63 - __builtin_clzll(x)); }
// Mask containing only the lowest set bit of x.
int LSB(int x) { return (x & -x); }

// Number of bits needed to write x in binary. x must be positive.
int bit_width(int x) { return 64 - __builtin_clzll(x); }
// Smallest k with 2^k >= x; returns 0 for x <= 1.
// (The previous body computed the floor instead, and the function below
// computed the ceiling: the two had been swapped.)
int ceil_log2(int x) { return x <= 1 ? 0 : 64 - __builtin_clzll(x - 1); }
// Largest power of two that is <= x. x must be positive.
// The shift is done on a 64-bit 1 so that results of 2^31 and above work.
int bit_floor(int x) { return 1LL << (63 - __builtin_clzll(x)); }
// Largest k with 2^k <= x. x must be positive.
int floor_log2(int x) { return 63 - __builtin_clzll(x); }
// Smallest power of two that is >= x; returns 1 for x <= 1.
// The x <= 1 guard avoids calling the builtin with 0, for which its result is
// undefined.
int bit_ceil(int x) { return x <= 1 ? 1 : 1LL << (64 - __builtin_clzll(x - 1)); }

// Hamming distance: number of bit positions in which a and b differ.
int hamming(int a, int b) {
    const int differing = a ^ b;
    return popcount(differing);
}
// (popcount(x ^ (x >> 1)) + (x & 1)) / 2. For non-negative x this is the
// number of maximal runs of consecutive 1 bits.
int compcnt(int x) {
    const int boundaries = popcount(x ^ (x >> 1));  // positions where adjacent bits differ
    const int lowest = x & 1;                       // a run touching bit 0 has only one boundary
    return (boundaries + lowest) / 2;
}

// Disjoint-set forest with union by size and path compression.
class UnionFind {
public:
    UnionFind() = default;

    // Creates N singleton sets labelled 0 .. N-1.
    UnionFind(int N) : par(N), sz(N, 1) {
        for (int v = 0; v < N; v++) par[v] = v;
    }

    // Representative of the set containing x. Every node visited on the way
    // is re-attached directly to the representative.
    int root(int x) {
        int top = x;
        while (par[top] != top) top = par[top];
        while (par[x] != top) {
            const int up = par[x];
            par[x] = top;
            x = up;
        }
        return top;
    }

    // Merges the sets of x and y, attaching the smaller one under the larger.
    // Returns false when they were already in the same set.
    bool unite(int x, int y) {
        int big = root(x);
        int small = root(y);
        if (big == small) return false;

        if (sz[big] < sz[small]) swap(big, small);
        sz[big] += sz[small];
        par[small] = big;
        return true;
    }

    // True when x and y belong to the same set.
    bool issame(int x, int y) { return root(x) == root(y); }

    // Number of elements in the set containing x.
    int size(int x) { return sz[root(x)]; }

    // Members of every non-empty set among the elements 0 .. N-1. Sets are
    // listed in increasing order of their representative, members in
    // increasing order.
    vector<vector<int>> groups(int N) {
        vector<vector<int>> byRoot(N);
        for (int v = 0; v < N; v++) {
            byRoot[root(v)].push_back(v);
        }
        vector<vector<int>> result;
        for (auto &members : byRoot) {
            if (!members.empty()) result.push_back(std::move(members));
        }
        return result;
    }

private:
    vector<int> par;  // parent link; a representative points to itself
    vector<int> sz;   // set size, meaningful at representatives
};

// Integer modulo the compile-time constant `mod`. `val` is always kept in
// [0, mod).
template<int mod> class Modint{
public:
    int val = 0;

    // Accepts any integer, including negatives, and reduces it into [0, mod)
    // with a single remainder operation.
    Modint(int x = 0) { x %= mod; if (x < 0) x += mod; val = x; }
    Modint(const Modint &r) = default;
    Modint &operator =(const Modint &r) = default;

    // Arithmetic that yields a new value. All of these are const so they can
    // be used on const operands.
    Modint operator -() const { return Modint(-val); } // unary minus
    Modint operator +(const Modint &r) const { return Modint(*this) += r; }
    Modint operator +(const int &q) const { return Modint(*this) += Modint(q); }
    Modint operator -(const Modint &r) const { return Modint(*this) -= r; }
    Modint operator -(const int &q) const { return Modint(*this) -= Modint(q); }
    Modint operator *(const Modint &r) const { return Modint(*this) *= r; }
    Modint operator *(const int &q) const { return Modint(*this) *= Modint(q); }
    Modint operator /(const Modint &r) const { return Modint(*this) /= r; }
    Modint operator /(const int &q) const { return Modint(*this) /= Modint(q); }

    // Prefix forms return the updated object; postfix forms return the value
    // held before the update.
    Modint& operator ++() { val++; if (val >= mod) val -= mod; return *this; }
    Modint operator ++(signed) { Modint old(*this); ++*this; return old; }
    Modint& operator --() { val--; if (val < 0) val += mod; return *this; }
    Modint operator --(signed) { Modint old(*this); --*this; return old; }

    Modint &operator +=(const Modint &r) { val += r.val; if (val >= mod) val -= mod; return *this; }
    Modint &operator +=(const int &q) { return *this += Modint(q); }
    Modint &operator -=(const Modint &r) { if (val < r.val) val += mod; val -= r.val; return *this; }
    Modint &operator -=(const int &q) { return *this -= Modint(q); }
    Modint &operator *=(const Modint &r) { val = val * r.val % mod; return *this; }
    Modint &operator *=(const int &q) { return *this *= Modint(q); }

    // Division multiplies by the inverse of r obtained with the extended
    // Euclidean algorithm; r must be invertible modulo `mod`.
    Modint &operator /=(const Modint &r) {
        int a = r.val, b = mod, u = 1, v = 0;
        while (b) {
            int t = a / b;
            a -= t * b; swap(a, b);
            u -= t * v; swap(u, v);
        }
        // u may be negative here, so bring the product back into [0, mod).
        val = val * u % mod;
        if (val < 0) val += mod;
        return *this;
    }
    Modint &operator /=(const int &q) { return *this /= Modint(q); }

    // Comparisons look at the stored residue.
    bool operator ==(const Modint& r) const { return this -> val == r.val; }
    bool operator <(const Modint& r) const { return this -> val < r.val; }
    bool operator >(const Modint& r) const { return this -> val > r.val; }
    bool operator !=(const Modint& r) const { return this -> val != r.val; }
};

using mint = Modint<MOD>;
// using Mint = modint998244353;

istream &operator >>(istream &is, mint& x) {
    int t; is >> t;
    x = t;
    return (is);
}
// Writes the stored residue of x to the stream.
ostream &operator <<(ostream &os, const mint& x) {
    os << x.val;
    return os;
}
// x^n as a mint, by binary exponentiation. Requires n >= 0.
mint modpow(const mint &x, int n) {
    assert(n >= 0); // TODO: n <= -1
    mint result = 1;
    mint base = x;
    // Walk the exponent bits from least to most significant.
    for (; n > 0; n /= 2) {
        if (n & 1) result = result * base;
        base = base * base;
    }
    return result;
}

// x^n modulo `mod`, by binary exponentiation in 128-bit arithmetic.
// Requires n >= 0 and mod > 0. The result is always in [0, mod): negative
// bases are normalized first, and mod == 1 yields 0 even when n == 0.
int modpow(__int128_t x, int n, int mod) {
    assert(n >= 0 && mod > 0); // TODO: n <= -1
    // Reduce the base up front so the products below stay bounded by mod^2.
    x %= mod;
    if (x < 0) x += mod;
    __int128_t ret = 1 % mod;
    while (n > 0) {
        if (n % 2 == 1) ret = ret * x % mod;
        x = x * x % mod;
        n /= 2;
    }
    return ret;
}

// Multiplicative inverse of x modulo `mod`.
// Requires mod > 0, x > 0 and gcd(x, mod) == 1 (asserted). Works for composite
// moduli and for x >= mod as well, using the extended Euclidean algorithm.
int modinv(__int128_t x, int mod) {
    assert(mod > 0 && x > 0);
    if (x == 1) return 1;

    // Invariant: a == u * x (mod `mod`) and b == v * x (mod `mod`).
    __int128_t a = x % mod, b = mod, u = 1, v = 0;
    while (b != 0) {
        const __int128_t q = a / b;
        __int128_t t = a - q * b; a = b; b = t;
        t = u - q * v; u = v; v = t;
    }
    assert(a == 1);  // x must be coprime to mod

    u %= mod;
    if (u < 0) u += mod;
    return u;
}

// Reads one whitespace-delimited token and converts it to a 128-bit integer.
// Decimal digits are accumulated in order and every other character is
// skipped; the value is negated when the token starts with '-'.
istream &operator >>(istream &is, __int128_t& x) {
    string token;
    is >> token;

    const bool negative = (token[0] == '-');
    __int128_t magnitude = 0;
    for (char ch : token) {
        if (ch < '0' || '9' < ch) continue;
        magnitude = magnitude * 10 + ch - '0';
    }
    x = negative ? -magnitude : magnitude;
    return is;
}
// Writes a 128-bit integer in decimal directly to the stream buffer.
// Sets badbit on the stream when the buffer accepts fewer characters than
// requested.
ostream &operator <<(ostream &os, __int128_t x) {
    ostream::sentry s(os);
    if (s) {
        // Take the magnitude in unsigned arithmetic: negating the most
        // negative value as a signed number would overflow.
        __uint128_t tmp = x < 0 ? -static_cast<__uint128_t>(x)
                                : static_cast<__uint128_t>(x);
        char buffer[128];
        char *d = end(buffer);

        // Emit digits from least to most significant, filling the buffer
        // from the back.
        do {
            --d;
            *d = "0123456789"[tmp % 10];
            tmp /= 10;
        } while (tmp != 0);

        if (x < 0) {
            --d;
            *d = '-';
        }
        int len = end(buffer) - d;

        if (os.rdbuf()->sputn(d, len) != len) {
            os.setstate(ios_base::badbit);
        }
    }
    return os;
}

// Converts a string to a 128-bit integer. Decimal digits are accumulated in
// order and every other character is skipped; the value is negated when the
// string starts with '-'.
__int128_t stoll(string &S) {
    const bool negative = (S[0] == '-');
    __int128_t magnitude = 0;
    for (char ch : S) {
        if (ch < '0' || '9' < ch) continue;
        magnitude = magnitude * 10 + ch - '0';
    }
    return negative ? -magnitude : magnitude;
}
// Greatest common divisor by the Euclidean algorithm. gcd(a, 0) == a.
__int128_t gcd(__int128_t a, __int128_t b) {
    while (b != 0) {
        const __int128_t r = a % b;
        a = b;
        b = r;
    }
    return a;
}
// Least common multiple. Returns 0 when either argument is 0 (the division
// below would otherwise divide by zero when both are 0).
// The result has to fit in __int128_t.
__int128_t lcm(__int128_t a, __int128_t b) {
    if (a == 0 || b == 0) return 0;
    // Divide before multiplying to keep the intermediate value small.
    return a / gcd(a, b) * b;
}

// Intended to render x as a string with k digits after the decimal point.
// Requires k >= 0.
// NOTE(review): the stream is a fresh stringstream, so it is not in fixed
// notation and setprecision(k + 2) limits significant digits rather than
// decimals. For values with several integer digits, or many fractional digits,
// the result may not have exactly k decimals -- verify against the intent.
string to_string(ld x, int k) { // render x up to the k-th decimal place
    assert(k >= 0);
    stringstream ss;
    ss << setprecision(k + 2) << x;
    string s = ss.str();
    // Make sure there is a decimal point to measure from.
    if (s.find('.') == string::npos) s += '.';
    int pos = s.find('.');
    // Pad with zeros until more than k characters follow the point
    // (the loop counter i itself is not used).
    for (int i = 0; k >= (int)s.size() - 1 - pos; i++) s += '0';
    // Drop the last character, then a trailing '.' if that is what remains.
    s.pop_back();
    if (s.back() == '.') s.pop_back();
    return s;

    // Disabled alternative: round at decimal place k+1 and return up to place k.
    // stringstream ss;
    // ss << setprecision(k + 1) << x;
    // string s = ss.str();
    // if (s.find('.') == string::npos) s += '.';
    // int pos = s.find('.');
    // for (int i = 0; k > (int)s.size() - 1 - pos; i++) s += '0';
    // if (s.back() == '.') s.pop_back();
    // return s;
}

// Decimal representation of a 128-bit integer: "0" for zero, a leading '-'
// for negative values.
string to_string(__int128_t x) {
    if (x == 0) return "0";

    const bool negative = x < 0;
    // Take the magnitude in unsigned arithmetic so the most negative value
    // does not overflow on negation.
    __uint128_t magnitude = negative ? -static_cast<__uint128_t>(x)
                                     : static_cast<__uint128_t>(x);

    // Collect digits least-significant first, then the sign, and reverse once
    // so the sign ends up in front.
    string ret;
    while (magnitude != 0) {
        ret += (char)('0' + (int)(magnitude % 10));
        magnitude /= 10;
    }
    if (negative) ret += '-';
    reverse(ret.begin(), ret.end());
    return ret;
}
// One-character string holding c.
string to_string(char c) {
    return string(1, c);
}

// Small xorshift-style pseudo-random generator with a fixed 64-bit seed.
// Each call mixes the state with a left shift by 7 and a right shift by 9.
// A global instance `rnd` is defined together with the type.
struct SXor128 {
    uint64_t x = 88172645463325252LL;

    // Next value, truncated to `unsigned`.
    unsigned Int() {
        x ^= x << 7;
        x ^= x >> 9;
        return x;
    }
    // Next value reduced modulo `mod`; mod must be non-zero.
    unsigned Int(unsigned mod) {
        x ^= x << 7;
        x ^= x >> 9;
        return x % mod;
    }
    // Next value mapped into [l, r]; r - l + 1 must be non-zero.
    unsigned Int(unsigned l, unsigned r) {
        x ^= x << 7;
        x ^= x >> 9;
        const unsigned span = r - l + 1;
        return x % span + l;
    }
    // Next value scaled into [0, 1].
    double Double() {
        const double raw = static_cast<double>(Int());
        return raw / UINT_MAX;
    }
} rnd;

// Hash functor for 64-bit keys: a splitmix64-style mixer applied to the key
// plus an offset taken once from the steady clock.
struct custom_hash {
    // Add a constant, then run two xor-shift-multiply rounds and a final
    // xor-shift.
    static uint64_t splitmix64(uint64_t x) {
        uint64_t z = x + 0x9e3779b97f4a7c15;
        z ^= z >> 30;
        z *= 0xbf58476d1ce4e5b9;
        z ^= z >> 27;
        z *= 0x94d049bb133111eb;
        z ^= z >> 31;
        return z;
    }

    // The offset is initialized on the first call and reused afterwards.
    size_t operator()(uint64_t x) const {
        static const uint64_t FIXED_RANDOM =
            chrono::steady_clock::now().time_since_epoch().count();
        const uint64_t shifted = x + FIXED_RANDOM;
        return splitmix64(shifted);
    }
};

// Folds the hash of v into an existing seed: hash<T>(v) plus a constant and
// two shifted copies of the seed, xor-ed with the seed.
template<class T> size_t HashCombine(const size_t seed,const T &v){
    return seed^(hash<T>()(v)+0x9e3779b9+(seed<<6)+(seed>>2));
}
// hash for pairs: start from the hash of `first`, then fold in `second`.
template<class T,class S> struct hash<pair<T,S>>{
    size_t operator()(const pair<T,S> &keyval) const noexcept {
        return HashCombine(hash<T>()(keyval.first), keyval.second);
    }
};
// hash for vectors: fold every element, in order, into a seed of 0.
template<class T> struct hash<vector<T>>{
    size_t operator()(const vector<T> &keyval) const noexcept {
        size_t s=0;
        for (auto&& v: keyval) s=HashCombine(s,v);
        return s;
    }
};
// Recursive helper for tuples: hashes the first N-1 elements, then folds in
// element N-1.
template<int N> struct HashTupleCore{
    template<class Tuple> size_t operator()(const Tuple &keyval) const noexcept{
        size_t s=HashTupleCore<N-1>()(keyval);
        return HashCombine(s,get<N-1>(keyval));
    }
};
// Recursion end: zero elements hash to 0.
template <> struct HashTupleCore<0>{
    template<class Tuple> size_t operator()(const Tuple &keyval) const noexcept{ return 0; }
};
// hash for tuples: delegates to HashTupleCore with the tuple's element count.
template<class... Args> struct hash<tuple<Args...>>{
    size_t operator()(const tuple<Args...> &keyval) const noexcept {
        return HashTupleCore<tuple_size<tuple<Args...>>::value>()(keyval);
    }
};

// Lookup tables filled by COMinit: factorials, inverse factorials and
// modular inverses, all modulo MOD.
vector<mint> _fac, _finv, _inv;
// Fills the tables for indices 0 .. N.
// The tables always hold at least the entries 0 and 1, so the base-case
// writes below stay in bounds even when N < 1.
void COMinit(int N) {
    const int len = (N < 1 ? 1 : N) + 1;
    _fac.resize(len);
    _finv.resize(len);
    _inv.resize(len);
    _fac[0] = _fac[1] = 1;
    _finv[0] = _finv[1] = 1;
    _inv[1] = 1;
    for (int i = 2; i <= N; i++) {
        _fac[i] = _fac[i-1] * mint(i);
        // inv[i] = -inv[MOD % i] * (MOD / i), the usual recurrence for
        // inverses modulo a prime.
        _inv[i] = -_inv[MOD % i] * mint(MOD / i);
        _finv[i] = _finv[i - 1] * _inv[i];
    }
}

// N! from the precomputed table; 0 for negative N.
// N must not exceed the size passed to COMinit.
mint FAC(int N) {
    if (N >= 0) return _fac[N];
    return 0;
}
// Binomial coefficient C(N, K) from the precomputed tables; 0 when K > N or
// either argument is negative.
mint COM(int N, int K) {
    const bool outOfRange = (N < K) || (N < 0) || (K < 0);
    if (outOfRange) return 0;
    return _fac[N] * _finv[K] * _finv[N - K];
}
// Number of K-permutations of N items, N! / (N-K)!, from the precomputed
// tables; 0 when K > N or either argument is negative.
mint PERM(int N, int K) {
    const bool outOfRange = (N < K) || (N < 0) || (K < 0);
    if (outOfRange) return 0;
    return _fac[N] * _finv[N - K];
}
// Combinations with repetition: C(N + K - 1, K), with the special case
// NHK(0, 0) = 1.
mint NHK(int N, int K) {
    const bool bothZero = (N == 0) && (K == 0);
    if (bothZero) return 1;
    return COM(N + K - 1, K);
}

#pragma endregion

// Sentinel with all 64 bits set. SuccinctBitVector::select returns it when the
// requested occurrence does not exist.
enum {
    NOTFOUND = 0xFFFFFFFFFFFFFFFFLLU
};

// Bit vector with rank/select support.
// Bits are stored in 16-bit words (B). build() precomputes two levels of
// counts of 1 bits: L holds the count before every 512-bit large block, and S
// holds the count before every 16-bit small block relative to the start of its
// large block. Usage: construct with the size, call setBit for each position,
// call build() once, then query with rank/select.
class SuccinctBitVector {
private:
    const uint64_t size;    // number of bits in the vector
    static const uint64_t blockBitNum = 16;   // bits per word of B
    static const uint64_t LEVEL_L = 512;      // bits per large block
    static const uint64_t LEVEL_S = 16;       // bits per small block

    vector<uint64_t> L;   // large blocks
    vector<uint16_t> S;   // small blocks
    vector<uint16_t> B;   // the bits themselves

    uint64_t numOne = 0;       // number of 1 bits (set by build())

public:
    // Allocates storage for n bits, all initially 0.
    explicit SuccinctBitVector(const uint64_t n) : size(n) {
        // ceil(n / blockBitNum) words, plus one extra word
        const uint64_t s = (n + blockBitNum - 1) / blockBitNum + 1;   // ceil(n, blockSize)
        this->B.assign(s, 0);
        this->L.assign(n / LEVEL_L + 1, 0);
        this->S.assign(n / LEVEL_S + 1, 0);
    }

    // B[pos] = bit
    // Any value of `bit` other than 1 clears the position. The counts in L and
    // S are not updated here; build() has to be called afterwards.
    void setBit(const uint64_t bit, const uint64_t pos) {
        // assert(bit == 0 or bit == 1);
        // assert(pos < this->size);

        const uint64_t blockPos = pos / blockBitNum;
        const uint64_t offset = pos % blockBitNum;
        if (bit == 1) { B[blockPos] |= (1LLU << offset); }
        else          { B[blockPos] &= (~(1LLU << offset)); }
    }

    // B[pos]
    uint64_t access(const uint64_t pos) {
        // assert(pos < this->size);
        const uint64_t blockPos = pos / blockBitNum;
        const uint64_t offset   = pos % blockBitNum;
        return ((B[blockPos] >> offset) & 1);
    }

    // Fills L, S and numOne from the current contents of B.
    void build() {
        uint64_t num = 0;   // number of 1 bits seen in the words processed so far
        for (uint64_t i = 0; i <= size; i++) {
            if (i % LEVEL_L == 0) {
                L[i / LEVEL_L] = num;
            }
            if (i % LEVEL_S == 0) {
                S[i / LEVEL_S] = num - L[i / LEVEL_L];
            }
            // Count a whole word when its first bit position is reached.
            if (i != size and i % blockBitNum == 0) {
                num += this->popCount(this->B[i / blockBitNum]);
            }
        }
        this-> numOne = num;
    }

    // Number of occurrences of `bit` in B[0, pos)
    uint64_t rank(const uint64_t bit, const uint64_t pos) {
        // assert(bit == 0 or bit == 1);
        // assert(pos <= this->size);

        if (bit) {
            // large-block count + small-block count + 1 bits below pos inside its word
            return L[pos / LEVEL_L] + S[pos / LEVEL_S] + popCount(B[pos / blockBitNum] & ((1 << (pos % blockBitNum)) - 1));
        } else {
            return pos - rank(1, pos);
        }
    }

    // Position of the rank-th occurrence of `bit`, plus 1 (rank is 1-origin).
    // Returns NOTFOUND when there are fewer than `rank` occurrences.
    uint64_t select(const uint64_t bit, const uint64_t rank) {
        // assert(bit == 0 or bit == 1);
        // assert(rank > 0);
        if (bit == 0 and rank > this->size - this-> numOne) { return NOTFOUND; }
        if (bit == 1 and rank > this-> numOne)              { return NOTFOUND; }

        // Search among the large blocks L: binary search for the last large
        // block whose preceding count is still below rank.
        uint64_t large_idx = 0;
        {
            uint64_t left = 0;
            uint64_t right = L.size();
            while (right - left > 1) {
                uint64_t mid = (left + right) / 2;
                uint64_t r = L[mid];
                // for bit == 0, count zeros: positions minus ones
                r = (bit) ? r : mid * LEVEL_L - L[mid];

                if (r < rank) {
                    left = mid;
                    large_idx = mid;
                } else {
                    right = mid;
                }
            }
        }

        // Search among the small blocks S that belong to that large block.
        uint64_t small_idx = (large_idx * LEVEL_L) / LEVEL_S;
        {
            uint64_t left = (large_idx * LEVEL_L) / LEVEL_S;
            uint64_t right = min(((large_idx  + 1) * LEVEL_L) / LEVEL_S, (uint64_t)S.size());
            while (right - left > 1) {
                uint64_t mid = (left + right) / 2;
                uint64_t r = L[large_idx] + S[mid];
                r = (bit) ? r :mid * LEVEL_S - r;

                if (r < rank) {
                    left = mid;
                    small_idx = mid;
                } else {
                    right = mid;
                }
            }
        }

        // Scan B word by word from the chosen small block.
        uint64_t rank_pos = 0;
        {
            const uint64_t begin_block_idx = (small_idx * LEVEL_S) / blockBitNum;
            // occurrences of `bit` before the first scanned word
            uint64_t total_bit = L[large_idx] + S[small_idx];
            if (bit == 0) {
                total_bit = small_idx * LEVEL_S - total_bit;
            }
            for (uint64_t i = 0;; ++i) {
                uint64_t b = popCount(B[begin_block_idx + i]);
                if (bit == 0) {
                    b = blockBitNum - b;
                }
                if (total_bit + b >= rank) {
                    // The target lies in this word; for bit == 0 search the
                    // complemented word instead.
                    uint64_t block = (bit) ? B[begin_block_idx + i] : ~B[begin_block_idx + i];
                    rank_pos = (begin_block_idx + i) * blockBitNum + selectInBlock(block, rank - total_bit);
                    break;
                }
                total_bit += b;
            }
        }

        return rank_pos + 1;
    }

    // Number of 1 bits as computed by the last build().
    uint64_t getNumOne() const {
        return numOne;
    }

    // Prints the contents of L and S to cout.
    void debug() {
        cout << "LEVEL_L(" << L.size() << ")" << endl;
        for (uint64_t i = 0 ; i < L.size(); ++i) {
            cout << L[i] << ", ";
        }
        cout << endl;
        cout << "LEVEL_S(" << S.size() << ")" << endl;
        for (uint64_t i = 0 ; i < S.size(); ++i) {
            cout << S[i] << ", ";
        }
        cout << endl;
    }

private:
    // Number of set bits in x, computed by summing adjacent bit groups of
    // doubling width.
    uint64_t popCount(uint64_t x) {
        x = (x & 0x5555555555555555ULL) + ((x >> 1) & 0x5555555555555555ULL);
        x = (x & 0x3333333333333333ULL) + ((x >> 2) & 0x3333333333333333ULL);
        x = (x + (x >> 4)) & 0x0f0f0f0f0f0f0f0fULL;
        x = x + (x >>  8);
        x = x + (x >> 16);
        x = x + (x >> 32);
        return x & 0x7FLLU;
    }

    // 0-based position of the rank-th (1-origin) set bit of x.
    // Assumes x contains at least `rank` set bits.
    uint64_t selectInBlock(uint64_t x, uint64_t rank) {
        uint64_t x1 = x - ((x & 0xAAAAAAAAAAAAAAAALLU) >> 1);                              // set-bit counts per 2 bits
        uint64_t x2 = (x1 & 0x3333333333333333LLU) + ((x1 >> 2) & 0x3333333333333333LLU);  // counts per 4 bits
        uint64_t x3 = (x2 + (x2 >> 4)) & 0x0F0F0F0F0F0F0F0FLLU;                            // counts per byte

        // Skip whole bytes until the target falls inside one.
        uint64_t pos = 0;
        for (;;  pos += 8) {
            uint64_t rank_next = (x3 >> pos) & 0xFFLLU;
            if (rank <= rank_next) break;
            rank -= rank_next;
        }

        // Narrow down to the 4-bit half of that byte.
        uint64_t v2 = (x2 >> pos) & 0xFLLU;
        if (rank > v2) {
            rank -= v2;
            pos += 4;
        }

        // Narrow down to the 2-bit pair.
        uint64_t v1 = (x1 >> pos) & 0x3LLU;
        if (rank > v1) {
            rank -= v1;
            pos += 2;
        }

        // Pick the bit inside the pair.
        uint64_t v0  = (x >> pos) & 0x1LLU;
        if (v0 < rank) {
            rank -= v0;
            pos += 1;
        }

        return pos;
    }
};

class WaveletMatrix {
private:
    vector<SuccinctBitVector> bit_arrays;
    vector<uint64_t> begin_one;                    // 各bitに着目したときの1の開始位置
    unordered_map<uint64_t, uint64_t> begin_alphabet;        // 最後のソートされた配列で各文字の開始位置
    vector<vector<uint64_t>> cumulative_sum;  // 各bitに着目したときの累積和

    uint64_t size;                                 // 与えられた配列のサイズ
    uint64_t maximum_element;                      // 文字数
    uint64_t bit_size;                             // 文字を表すのに必要なbit数

public:
    WaveletMatrix (const vector<uint64_t> &array) {
        // assert(array.size() > 0);
        size = array.size();
        maximum_element =  *max_element(array.begin(), array.end()) + 1;
        bit_size = get_num_of_bit(maximum_element);
        if (bit_size == 0) {
            bit_size = 1;
        }

        for (uint64_t i = 0; i < bit_size; ++i) {
            SuccinctBitVector sv(size);
            bit_arrays.push_back(sv);
        }
        this->begin_one.resize(bit_size);
        this->cumulative_sum.resize(bit_size + 1, vector<uint64_t>(size + 1, 0));

        for (uint64_t j = 0; j < array.size(); ++j) {
            this->cumulative_sum[0][j + 1] = this->cumulative_sum[0][j] + array[j];
        }

        vector<uint64_t> v(array);
        for (uint64_t i = 0; i < bit_size; ++i) {

            vector<uint64_t> temp;
            // 0をtempにいれてく
            for (uint64_t j = 0; j < v.size(); ++j) {
                uint64_t c = v[j];
                uint64_t bit = (c >> (bit_size - i - 1)) & 1;  // 上からi番目のbit
                if (bit == 0) {
                    temp.push_back(c);
                    bit_arrays[i].setBit(0, j);
                }
            }

            this->begin_one[i] = temp.size();

            // 1をtempにいれてく
            for (uint64_t j = 0; j < v.size(); ++j) {
                uint64_t c = v[j];
                uint64_t bit = (c >> (bit_size - i - 1)) & 1;  // 上からi番目のbit
                if (bit == 1) {
                    temp.push_back(c);
                    bit_arrays[i].setBit(1, j);
                }
            }

            for (uint64_t j = 0; j < temp.size(); ++j) {
                this->cumulative_sum[i + 1][j + 1] = this->cumulative_sum[i + 1][j] + temp[j];
            }

            bit_arrays[i].build();
            v = temp;
        }

        // ソートされた配列内での各文字の位置を取得
        for (int i = v.size() - 1; i >= 0; --i) {
            this->begin_alphabet[v[i]] = i;
        }
    }

    // Returns v[pos], reconstructed bit by bit from the top level down, or
    // NOTFOUND when pos is outside the array.
    uint64_t access(uint64_t pos) {
        if (pos >= this->size) { return NOTFOUND; }

        uint64_t c = 0;
        for (uint64_t i = 0; i < bit_arrays.size(); ++i) {
            const uint64_t bit = bit_arrays[i].access(pos);  // i-th bit (from the top) of the value
            // Plain shift-and-or; the old `c = (c <<= 1) | bit` assigned c twice
            // in one expression for no benefit.
            c = (c << 1) | bit;
            // Follow the element to its position in the next level.
            pos = bit_arrays[i].rank(bit, pos);
            if (bit) {
                pos += this->begin_one[i];
            }
        }
        return c;
    }

    // Returns (position of the rank-th occurrence of c) + 1; rank is 1-origin.
    // Returns NOTFOUND when c does not occur in the array.
    uint64_t select(uint64_t c, uint64_t rank) {
        if (c >= maximum_element) {
            return NOTFOUND;
        }
        const auto found = this->begin_alphabet.find(c);
        if (found == this->begin_alphabet.end()) {
            return NOTFOUND;
        }

        // Start in the final arrangement and walk back up, level by level.
        uint64_t index = found->second + rank;
        for (uint64_t low = 0; low < bit_arrays.size(); ++low) {
            const uint64_t level = bit_size - low - 1;
            const uint64_t bit = (c >> low) & 1;  // low-th bit counted from the bottom
            if (bit == 1) {
                index -= this->begin_one[level];
            }
            index = this->bit_arrays[level].select(bit, index);
        }
        return index;
    }

    // Returns the index of the maximum value in v[begin_pos, end_pos), i.e.
    // the quantile of rank (length - 1).
    uint64_t maxRange(uint64_t begin_pos, uint64_t end_pos) {
        const uint64_t last_rank = end_pos - begin_pos - 1;
        return quantileRange(begin_pos, end_pos, last_rank);
    }

    // Returns the index of the minimum value in v[begin_pos, end_pos), i.e.
    // the quantile of rank 0.
    uint64_t minRange(uint64_t begin_pos, uint64_t end_pos) {
        const uint64_t first_rank = 0;
        return quantileRange(begin_pos, end_pos, first_rank);
    }

    // Returns the index of the k-th smallest value in v[begin_pos, end_pos)
    // (k is 0-origin: the value at position k of the sorted range).
    // Returns NOTFOUND for an empty/out-of-bounds range or when k is not
    // smaller than the range length.
    uint64_t quantileRange(uint64_t begin_pos, uint64_t end_pos, uint64_t k) {
        const bool bad_range = end_pos > size || begin_pos >= end_pos;
        if (bad_range || k >= end_pos - begin_pos) {
            return NOTFOUND;
        }

        // Phase 1: descend the levels, determining the answer's bits from the top.
        uint64_t val = 0;
        for (uint64_t level = 0; level < bit_size; ++level) {
            const uint64_t zeros_before = bit_arrays[level].rank(0, begin_pos);
            const uint64_t zeros_upto = bit_arrays[level].rank(0, end_pos);
            const uint64_t zeros_inside = zeros_upto - zeros_before;  // zeros within the range

            val <<= 1;
            if (k < zeros_inside) {
                // The k-th smallest has a 0 here: continue in the 0-group.
                begin_pos = zeros_before;
                end_pos = zeros_upto;
            } else {
                // It has a 1 here: skip the zeros and continue in the 1-group.
                val |= 1;
                k -= zeros_inside;
                begin_pos = this->begin_one[level] + begin_pos - zeros_before;
                end_pos = this->begin_one[level] + end_pos - zeros_upto;
            }
        }

        // Phase 2: map position 0 down the path of `val` to find where its run
        // starts in the final arrangement.
        uint64_t left = 0;
        for (uint64_t level = 0; level < bit_size; ++level) {
            const uint64_t bit = (val >> (bit_size - level - 1)) & 1;  // level-th bit from the top
            left = bit_arrays[level].rank(bit, left);
            if (bit) {
                left += this->begin_one[level];
            }
        }

        // 1-origin occurrence number of `val`; select() returns position + 1.
        const uint64_t occurrence = begin_pos + k - left + 1;
        return select(val, occurrence) - 1;
    }

    // Returns the number of occurrences of c in v[0, pos).
    // Values that never occur in the array yield 0.
    uint64_t rank(uint64_t c, uint64_t pos) {
        if (c >= maximum_element) {
            return 0;
        }
        const auto entry = this->begin_alphabet.find(c);
        if (entry == this->begin_alphabet.end()) {
            return 0;
        }

        // Map pos through every level along the bit path of c.
        for (uint64_t level = 0; level < bit_size; ++level) {
            const uint64_t bit = (c >> (bit_size - level - 1)) & 1;  // level-th bit from the top
            pos = bit_arrays[level].rank(bit, pos) + (bit ? this->begin_one[level] : 0);
        }

        // Distance from the first slot of c in the final arrangement.
        return pos - entry->second;
    }

    // Returns how many elements of v[begin_pos, end_pos) have a value in
    // [min_c, max_c). Invalid position or value ranges yield 0.
    uint64_t rangeFreq(uint64_t begin_pos, uint64_t end_pos, uint64_t min_c, uint64_t max_c) {
        const bool bad_positions = end_pos > size || begin_pos >= end_pos;
        const bool bad_values = min_c >= max_c || min_c >= maximum_element;
        if (bad_positions || bad_values) {
            return 0;
        }

        // (#elements < max_c) - (#elements < min_c)
        const uint64_t below_max = get<1>(rankAll(max_c, begin_pos, end_pos));
        const uint64_t below_min = get<1>(rankAll(min_c, begin_pos, end_pos));
        return below_max - below_min;
    }

    // Returns the number of elements in v[begin, end) that are smaller than c
    // (the second component of rankAll).
    uint64_t rankLessThan(uint64_t c, uint64_t begin, uint64_t end) {
        return get<1>(rankAll(c, begin, end));
    }

    // Returns the number of elements in v[begin, end) that are larger than c
    // (the third component of rankAll).
    uint64_t rankMoreThan(uint64_t c, uint64_t begin, uint64_t end) {
        return get<2>(rankAll(c, begin, end));
    }

    // For v[begin, end) returns the tuple
    //   (#elements equal to c, #elements smaller than c, #elements larger than c).
    // An empty range gives (0, 0, 0); a c at or above maximum_element gives
    // (0, range length, 0).
    tuple<uint64_t, uint64_t, uint64_t> rankAll(const uint64_t c, uint64_t begin, uint64_t end) {
        if (begin >= end) {
            return make_tuple(0, 0, 0);
        }
        const uint64_t total = end - begin;
        if (c >= maximum_element || end == 0) {
            return make_tuple(0, total, 0);
        }

        uint64_t smaller = 0;
        uint64_t larger = 0;
        for (size_t level = 0; level < bit_size && begin < end; ++level) {
            const uint64_t zeros_lo = this->bit_arrays[level].rank(0, begin);
            const uint64_t zeros_hi = this->bit_arrays[level].rank(0, end);
            const uint64_t ones_lo = begin - zeros_lo;
            const uint64_t ones_hi = end - zeros_hi;

            const bool c_bit_set = ((c >> (bit_size - level - 1)) & 1) != 0;
            if (c_bit_set) {
                // Everything with a 0 at this level is smaller than c.
                smaller += zeros_hi - zeros_lo;
                begin = this->begin_one[level] + ones_lo;
                end = this->begin_one[level] + ones_hi;
            } else {
                // Everything with a 1 at this level is larger than c.
                larger += ones_hi - ones_lo;
                begin = zeros_lo;
                end = zeros_hi;
            }
        }

        return make_tuple(total - smaller - larger, smaller, larger);
    }

    // T[s, e)で出現回数が多い順にk個の(値,頻度)を返す
    // 頻度が同じ場合は値が小さいものが優先される
    vector<pair<uint64_t, uint64_t>> topk(uint64_t s, uint64_t e, uint64_t k) {
        // assert(s < e);
        vector<pair<uint64_t, uint64_t>> result;

        // (頻度,深さ,値)の順でソート
        auto c = [](const tuple<uint64_t, uint64_t, uint64_t, uint64_t, uint64_t> &l, const tuple<uint64_t, uint64_t, uint64_t, uint64_t, uint64_t> &r) {
            // width
            if (get<0>(l) != get<0>(r)) {
                return get<0>(l) < get<0>(r);
            }
            // depth
            if (get<3>(l) != get<3>(r)) {
                return get<3>(l) > get<3>(r);
            }
            // value
            if (get<4>(l) != get<4>(r)) {
                return get<4>(l) > get<4>(r);
            }
            return true;
        };

        std::priority_queue<tuple<uint64_t, uint64_t, uint64_t, uint64_t, uint64_t>, vector<tuple<uint64_t, uint64_t, uint64_t, uint64_t, uint64_t>>, decltype(c)> que(c);  // width, left, right, depth, value
        que.push(make_tuple(e - s, s, e, 0, 0));

        while (not que.empty()) {
            auto element = que.top(); que.pop();
            uint64_t width, left, right, depth, value;
            tie(width, left, right, depth, value) = element;

            if (depth >= this->bit_size) {
                result.emplace_back(make_pair(value, right - left));
                if (result.size() >= k) {
                    break;
                }
                continue;
            }

            // 0
            const uint64_t left0 = this->bit_arrays[depth].rank(0, left);
            const uint64_t right0 = this->bit_arrays[depth].rank(0, right);
            if (left0 < right0) {
                que.push(make_tuple(right0 - left0, left0, right0, depth + 1, value));
            }

            // 1
            const uint64_t left1 = this->begin_one[depth] + this->bit_arrays[depth].rank(1, left);
            const uint64_t right1 = this->begin_one[depth] + this->bit_arrays[depth].rank(1, right);
            if (left1 < right1) {
                que.push(make_tuple(right1 - left1, left1, right1, depth + 1, value | (1 << (bit_size - depth - 1))));
            }
        }

        return result;
    };

    // Returns the sum of all elements c of T[begin, end) with x <= c < y.
    // Entry point for the recursive overload, started at the top level with
    // an empty value prefix.
    uint64_t rangeSum(const uint64_t begin, const uint64_t end, const uint64_t x, const uint64_t y) {
        const uint64_t root_depth = 0;
        const uint64_t root_prefix = 0;
        return rangeSum(begin, end, root_depth, root_prefix, x, y);
    }

    // Returns the largest value c in T[begin_pos, end_pos) with x <= c < y,
    // or NOTFOUND if there is none.
    //
    // Depth-first search over the levels with an explicit stack. At each node
    // the 1-child is pushed last and therefore explored first, so leaves are
    // reached in descending value order. `tight` is true while the chosen
    // prefix still equals the prefix of the (inclusive) upper bound; a tight
    // node may only take a 1 where the bound has a 1.
    //
    // Changes: dropped an unused local, and the search stops at the first
    // leaf -- it is the largest value <= the bound, so if it is below x no
    // later leaf can qualify (the old code kept scanning the whole range).
    uint64_t prev_value(const uint64_t begin_pos, const uint64_t end_pos, const uint64_t x, uint64_t y) {
        if (x >= y or y == 0) {
            return NOTFOUND;
        }
        if (y > maximum_element) {
            y = maximum_element;
        }

        if (begin_pos >= end_pos) {
            return NOTFOUND;
        }
        if (x >= maximum_element || end_pos == 0) {
            return NOTFOUND;
        }

        y--;  // work with the inclusive bound: x <= c <= y

        using Frame = tuple<uint64_t, uint64_t, uint64_t, uint64_t, bool>;  // (begin, end, depth, c, tight)
        stack<Frame> s;
        s.push(Frame(begin_pos, end_pos, uint64_t(0), uint64_t(0), true));

        while (not s.empty()) {
            uint64_t b, e, depth, c;
            bool tight;
            tie(b, e, depth, c, tight) = s.top();
            s.pop();

            if (depth == bit_size) {
                // First leaf reached == maximum value not exceeding y.
                if (c >= x) {
                    return c;
                }
                return NOTFOUND;
            }

            const uint64_t bit = (y >> (bit_size - depth - 1)) & 1;

            const uint64_t rank0_begin = this->bit_arrays[depth].rank(0, b);
            const uint64_t rank0_end = this->bit_arrays[depth].rank(0, e);
            const uint64_t rank1_begin = b - rank0_begin;
            const uint64_t rank1_end = e - rank0_end;

            // 0-child (pushed first, explored second). Taking a 0 where the
            // bound has a 1 releases the tight constraint.
            if (rank0_begin != rank0_end) {
                s.push(Frame(rank0_begin, rank0_end, depth + 1, (c << 1) | 0, tight and bit == 0));
            }

            // 1-child (pushed last, explored first). Only allowed when it
            // cannot exceed the bound.
            const uint64_t b1 = this->begin_one[depth] + rank1_begin;
            const uint64_t e1 = this->begin_one[depth] + rank1_end;
            if (b1 != e1 and (not tight or bit == 1)) {
                s.push(Frame(b1, e1, depth + 1, (c << 1) | 1, tight));
            }
        }

        return NOTFOUND;
    }

    // Returns the smallest value c in T[begin_pos, end_pos) with x <= c < y,
    // or NOTFOUND if there is none.
    //
    // Depth-first search over the levels with an explicit stack. At each node
    // the 0-child is pushed last and therefore explored first, so leaves are
    // reached in ascending value order. `tight` is true while the chosen
    // prefix still equals the prefix of the lower bound x; a tight node may
    // only take a 0 where x has a 0.
    //
    // Changes: dropped an unused local, and the search stops at the first
    // leaf -- it is the smallest value >= x, so if it is not below y no later
    // leaf can qualify (the old code kept scanning the whole range).
    uint64_t next_value(const uint64_t begin_pos, const uint64_t end_pos, const uint64_t x, const uint64_t y) {
        if (x >= y or y == 0) {
            return NOTFOUND;
        }

        if (begin_pos >= end_pos) {
            return NOTFOUND;
        }
        if (x >= maximum_element || end_pos == 0) {
            return NOTFOUND;
        }

        using Frame = tuple<uint64_t, uint64_t, uint64_t, uint64_t, bool>;  // (begin, end, depth, c, tight)
        stack<Frame> s;
        s.push(Frame(begin_pos, end_pos, uint64_t(0), uint64_t(0), true));

        while (not s.empty()) {
            uint64_t b, e, depth, c;
            bool tight;
            tie(b, e, depth, c, tight) = s.top();
            s.pop();

            if (depth == bit_size) {
                // First leaf reached == minimum value not below x.
                if (c < y) {
                    return c;
                }
                return NOTFOUND;
            }

            const uint64_t bit = (x >> (bit_size - depth - 1)) & 1;

            const uint64_t rank0_begin = this->bit_arrays[depth].rank(0, b);
            const uint64_t rank0_end = this->bit_arrays[depth].rank(0, e);
            const uint64_t rank1_begin = b - rank0_begin;
            const uint64_t rank1_end = e - rank0_end;

            // 1-child (pushed first, explored second). Taking a 1 where x has
            // a 0 releases the tight constraint.
            const uint64_t b1 = this->begin_one[depth] + rank1_begin;
            const uint64_t e1 = this->begin_one[depth] + rank1_end;
            if (b1 != e1) {
                s.push(Frame(b1, e1, depth + 1, (c << 1) | 1, tight and bit == 1));
            }

            // 0-child (pushed last, explored first). Only allowed when it
            // cannot fall below x.
            if (rank0_begin != rank0_end and (not tight or bit == 0)) {
                s.push(Frame(rank0_begin, rank0_end, depth + 1, (c << 1) | 0, tight));
            }
        }

        return NOTFOUND;
    }

    // Returns the values that occur in both T[s1, e1) and T[s2, e2) as tuples
    // (value, occurrences in the first range, occurrences in the second range).
    // Both ranges are narrowed level by level in breadth-first order; a branch
    // is dropped as soon as either range becomes empty.
    //
    // Fixes: the value bit was built with an `int` shift (`1 << n`), which is
    // wrong/undefined once n reaches 31; empty or inverted input ranges now
    // return an empty result instead of being traversed.
    vector<tuple<uint64_t, uint64_t, uint64_t>> intersect(uint64_t _s1, uint64_t _e1, uint64_t _s2, uint64_t _e2) {
        vector<tuple<uint64_t, uint64_t, uint64_t>> intersection;
        if (_s1 >= _e1 || _s2 >= _e2) {
            return intersection;
        }

        // (s1, e1, s2, e2, depth, value)
        using Item = tuple<uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t>;
        queue<Item> que;
        que.push(Item(_s1, _e1, _s2, _e2, uint64_t(0), uint64_t(0)));

        while (not que.empty()) {
            uint64_t s1, e1, s2, e2, depth, value;
            tie(s1, e1, s2, e2, depth, value) = que.front();
            que.pop();

            if (depth >= this->bit_size) {
                intersection.emplace_back(value, e1 - s1, e2 - s2);
                continue;
            }

            // Children whose bit at this depth is 0.
            const uint64_t s1_0 = this->bit_arrays[depth].rank(0, s1);
            const uint64_t e1_0 = this->bit_arrays[depth].rank(0, e1);
            const uint64_t s2_0 = this->bit_arrays[depth].rank(0, s2);
            const uint64_t e2_0 = this->bit_arrays[depth].rank(0, e2);

            if (s1_0 != e1_0 and s2_0 != e2_0) {
                que.push(Item(s1_0, e1_0, s2_0, e2_0, depth + 1, value));
            }

            // Children whose bit at this depth is 1.
            const uint64_t s1_1 = this->begin_one[depth] + this->bit_arrays[depth].rank(1, s1);
            const uint64_t e1_1 = this->begin_one[depth] + this->bit_arrays[depth].rank(1, e1);
            const uint64_t s2_1 = this->begin_one[depth] + this->bit_arrays[depth].rank(1, s2);
            const uint64_t e2_1 = this->begin_one[depth] + this->bit_arrays[depth].rank(1, e2);

            if (s1_1 != e1_1 and s2_1 != e2_1) {
                const uint64_t with_bit = value | (uint64_t(1) << (bit_size - depth - 1));
                que.push(Item(s1_1, e1_1, s2_1, e2_1, depth + 1, with_bit));
            }
        }

        return intersection;
    }

private:
    // Returns the number of bits needed to represent x - 1, i.e. the bit width
    // required for values in [0, x). Returns 0 for x == 0 and x == 1.
    //
    // The value is shifted down until it is zero; the previous
    // `while (x >> bit_num)` form shifted by 64 (undefined behaviour) whenever
    // bit 63 of x - 1 was set.
    uint64_t get_num_of_bit(uint64_t x) {
        if (x == 0) return 0;
        x--;
        uint64_t bit_num = 0;
        for (; x != 0; x >>= 1) {
            ++bit_num;
        }
        return bit_num;
    }

    // Recursive worker for rangeSum: sums the elements with x <= value < y
    // among positions [begin, end) of the arrangement at level `depth`.
    // `c` is the value prefix fixed so far (bits above `depth` chosen, the
    // remaining bits zero).
    uint64_t rangeSum(const uint64_t begin, const uint64_t end, const uint64_t depth, const uint64_t c, const uint64_t x, const uint64_t y) {
        if (begin == end) {
            return 0;
        }

        if (depth == bit_size) {
            // Leaf: all (end - begin) elements share the value c.
            const bool inside = (x <= c) and (c < y);
            return inside ? c * (end - begin) : 0;
        }

        const uint64_t branch_bit = (uint64_t)1 << (bit_size - depth - 1);  // bit decided at this depth
        const uint64_t next_c = branch_bit | c;                             // prefix with that bit set
        const uint64_t all_one_c = (branch_bit - 1) | next_c;               // largest value below this node

        // Subtree covers [c, all_one_c]; bail out if it cannot meet [x, y).
        if (all_one_c < x or y <= c) {
            return 0;
        }

        // Subtree lies entirely inside [x, y): answer straight from prefix sums.
        if (x <= c and all_one_c < y) {
            return this->cumulative_sum[depth][end] - this->cumulative_sum[depth][begin];
        }

        const uint64_t rank0_begin = this->bit_arrays[depth].rank(0, begin);
        const uint64_t rank0_end = this->bit_arrays[depth].rank(0, end);
        const uint64_t rank1_begin = begin - rank0_begin;
        const uint64_t rank1_end = end - rank0_end;

        const uint64_t zero_part = rangeSum(rank0_begin, rank0_end, depth + 1, c, x, y);
        const uint64_t one_part = rangeSum(this->begin_one[depth] + rank1_begin,
                                           this->begin_one[depth] + rank1_end,
                                           depth + 1, next_c, x, y);
        return zero_part + one_part;
    }
};

// Absolute difference |a - b| of two unsigned values, computed without
// wrapping below zero.
uint64_t uabs(uint64_t a, uint64_t b) {
    return (a > b) ? (a - b) : (b - a);
}

// Reads N, K and the array A, then prints the minimum, over all windows of K
// consecutive elements, of sum |A[j] - m| where m is a median of the window.
// Each window is answered with wavelet-matrix queries: the median via
// quantileRange/access, and counts and sums on either side of it via
// rangeFreq/rangeSum.
signed main() {
    int N, K;
    cin >> N >> K;

    vector<uint64_t> A(N);
    for (int i = 0; i < N; i++) {
        cin >> A[i];
    }
    WaveletMatrix WM(A);

    // Exclusive upper bound for the "values >= median" queries; all ones so
    // that no element can be cut off.
    // NOTE(review): the previous bound was `1e9 + EPS` converted to uint64_t.
    // EPS is defined outside this view; if it is below 1 that bound truncates
    // to exactly 1e9 and silently drops elements equal to 1e9 -- confirm.
    const uint64_t upper = ~static_cast<uint64_t>(0);

    uint64_t ans = INFL;

    // Folds into `ans` the cost of moving every element of A[l..r] to the
    // window's k-th smallest value (k is 0-origin).
    const auto consider = [&](uint64_t l, uint64_t r, uint64_t k) {
        const uint64_t idx = WM.quantileRange(l, r + 1, k);
        if (idx == NOTFOUND) {
            return;
        }
        const uint64_t pivot = WM.access(idx);

        // Elements equal to the pivot contribute pivot to both the sum and
        // pivot * count, so which side they are counted on does not matter.
        const uint64_t cnt_hi = WM.rangeFreq(l, r + 1, pivot, upper);
        const uint64_t cnt_lo = WM.rangeFreq(l, r + 1, 0, pivot);
        const uint64_t sum_hi = WM.rangeSum(l, r + 1, pivot, upper);
        const uint64_t sum_lo = WM.rangeSum(l, r + 1, 0, pivot);

        const uint64_t cost = uabs(sum_hi, pivot * cnt_hi) + uabs(sum_lo, pivot * cnt_lo);
        ans = min(ans, cost);
    };

    for (int i = 0; i + K <= N; i++) {
        const uint64_t l = i, r = i + K - 1;
        consider(l, r, (r - l) / 2);      // lower median
        consider(l, r, (r - l + 1) / 2);  // upper median
    }
    cout << ans << endl;
}
0