#include #include using namespace std; #line 1 "structure/wavelet/succinct-indexable-dictionary.cpp" /** * @brief Succinct Indexable Dictionary(完備辞書) */ struct SuccinctIndexableDictionary { size_t length; size_t blocks; vector< unsigned > bit, sum; SuccinctIndexableDictionary() = default; SuccinctIndexableDictionary(size_t length) : length(length), blocks((length + 31) >> 5) { bit.assign(blocks, 0U); sum.assign(blocks, 0U); } void set(int k) { bit[k >> 5] |= 1U << (k & 31); } void build() { sum[0] = 0U; for(int i = 1; i < blocks; i++) { sum[i] = sum[i - 1] + __builtin_popcount(bit[i - 1]); } } bool operator[](int k) { return (bool((bit[k >> 5] >> (k & 31)) & 1)); } int rank(int k) { return (sum[k >> 5] + __builtin_popcount(bit[k >> 5] & ((1U << (k & 31)) - 1))); } int rank(bool val, int k) { return (val ? rank(k) : k - rank(k)); } }; #line 1 "structure/others/binary-indexed-tree.cpp" /** * @brief Binary-Indexed-Tree(BIT) * @docs docs/binary-indexed-tree.md */ template< typename T > struct BinaryIndexedTree { private: int n; vector< T > data; public: BinaryIndexedTree() = default; explicit BinaryIndexedTree(int n) : n(n) { data.assign(n + 1, 0); } explicit BinaryIndexedTree(const vector< T > &v) : BinaryIndexedTree((int) v.size()) { build(v); } void build(const vector< T > &v) { assert(n == (int) v.size()); for(int i = 1; i <= n; i++) data[i] = v[i - 1]; for(int i = 1; i <= n; i++) { int j = i + (i & -i); if(j <= n) data[j] += data[i]; } } void apply(int k, const T &x) { for(++k; k <= n; k += k & -k) data[k] += x; } T prod(int r) const { T ret = T(); for(; r > 0; r -= r & -r) ret += data[r]; return ret; } T prod(int l, int r) const { return prod(r) - prod(l); } int lower_bound(T x) const { int i = 0; for(int k = 1 << (__lg(n) + 1); k > 0; k >>= 1) { if(i + k <= n && data[i + k] < x) { x -= data[i + k]; i += k; } } return i; } int upper_bound(T x) const { int i = 0; for(int k = 1 << (__lg(n) + 1); k > 0; k >>= 1) { if(i + k <= n && data[i + k] <= x) { x -= data[i + k]; i += k; } } return i; } }; #line 3 "structure/wavelet/wavelet-matrix-point-add-rectangle-sum.cpp" /* * @brief Wavelet Matrix Point Add Rectangle Sum * @docs docs/wavelet-matrix-point-add-rectangle-sum.md */ template< typename T, int MAXLOG, typename D > struct WaveletMatrixPointAddRectangleSum { size_t length; SuccinctIndexableDictionary matrix[MAXLOG]; BinaryIndexedTree< D > ds[MAXLOG]; vector< T > v; int mid[MAXLOG]; WaveletMatrixPointAddRectangleSum() = default; WaveletMatrixPointAddRectangleSum(const vector< T > &v, const vector< D > &d) : length(v.size()), v(v) { assert(v.size() == d.size()); vector< int > l(length), r(length), ord(length); iota(begin(ord), end(ord), 0); vector< D > dd(length); for(int level = MAXLOG - 1; level >= 0; level--) { matrix[level] = SuccinctIndexableDictionary(length + 1); int left = 0, right = 0; for(int i = 0; i < length; i++) { if(((v[ord[i]] >> level) & 1)) { matrix[level].set(i); r[right++] = ord[i]; } else { l[left++] = ord[i]; } } mid[level] = left; matrix[level].build(); ord.swap(l); for(int i = 0; i < right; i++) { ord[left + i] = r[i]; } for(int i = 0; i < length; i++) { dd[i] = d[ord[i]]; } ds[level] = BinaryIndexedTree< D >(dd); } } pair< int, int > succ(bool f, int l, int r, int level) { return {matrix[level].rank(f, l) + mid[level] * f, matrix[level].rank(f, r) + mid[level] * f}; } // count d[i] s.t. (l <= i < r) && (v[i] < upper) D rect_sum(int l, int r, T upper) { D ret = 0; for(int level = MAXLOG - 1; level >= 0; level--) { if(((upper >> level) & 1)) { auto nxt = succ(false, l, r, level); ret += ds[level].prod(nxt.first, nxt.second); l = l - nxt.first + mid[level]; r = r - nxt.second + mid[level]; } else { tie(l, r) = succ(false, l, r, level); } } return ret; } D rect_sum(int l, int r, T lower, T upper) { return rect_sum(l, r, upper) - rect_sum(l, r, lower); } void point_add(int k, const D &x) { auto &y = v[k]; for(int level = MAXLOG - 1; level >= 0; level--) { bool f = ((y >> level) & 1); k = matrix[level].rank(f, k) + mid[level] * f; ds[level].apply(k, x); } } }; int main() { int M, K; cin >> M >> K; vector< int > A(M * K); for(auto &a: A) cin >> a; vector D(M, vector< int >()); { for(int i = 0; i < M * K; i++) { D[A[i]].emplace_back(i); A[i] += 2 * (D[A[i]].size() - 1) * M; } } vector< int > vs(M * K * 2), ds(M * K * 2); BinaryIndexedTree< int > bit(M * K * 2); int64_t now = 0; for(int i = M * K - 1; i >= 0; i--) { now += bit.prod(A[i]); bit.apply(A[i], 1); vs[2 * i + 0] = A[i]; ds[2 * i + 0] = 1; vs[2 * i + 1] = A[i] + M; } WaveletMatrixPointAddRectangleSum< int, 19, int > mat(vs, ds); int64_t ret = now; for(int $ = 0; $ < M; $++) { for(auto &i: D[$]) { now -= i - mat.rect_sum(0, 2 * i, A[i]); now -= mat.rect_sum(2 * i + 2, M * K * 2, A[i]); mat.point_add(2 * i + 0, -1); mat.point_add(2 * i + 1, 1); A[i] += M; now += i - mat.rect_sum(0, 2 * i, A[i]); now += mat.rect_sum(2 * i + 2, M * K * 2, A[i]); } ret = min(ret, now); } cout << ret << "\n"; }