#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iostream>
#include <numeric>
#include <vector>

using namespace std;

/**
 * @brief Binary-Indexed-Tree(BIT)
 * @docs docs/binary-indexed-tree.md
 *
 * Fenwick tree over `n` elements of type T supporting point addition and
 * prefix sums. The public interface is 0-indexed; the internal array `data`
 * is 1-indexed (`data[0]` is unused).
 */
template< typename T >
struct BinaryIndexedTree {
private:
  int n = 0;  // number of elements; zero for a default-constructed tree
  vector< T > data;

  /// Largest power of two that is <= n (1 when n <= 1).
  /// Used as the first step of the binary descent in the bound searches.
  int top_step() const {
    int step = 1;
    while(step <= n / 2) step *= 2;  // `n / 2` keeps the comparison overflow-free
    return step;
  }

public:
  BinaryIndexedTree() = default;

  /// Creates a tree of `n` elements, all initialised with 0.
  explicit BinaryIndexedTree(int n) : n(n) {
    data.assign(n + 1, 0);
  }

  /// Creates a tree whose elements are the values of `v`.
  explicit BinaryIndexedTree(const vector< T > &v) : BinaryIndexedTree((int) v.size()) {
    build(v);
  }

  /// Overwrites the tree with the values of `v` in linear time.
  /// `v.size()` must equal the size the tree was constructed with.
  void build(const vector< T > &v) {
    assert(n == (int) v.size());
    for(int i = 1; i <= n; i++) data[i] = v[i - 1];
    // Push every node's partial sum into its parent node.
    for(int i = 1; i <= n; i++) {
      const int parent = i + (i & -i);
      if(parent <= n) data[parent] += data[i];
    }
  }

  /// Adds `x` to the element at 0-indexed position `k`.
  void apply(int k, const T &x) {
    for(++k; k <= n; k += k & -k) data[k] += x;
  }

  /// Returns the sum of the elements in [0, r).
  T prod(int r) const {
    T ret = T();
    for(; r > 0; r -= r & -r) ret += data[r];
    return ret;
  }

  /// Returns the sum of the elements in [l, r).
  T prod(int l, int r) const {
    return prod(r) - prod(l);
  }

  /// Binary descent: returns the number of leading elements that can be
  /// taken while the accumulated sum stays strictly below `x`, i.e. the
  /// smallest 0-indexed i with prod(i + 1) >= x, or n if there is none.
  /// NOTE: the result is only meaningful when prefix sums are monotone
  /// (all stored values non-negative).
  int lower_bound(T x) const {
    int i = 0;
    for(int k = top_step(); k > 0; k >>= 1) {
      if(i + k <= n && data[i + k] < x) {
        x -= data[i + k];
        i += k;
      }
    }
    return i;
  }

  /// Same descent as lower_bound() but with `<=`: the smallest 0-indexed i
  /// with prod(i + 1) > x, or n if there is none.
  /// NOTE: same monotonicity assumption as lower_bound().
  int upper_bound(T x) const {
    int i = 0;
    for(int k = top_step(); k > 0; k >>= 1) {
      if(i + k <= n && data[i + k] <= x) {
        x -= data[i + k];
        i += k;
      }
    }
    return i;
  }
};

/**
 * @brief Abstract 2D Binary Indexed Tree Compressed (2D BIT with coordinate compression)
 *
 * Holds `n` points fixed at construction time: point `i` has first
 * coordinate `i` and second coordinate `hs[i]`. Every node of the outer
 * Fenwick tree (over the first coordinate) owns an inner BinaryIndexedTree
 * over the sorted, de-duplicated second coordinates of the points it covers.
 */
template< typename T >
struct Abstract2DBinaryIndexedTreeCompressed {
private:
  int n;
  vector< BinaryIndexedTree< T > > data;  // data[k]: inner tree of outer node k
  vector< int > hs;                       // hs[i]: second coordinate of point i
  vector< vector< int > > beet;           // beet[k]: sorted unique hs values of node k

public:
  /// Registers one point per entry of `hs`; all weights start at 0.
  Abstract2DBinaryIndexedTreeCompressed(const vector< int > &hs) : n((int) hs.size()), hs(hs) {
    // Visit the points in ascending hs order so that every beet[k] ends up
    // sorted without a per-node sort.
    vector< int > ord(n);
    iota(begin(ord), end(ord), 0);
    sort(begin(ord), end(ord), [&](int a, int b) { return hs[a] < hs[b]; });

    beet.resize(n + 1);
    for(const int idx: ord) {
      for(int k = idx + 1; k <= n; k += k & -k) {
        beet[k].emplace_back(hs[idx]);
      }
    }

    data.reserve(n + 1);
    for(int k = 0; k <= n; k++) {
      auto &keys = beet[k];
      keys.erase(unique(begin(keys), end(keys)), end(keys));
      data.emplace_back(static_cast< int >(keys.size()));
    }
  }

  /// Adds `x` to the weight of point `k1` (located at (k1, hs[k1])).
  void apply(int k1, const T &x) {
    const int k2 = hs[k1];
    for(++k1; k1 <= n; k1 += k1 & -k1) {
      const auto &keys = beet[k1];
      const int p = static_cast< int >(std::lower_bound(begin(keys), end(keys), k2) - begin(keys));
      data[k1].apply(p, x);
    }
  }

  /// Returns the total weight of the points with index < r1 and hs < r2.
  T prod(int r1, int r2) const {
    T ret{0};
    for(; r1 > 0; r1 -= r1 & -r1) {
      const auto &keys = beet[r1];
      // Number of distinct second coordinates of this node that are < r2.
      const int p = static_cast< int >(std::lower_bound(begin(keys), end(keys), r2) - begin(keys));
      ret += data[r1].prod(p);
    }
    return ret;
  }
};

/**
 * Reads M and K followed by M * K integers, then prints the minimum
 * inversion count observed over the M + 1 key configurations described
 * below.
 *
 * Every element with value v that has c earlier occurrences of the same
 * value gets the key v + 2 * c * M. The values 0, 1, ..., M - 1 are then
 * processed in this order; processing a value adds M to the key of each of
 * its elements. The inversion count of the key sequence is maintained
 * incrementally and its minimum (including the initial state) is printed.
 *
 * NOTE(review): each input value is used as an index into a vector of size
 * M, so the input is assumed to lie in [0, M) - confirm against the problem
 * statement. 2 * M * K is assumed to fit in `int`.
 */
int main() {
  ios::sync_with_stdio(false);
  cin.tie(nullptr);

  int M, K;
  cin >> M >> K;
  const int total = M * K;

  vector< int > A(total);
  for(auto &a: A) cin >> a;

  // D[v]: positions holding value v, in increasing order.
  vector< vector< int > > D(M);
  for(int i = 0; i < total; i++) {
    const int value = A[i];
    D[value].emplace_back(i);
    const int earlier_same = static_cast< int >(D[value].size()) - 1;
    A[i] = value + 2 * earlier_same * M;
  }

  // Position i owns two point slots in the 2D structure: slot 2i carries its
  // initial key and slot 2i + 1 its key after the +M shift. Exactly one of
  // the two is active (weight 1) at any time.
  vector< int > vs(total * 2);
  BinaryIndexedTree< int > bit(total * 2);  // bit: multiset of the current keys
  int64_t now = 0;
  for(int i = total - 1; i >= 0; i--) {
    now += bit.prod(A[i]);  // later positions whose key is smaller
    bit.apply(A[i], 1);
    vs[2 * i + 0] = A[i];
    vs[2 * i + 1] = A[i] + M;
  }

  Abstract2DBinaryIndexedTreeCompressed< int > mat(vs);
  for(int i = 0; i < total; i++) {
    mat.apply(2 * i + 0, 1);
  }

  // Number of inversions that involve position i under the current keys.
  const auto inversions_with = [&](int i) -> int64_t {
    // Earlier positions own exactly i active slots; drop those with a smaller key.
    const int earlier_not_smaller = i - mat.prod(2 * i, A[i]);
    // All smaller keys minus those at positions <= i.
    const int later_smaller = bit.prod(A[i]) - mat.prod(2 * i + 2, A[i]);
    return static_cast< int64_t >(earlier_not_smaller) + later_smaller;
  };

  int64_t best = now;
  for(int value = 0; value < M; value++) {
    for(const int i: D[value]) {
      now -= inversions_with(i);
      // Move position i from its initial key to the shifted one.
      mat.apply(2 * i + 0, -1);
      mat.apply(2 * i + 1, 1);
      bit.apply(A[i], -1);
      A[i] += M;
      bit.apply(A[i], 1);
      now += inversions_with(i);
    }
    best = min(best, now);
  }
  cout << best << "\n";
}