#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iomanip>
#include <iostream>
#include <numeric>
#include <type_traits>
#include <utility>
#include <vector>

using namespace std;

using int64 = long long;
const int mod = 1e9 + 7;
// const int mod = 998244353;

const int64 infll = (1LL << 62) - 1;
const int inf = (1 << 30) - 1;

// Configures the standard streams once, during static initialisation:
// unties cin from cout, disables C stdio synchronisation and selects
// fixed notation with 10 decimals for cout and cerr.
struct IoSetup {
  IoSetup() {
    cin.tie(nullptr);
    ios::sync_with_stdio(false);
    cout << fixed << setprecision(10);
    cerr << fixed << setprecision(10);
  }
} iosetup;

// Writes a pair as "first second".
template< typename T1, typename T2 >
ostream &operator<<(ostream &os, const pair< T1, T2 > &p) {
  os << p.first << " " << p.second;
  return os;
}

// Reads a pair as two whitespace-separated values.
template< typename T1, typename T2 >
istream &operator>>(istream &is, pair< T1, T2 > &p) {
  is >> p.first >> p.second;
  return is;
}

// Writes the elements separated by single spaces, without a trailing space.
template< typename T >
ostream &operator<<(ostream &os, const vector< T > &v) {
  const int size = (int) v.size();
  for(int i = 0; i < size; i++) {
    os << v[i] << (i + 1 != size ? " " : "");
  }
  return os;
}

// Reads v.size() elements into an already sized vector.
template< typename T >
istream &operator>>(istream &is, vector< T > &v) {
  for(T &in: v) is >> in;
  return is;
}

// a = max(a, b); returns true when a was changed.
template< typename T1, typename T2 >
inline bool chmax(T1 &a, T2 b) {
  return a < b && (a = b, true);
}

// a = min(a, b); returns true when a was changed.
template< typename T1, typename T2 >
inline bool chmin(T1 &a, T2 b) {
  return a > b && (a = b, true);
}

// make_v< T >(a, b, ...) builds a nested vector with the given extents,
// value-initialised at the innermost level.
template< typename T = int64 >
vector< T > make_v(size_t a) {
  return vector< T >(a);
}

template< typename T, typename... Ts >
auto make_v(size_t a, Ts... ts) {
  return vector< decltype(make_v< T >(ts...)) >(a, make_v< T >(ts...));
}

// fill_v(t, v) assigns v to every leaf of a (possibly nested) container.
// Non-class types are treated as leaves; class types are iterated.
template< typename T, typename V >
typename enable_if< is_class< T >::value == 0 >::type fill_v(T &t, const V &v) {
  t = v;
}

template< typename T, typename V >
typename enable_if< is_class< T >::value != 0 >::type fill_v(T &t, const V &v) {
  for(auto &e: t) fill_v(e, v);
}

// Wraps a callable so that it receives itself as its first argument,
// which lets a lambda call itself recursively.
template< typename F >
struct FixPoint : F {
  FixPoint(F &&f) : F(forward< F >(f)) {}

  template< typename... Args >
  decltype(auto) operator()(Args &&... args) const {
    return F::operator()(*this, forward< Args >(args)...);
  }
};

// Convenience factory for FixPoint.
template< typename F >
inline decltype(auto) MFP(F &&f) {
  return FixPoint< F >{forward< F >(f)};
}

/**
 * @brief Binary-Indexed-Tree(BIT)
 * @docs docs/binary-indexed-tree.md
 *
 * Fenwick tree over n values of type T. Public indices are 0-based; the
 * internal array `data` is 1-based.
 */
template< typename T >
struct BinaryIndexedTree {
private:
  int n{0};  // zero so that a default-constructed tree is a valid empty tree
  vector< T > data;

  // Largest power of two that does not exceed n (1 when n == 0). This is the
  // first useful step of the binary descent in lower_bound / upper_bound.
  int top_step() const {
    int k = 1;
    while(k <= n / 2) k <<= 1;
    return k;
  }

public:
  BinaryIndexedTree() = default;

  // Tree of n zeros.
  explicit BinaryIndexedTree(int n) : n(n) {
    data.assign(n + 1, 0);
  }

  // Tree initialised with the contents of v.
  explicit BinaryIndexedTree(const vector< T > &v) :
      BinaryIndexedTree((int) v.size()) {
    build(v);
  }

  // Overwrites the tree with v in linear time; v must have exactly n elements.
  void build(const vector< T > &v) {
    assert(n == (int) v.size());
    for(int i = 1; i <= n; i++) data[i] = v[i - 1];
    for(int i = 1; i <= n; i++) {
      // Push each node's partial sum into the next node that covers it.
      const int parent = i + (i & -i);
      if(parent <= n) data[parent] += data[i];
    }
  }

  // Adds x to the element at 0-based index k. An index k >= n changes nothing.
  void apply(int k, const T &x) {
    for(++k; k <= n; k += k & -k) data[k] += x;
  }

  // Sum of the elements with index in [0, r).
  T prod(int r) const {
    T ret = T();
    for(; r > 0; r -= r & -r) ret += data[r];
    return ret;
  }

  // Sum of the elements with index in [l, r).
  T prod(int l, int r) const {
    return prod(r) - prod(l);
  }

  // Binary descent: returns the number of leading elements that can be taken
  // while the running prefix sum stays strictly below x. With non-negative
  // values this is the smallest index i such that prod(i + 1) >= x, or n.
  int lower_bound(T x) const {
    int i = 0;
    for(int k = top_step(); k > 0; k >>= 1) {
      if(i + k <= n && data[i + k] < x) {
        x -= data[i + k];
        i += k;
      }
    }
    return i;
  }

  // Same descent as lower_bound, but the prefix sum may also equal x. With
  // non-negative values this is the smallest index i such that
  // prod(i + 1) > x, or n.
  int upper_bound(T x) const {
    int i = 0;
    for(int k = top_step(); k > 0; k >>= 1) {
      if(i + k <= n && data[i + k] <= x) {
        x -= data[i + k];
        i += k;
      }
    }
    return i;
  }
};

int M, K;

/**
 * @brief Abstract 2D Binary Indexed Tree Compressed (abstracted 2D coordinate-compressed BIT)
 *
 * Outer Fenwick tree over positions 0..n-1. Every outer node owns the sorted
 * list of keys that can occur in its range (`beet`) and an inner Fenwick tree
 * over that list (`data`).
 */
template< typename T >
struct Abstract2DBinaryIndexedTreeCompressed {
private:
  int n;
  vector< BinaryIndexedTree< T > > data;
  vector< vector< int > > beet;

public:
  // hs holds two keys per position: hs[2 * i] and hs[2 * i + 1] belong to
  // position i. The initial weight of a key is 1 when key % (2 * M) < M and
  // 0 otherwise, where M is the global read in main().
  Abstract2DBinaryIndexedTreeCompressed(const vector< int > &hs) :
      n((int) hs.size() / 2) {
    vector< int > ord(2 * n);
    iota(begin(ord), end(ord), 0);
    sort(begin(ord), end(ord), [&](int a, int b) { return hs[a] < hs[b]; });

    // Visiting the keys in increasing order keeps every beet[k] sorted.
    beet.resize(n + 1);
    for(const int idx: ord) {
      const int key = hs[idx];
      const int pos = idx / 2;
      for(int k = pos + 1; k <= n; k += k & -k) {
        beet[k].emplace_back(key);
      }
    }

    data.reserve(n + 1);
    for(int k = 0; k <= n; k++) {
      //beet[k].erase(unique(begin(beet[k]), end(beet[k])), end(beet[k]));
      const int cnt = (int) beet[k].size();
      vector< T > luz(cnt);
      for(int i = 0; i < cnt; i++) {
        luz[i] = (beet[k][i] % (2 * M) < M);
      }
      data.emplace_back(luz);
    }
  }

  // Adds x to the weight at (position k1, key k2). k2 is looked up with
  // lower_bound in each node's key list, so it is expected to be one of the
  // keys registered for position k1.
  void apply(int k1, int k2, const T &x) {
    for(++k1; k1 <= n; k1 += k1 & -k1) {
      const auto &keys = beet[k1];
      const int p = (int) (lower_bound(begin(keys), end(keys), k2) - begin(keys));
      data[k1].apply(p, x);
    }
  }

  // Sum of the weights over positions in [0, r1) and keys strictly below r2.
  T prod(int r1, int r2) const {
    T ret{0};
    for(; r1 > 0; r1 -= r1 & -r1) {
      const auto &keys = beet[r1];
      const int p = (int) (lower_bound(begin(keys), end(keys), r2) - begin(keys));
      ret += data[r1].prod(p);
    }
    return ret;
  }
};

// Reads M, K and M * K values, then prints the minimum, over every prefix
// 0..v of values (including the empty prefix), of the inversion count obtained
// after raising the key of every element whose value lies in that prefix by M.
int main() {
  cin >> M >> K;
  const int total = M * K;
  vector< int > A(total);
  for(auto &a: A) cin >> a;

  // D[v] lists, in increasing order, the positions whose input value is v.
  // The values index D directly, so they must lie in [0, M).
  vector< vector< int > > D(M);
  for(int i = 0; i < total; i++) {
    const int value = A[i];
    D[value].emplace_back(i);
    // The j-th occurrence (0-based) of a value gets key value + 2 * M * j,
    // which makes all keys distinct and leaves room for a later shift by M.
    const int occurrence = (int) D[value].size() - 1;
    A[i] += 2 * occurrence * M;
  }

  // bit counts the elements by current key; vs records both keys every
  // position can ever hold (unshifted and shifted).
  // NOTE(review): the key range 2 * M * K presumably relies on every value
  // occurring at most K times -- confirm against the problem statement.
  vector< int > vs(2 * total);
  BinaryIndexedTree< int > bit(2 * total);
  int64_t now = 0;
  for(int i = total - 1; i >= 0; i--) {
    // Elements already inserted lie to the right of i; those with a smaller
    // key form an inversion with i.
    now += bit.prod(A[i]);
    bit.apply(A[i], 1);
    vs[2 * i + 0] = A[i];
    vs[2 * i + 1] = A[i] + M;
  }

  Abstract2DBinaryIndexedTreeCompressed< int > mat(vs);
  int64_t ret = now;
  for(int value = 0; value < M; value++) {
    for(const int i: D[value]) {
      // Remove every inversion that involves position i under its old key:
      // larger keys on the left, smaller keys on the right.
      const int left_smaller_before = mat.prod(i, A[i]);
      now -= i - left_smaller_before;
      now -= bit.prod(A[i]) - left_smaller_before;

      // Move the element from key A[i] to key A[i] + M in both structures.
      mat.apply(i, A[i], -1);
      mat.apply(i, A[i] + M, 1);
      bit.apply(A[i], -1);
      A[i] += M;
      bit.apply(A[i], 1);

      // Add the inversions back under the new key.
      const int left_smaller_after = mat.prod(i, A[i]);
      now += i - left_smaller_after;
      now += bit.prod(A[i]) - left_smaller_after;
    }
    ret = min(ret, now);
  }
  cout << ret << "\n";
}