// NOTE(review): the targets of the original #include directives were lost;
// the headers below are the ones this file demonstrably uses.
#include <algorithm>
#include <cassert>
#include <iomanip>
#include <iostream>
#include <set>
#include <vector>

// Debug helpers: print the line number, enclosing function and a value to stderr.
#define debug_value(x) cerr << "line" << __LINE__ << ":<" << __func__ << ">:" << #x << "=" << x << endl;
#define debug(x) cerr << "line" << __LINE__ << ":<" << __func__ << ">:" << x << endl;

/// Assigns b to a when b is larger; returns true iff a was updated.
template <typename T>
inline bool chmax(T& a, T b) {
    if (a < b) {
        a = b;
        return true;
    }
    return false;
}

/// Assigns b to a when b is smaller; returns true iff a was updated.
template <typename T>
inline bool chmin(T& a, T b) {
    if (a > b) {
        a = b;
        return true;
    }
    return false;
}

using namespace std;
typedef long long ll;

/// Builds an n x m matrix with every cell set to v.
template <typename T>
vector<vector<T>> vec2d(int n, int m, T v) {
    return vector<vector<T>>(n, vector<T>(m, v));
}

/// Builds an n x m x k tensor with every cell set to v.
template <typename T>
vector<vector<vector<T>>> vec3d(int n, int m, int k, T v) {
    return vector<vector<vector<T>>>(n, vector<vector<T>>(m, vector<T>(k, v)));
}

/// Prints the elements of v on one line separated by `delimiter`, then a newline.
/// An empty vector prints just the newline.
template <typename T>
void print_vector(const vector<T>& v, char delimiter = ' ') {
    if (v.empty()) {
        cout << endl;
        return;
    }
    for (size_t i = 0; i + 1 < v.size(); i++) cout << v[i] << delimiter;
    cout << v.back() << endl;
}

/// Fenwick tree (binary indexed tree) over 1-based indices 1..n,
/// where n is the requested size rounded up to a power of two.
template <typename T>
struct bit {
    int n;
    vector<T> data;

    bit(int n_) {
        n = 1;
        while (n < n_) n *= 2;
        data.assign(n + 1, T(0));
    }

    /// Prefix sum over indices 1..i (0 when i <= 0).
    T sum(int i) const {
        T ret = 0;
        while (i > 0) {
            ret += data[i];
            i -= i & -i;
        }
        return ret;
    }

    /// Sum over the whole index range.
    T all() const { return sum(n); }

    /// Adds x at index i; i must be >= 1 (index 0 would never advance).
    void add(int i, T x) {
        assert(i >= 1);
        while (i <= n) {
            data[i] += x;
            i += i & -i;
        }
    }
};

/// Coordinate compression: maps each distinct input value to its rank in
/// sorted order, shifted by `offset`.
template <typename T>
class Compress {
public:
    vector<T> data;  // distinct values in ascending order
    int offset;      // rank assigned to the smallest value

    Compress(vector<T> data_, int offset = 0) : offset(offset) {
        set<T> st(data_.begin(), data_.end());
        data.assign(st.begin(), st.end());
    }

    /// Rank of x; x must be one of the values given to the constructor.
    int operator[](T x) {
        auto p = lower_bound(data.begin(), data.end(), x);
        // Check the iterator before dereferencing it.
        assert(p != data.end() && x == *p);
        return offset + static_cast<int>(p - data.begin());
    }

    /// Value whose rank is x.
    T inv(int x) { return data[x - offset]; }

    /// Number of distinct values.
    int size() { return static_cast<int>(data.size()); }
};

int main() {
    ios::sync_with_stdio(false);
    cin.tie(nullptr);
    cout << setprecision(10) << fixed;

    int m, k;
    cin >> m >> k;
    const int n = m * k;

    // idx[value] lists the 1-based positions of that value, in input order.
    // NOTE(review): assumes values are 0-based (0 <= a[i] < m) and each value
    // occurs exactly k times - confirm against the problem statement.
    vector<int> a(n);
    vector<vector<int>> idx(m);
    for (int i = 0; i < n; i++) {
        cin >> a[i];
        idx[a[i]].push_back(i + 1);
    }

    // Inversion count of the positions visited in the order
    // (occurrence 0 of values 0..m-1), (occurrence 1 of values 0..m-1), ...
    // Counts never exceed m*k, so int is enough inside the tree.
    ll ans = 0;
    bit<int> bt(n);
    for (int i = 0; i < k; i++) {
        for (int j = 0; j < m; j++) {
            ans += bt.all() - bt.sum(idx[j][i]);
            bt.add(idx[j][i], 1);
        }
    }
    // debug_value(ans);

    // idx_cp[i][j]: rank (1..m) of value j's i-th occurrence among the i-th
    // occurrences of all values.
    ll tmp = ans;
    vector<vector<int>> idx_cp(k);
    for (int i = 0; i < k; i++) {
        vector<int> v(m);
        for (int j = 0; j < m; j++) {
            v[j] = idx[j][i];
        }
        Compress<int> cp(v, 1);
        idx_cp[i].reserve(m);
        for (int j = 0; j < m; j++) {
            idx_cp[i].push_back(cp[v[j]]);
        }
    }

    for (int j = 0; j < m; j++) {
        for (int i = 0; i < k; i++) {
            // Move value j to the very back of the order within group i.
            // Positions are distinct, so the ranks in a group are exactly 1..m:
            // p - 1 members sit at smaller positions and m - p at larger ones.
            // The former stop being inversions, the latter become inversions.
            const int p = idx_cp[i][j];
            tmp += (m - p) - (p - 1);
        }
        chmin(ans, tmp);
    }

    cout << ans << '\n';
}