// NOTE(review): this translation unit is not compilable as it stands. It sits on a single
// physical line, and what is visible looks like the text between each '<' and the following
// '>' was removed during extraction -- TODO confirm against the intact source. Visible symptoms:
//   * `#include` has no header name; every `template` has no parameter list; `vector`,
//     `priority_queue`, `std::map` and `po167::seg_sum` have no template arguments.
//   * Statements break off where a '<' comparison would sit: `for (ll i=0;i` (rep macro),
//     `if(a` (chmax), `while(seg_size` (seg_sum constructor), `for(int i=0;i` (both
//     inversion overloads) and the trailing `//cout<`.
//   * Because everything is on one line, the first `//` (right after `T e;`) lexically
//     comments out the rest of the file.
// As a result the remainder of po167::seg_sum, the loop bodies and return statements of
// both inversion() overloads, the start of main() and the end of solve() are absent here.
// Restore them from the original source; do not reconstruct them by guesswork.
//
// What the surviving text shows:
//   * LB / UB return lower_bound / upper_bound of `a` in `v` as an index (iterator minus begin).
//   * chmin(a,b) assigns b to a when a>b and returns 1, otherwise returns 0.
//   * So sorts a vector ascending; Sore sorts with the comparator x>y (descending).
//   * yneos prints "YES\n" or "NO\n" depending on its argument.
//   * solve() reads M and K, then M*K integers `a`. For each one it adds
//     M-1-2*q[ind[a]] to diff[a], increments q[ind[a]] and ind[a], stores p[i]=a and
//     base[i]=i%M. It then sets tmp=inversion(p,base), ans=tmp, and for i in [0,M) adds
//     diff[i] to tmp and keeps the minimum in ans via chmin. The output statement is not visible.
//     `a` is used directly as an index into diff/ind (both sized M) -- presumably the input
//     values are 0-based and each occurs at most K times; verify against the problem statement.
//   * NOTE(review): `#define _GLIBCXX_DEBUG` appears after `#include` in the text; if a
//     standard header was included at that point, the macro would presumably not affect it
//     -- confirm the intended order once the include line is restored.
#include #pragma GCC optimize("Ofast") #define _GLIBCXX_DEBUG using namespace std; using std::cout; using std::cin; using std::endl; using ll=long long; using ld=long double; ll ILL=1167167167167167167; const int INF=2100000000; const ll mod=998244353; #define rep(i,a) for (ll i=0;i using _pq = priority_queue, greater>; template ll LB(vector &v,T a){return lower_bound(v.begin(),v.end(),a)-v.begin();} template ll UB(vector &v,T a){return upper_bound(v.begin(),v.end(),a)-v.begin();} template bool chmin(T &a,const T &b){if(a>b){a=b;return 1;}else return 0;} template bool chmax(T &a,const T &b){if(a void So(vector &v) {sort(v.begin(),v.end());} template void Sore(vector &v) {sort(v.begin(),v.end(),[](T x,T y){return x>y;});} void yneos(bool a){if(a) cout<<"YES\n"; else cout<<"NO\n";} namespace po167{ template struct seg_sum{ int n; int seg_size; std::vector seg; T e; //element count, identity element; the type must have the + operation defined. seg_sum(int k,T e_in){ e=e_in; n=k; seg_size=1; while(seg_size //p and q may contain duplicates as long as their contents are the same. long long inversion(std::vector &p,std::vector &q){ int n=p.size(); assert(p.size()==q.size()); po167::seg_sum S(n,0); std::vector p_index(n,-1); std::map> m; for(int i=0;i //the contents of q must be a rearrangement of the permutation 0..n. long long inversion(std::vector &q){ int n=q.size(); po167::seg_sum S(n,0); std::vector p_index(n); for(int i=0;i>t; rep(i,t) solve(); } void solve(){ ll M,K; cin>>M>>K; vector q(K),p(M*K),base(M*K); vector diff(M),ind(M); rep(i,M*K){ int a; cin>>a; diff[a]+=M-1-2*q[ind[a]]; q[ind[a]]++; ind[a]++; p[i]=a; base[i]=i%M; } ll tmp=inversion(p,base); ll ans=tmp; rep(i,M){ tmp+=diff[i]; chmin(ans,tmp); //cout<