#include <bits/stdc++.h>
using namespace std;
typedef signed long long ll;

// Convenience macros shared by the rest of the file.
// _P: printf wrapper whose return value is explicitly discarded.
#define _P(...) (void)printf(__VA_ARGS__)
// FOR: counts an already-declared variable x from 0 up to (but excluding) `to`;
// `to` is re-evaluated on every iteration.
#define FOR(x,to) for(x=0;x<(to);x++)
// FORR: range-for over arr, binding each element by reference.
#define FORR(x,arr) for(auto& x:arr)
// FORR2: range-for over arr, unpacking each element into x and y via a structured binding.
#define FORR2(x,y,arr) for(auto& [x,y]:arr)
// ALL: expands to the begin/end iterator pair of a, for passing to algorithms.
#define ALL(a) (a.begin()),(a.end())
// ZERO: sets every byte of a to 0; uses sizeof(a), so a must be a real array, not a pointer.
#define ZERO(a) memset(a,0,sizeof(a))
// MINUS: sets every byte of a to 0xff (reads as -1 for two's-complement signed integers).
#define MINUS(a) memset(a,0xff,sizeof(a))
// Replaces a with b when a is strictly less than b.
// Returns true if a was overwritten, false if it was left untouched.
template<class T> bool chmax(T &a, const T &b) {
	if (!(a < b)) return false;
	a = b;
	return true;
}
// Replaces a with b when a is strictly greater than b.
// Returns true if a was overwritten, false if it was left untouched.
template<class T> bool chmin(T &a, const T &b) {
	bool lowered = false;
	if (a > b) {
		a = b;
		lowered = true;
	}
	return lowered;
}
//-------------------------------------------------------

// M and K are read from standard input at the start of solve();
// solve() then reads M*K further integers.
int M,K;
// num[v] = how many times the value v has been read so far inside solve().
// NOTE(review): fixed capacity of 404040 — presumably sized to the problem's
// input bound; confirm against the problem statement.
int num[404040];

// Accumulators filled by solve() and consumed by its final scan:
// a running sum of A[0..i] is added, together with D[i], into a running total
// for each i in [0, M). Both arrays start zero-initialised (static storage).
ll A[404040];
ll D[404040];
void solve() {
	int i,j,k,l,r,x,y; string s;
	
	cin>>M>>K;
	FOR(i,M*K) {
		cin>>x;
		y=num[x]++;
		if(M*(y+1)<=i) {
			A[0]++;
			D[0]+=i-(M*y+x)-1;
			D[x+1]-=M;
		}
		else if(M*y+x<=i) {
			A[0]++;
			D[0]+=i-(M*y+x)-1;
			D[x+1]-=i-(M*y);
			A[x+1]--;
			A[M-(i-(M*y+x))+1]++;
		}
		else if(M*y<=i) {
			A[M*y+x-i+1]++;
			A[x+1]--;
			D[x+1]-=i-(M*y);
		}
	}
	
	ll mi=1LL<<60;
	ll add=0;
	ll sum=0;
	FOR(i,M) {
		add+=A[i];
		sum+=add+D[i];
		mi=min(mi,sum);
	}
	cout<<mi<<endl;
	
	
}


// Entry point.
// With no command-line arguments, iostream is detached from C stdio and cin is
// untied before solve() runs. With arguments, each one (followed by '\n') is
// pushed back onto stdin with ungetc, last character first, so that solve()
// reads the arguments as if they had been typed as input.
// NOTE(review): the C standard only guarantees one character of ungetc
// push-back; longer argument strings rely on the platform allowing more.
int main(int argc,char** argv){
	if (argc == 1) {
		ios::sync_with_stdio(false);
		cin.tie(0);
	}

	// Join the arguments into one newline-terminated buffer.
	string injected;
	for (int a = 1; a < argc; ++a) {
		injected += argv[a];
		injected += '\n';
	}

	// Push back in reverse so the first character is the first one read.
	for (size_t k = injected.size(); k-- > 0; ) {
		ungetc(injected[k], stdin);
	}

	cout.tie(0);
	solve();
	return 0;
}