"""Minimum inversion count over cyclic rotations of a repeating value pattern.

Input (stdin): a line with ``M K`` followed by a line with ``M*K`` integers.
Output (stdout): a single integer, see :func:`solve`.
"""
import bisect
import random
import sys
from collections import defaultdict, deque
from heapq import heapify, heappop, heappush
from itertools import permutations
from math import gcd, log


class BIT():
    """Fenwick tree (binary indexed tree) over the 1-based indices 1..n.

    If ``mod`` is non-zero, stored values and query results are reduced
    modulo ``mod``; with the default ``mod=0`` plain integers are kept.
    """

    def __init__(self, n, mod=0):
        self.BIT = [0] * (n + 1)  # slot 0 is unused; the tree is 1-based
        self.num = n
        self.mod = mod

    def query(self, idx):
        """Return A[1] + A[2] + ... + A[idx] in O(log n).

        Returns 0 when ``idx <= 0``.
        """
        res_sum = 0
        mod = self.mod
        while idx > 0:
            res_sum += self.BIT[idx]
            if mod:
                res_sum %= mod
            idx -= idx & (-idx)  # drop the lowest set bit
        return res_sum

    def update(self, idx, x):
        """Perform A[idx] += x in O(log n).

        Raises:
            IndexError: if ``idx`` is not positive.  The low-bit step never
                advances from 0, so such an index would otherwise loop
                forever.
        """
        if idx <= 0:
            raise IndexError("BIT index must be >= 1, got %d" % idx)
        mod = self.mod
        while idx <= self.num:
            self.BIT[idx] += x
            if mod:
                self.BIT[idx] %= mod
            idx += idx & (-idx)  # move to the next node covering idx
        return


def count_inversions(perm):
    """Count pairs ``i < j`` with ``perm[i] > perm[j]``.

    ``perm`` must hold distinct integers from ``range(len(perm))``; each
    value ``v`` is stored in the Fenwick tree at 1-based slot ``v + 1``.
    """
    tree = BIT(len(perm))
    inversions = 0
    for seen, value in enumerate(perm):
        # query(value) counts earlier entries smaller than value, so the
        # remaining `seen - ...` earlier entries are larger.
        inversions += seen - tree.query(value)
        tree.update(value + 1, 1)
    return inversions


def solve(M: int, K: int, A: list) -> int:
    """Return the minimum inversion count over M rotated target orders.

    The first ``M*K`` entries of ``A`` are bucketed by ``value % M``; every
    residue must occur exactly ``K`` times.  The base order lists, for each
    occurrence number j = 0..K-1, the position of the j-th occurrence of
    residue 0, 1, ..., M-1.  Residues 0, 1, ..., M-1 are then moved one
    after another from the front of every group to its back, and the
    smallest inversion count seen (including the base order) is returned.

    NOTE(review): this reads as "minimum adjacent swaps to turn A into K
    repetitions of some cyclic rotation of 0..M-1 (mod M)"; confirm against
    the original problem statement.

    Raises:
        ValueError: if ``A`` has fewer than ``M*K`` entries or a residue
            does not occur exactly ``K`` times among the first ``M*K``.
    """
    total = M * K
    if len(A) < total:
        raise ValueError(
            "expected at least %d values, got %d" % (total, len(A))
        )

    # positions[v] lists indices holding residue v; they are appended in
    # increasing index order, so every list is already sorted.
    positions = [[] for _ in range(M)]
    for pos in range(total):
        positions[A[pos] % M].append(pos)
    if any(len(bucket) != K for bucket in positions):
        raise ValueError("every residue mod M must occur exactly K times")

    base_order = [positions[v][j] for j in range(K) for v in range(M)]
    current = count_inversions(base_order)

    # delta[v]: change in the inversion count when residue v moves from the
    # front to the back of every group.  Within group j, the `rank` members
    # located before v stop being inversions and the `M - 1 - rank` members
    # located after it become inversions.
    delta = [0] * M
    for j in range(K):
        by_position = sorted(range(M), key=lambda v: positions[v][j])
        for rank, v in enumerate(by_position):
            delta[v] += (M - 1 - rank) - rank

    best = current
    for v in range(M):
        current += delta[v]
        best = min(best, current)
    return best


def main():
    """Read ``M K`` and the sequence from stdin and print the answer."""
    readline = sys.stdin.readline
    M, K = map(int, readline().split())
    A = list(map(int, readline().split()))
    print(solve(M, K, A))


if __name__ == "__main__":
    main()