"""Sum Lehmer-code digits over K consecutive permutations.

Input (stdin, whitespace separated): ``N K P_1 ... P_N`` where P is read as a
permutation of 1..N.  The script computes the Lehmer code of P (for every
element, how many later elements are smaller), treats it as a number in the
factorial number system, and adds up all digits of the K consecutive codes
starting at P's own code (each digit counts upward and wraps modulo its
base).  The digit sum of one code is the inversion count of the
corresponding permutation.  The total is printed modulo 10**9 + 7.
"""
import sys

read = sys.stdin.buffer.read
readline = sys.stdin.buffer.readline
readlines = sys.stdin.buffer.readlines

# Modulus applied to the final answer only (f and g take their own MOD).
MOD = 10**9 + 7


class BinaryIndexedTree:
    """Fenwick tree over indices 1 .. len(seq) - 1 (slot 0 is unused)."""

    def __init__(self, seq):
        """Build the tree from ``seq``; ``seq[0]`` is ignored by queries."""
        self.size = len(seq)
        # Kept for compatibility; nothing in this file reads it.
        self.depth = self.size.bit_length()
        self.build(seq)

    def build(self, seq):
        """Initialise the internal array from ``seq`` in O(len(seq)).

        Works on a copy so the caller's list is left untouched.
        """
        data = list(seq)
        size = self.size
        # Start at 1: index 0 has no lowest set bit, so it has no parent
        # node and would otherwise be added to itself.
        for i in range(1, size):
            j = i + (i & (-i))
            if j < size:
                data[j] += data[i]
        self.data = data

    def get_sum(self, i):
        """Return the sum of the values stored at indices 1..i."""
        data = self.data
        s = 0
        while i:
            s += data[i]
            i -= i & -i
        return s

    def add(self, i, x):
        """Add ``x`` to the value at index ``i``.

        Indices >= size are silently ignored.

        Raises:
            IndexError: if ``i <= 0``.  Such an index never advances in the
                update loop (``i & -i`` is 0 at index 0), so it would spin
                forever instead of terminating.
        """
        if i <= 0:
            raise IndexError("BinaryIndexedTree index must be >= 1")
        data = self.data
        size = self.size
        while i < size:
            data[i] += x
            i += i & -i


def g(N, MOD, x):
    """Return ``sum(i % MOD for i in range(x, x + N))`` in O(1).

    The closed form subtracts 0 + 1 + ... + (x - 1) from the sum over
    range(0, x + N), so it relies on 0 <= x <= MOD.
    """
    # Remove the prefix 0..x-1, then sum i % MOD over range(0, x + N).
    ret = -x * (x - 1) // 2
    N += x
    q, r = divmod(N, MOD)
    # q complete cycles 0..MOD-1, followed by a partial cycle 0..r-1.
    ret += q * (MOD - 1) * MOD // 2
    ret += r * (r - 1) // 2
    return ret


def f(N, MOD, rep, x, rest):
    """Sum the first ``N`` terms of a stepped counter taken modulo ``MOD``.

    The sequence is ``x`` repeated ``rest`` times, then ``(x + 1) % MOD``
    repeated ``rep`` times, then ``(x + 2) % MOD`` repeated ``rep`` times,
    and so on.
    """
    ret = 0
    # Leading constant run of x (at most `rest` terms).
    n = min(N, rest)
    N -= n
    ret += x * n
    x = (x + 1) % MOD
    # q complete runs of length rep, then r terms of the next value.
    q, r = divmod(N, rep)
    ret += g(q, MOD, x) * rep
    x = (x + q) % MOD
    ret += x * r
    return ret


def solve(N, K, P):
    """Return the digit total over K consecutive Lehmer codes, mod 10**9 + 7.

    Args:
        N: length of the permutation.
        K: number of consecutive codes to sum, starting at P's own code.
        P: iterable of the permutation's values (1..N).
    """
    # Lehmer code: for each x, the number of later elements smaller than x,
    # i.e. x minus the count of already-seen values <= x (x itself included).
    bit = BinaryIndexedTree([0] * (N + 1))
    digits = []
    for x in P:
        bit.add(x, 1)
        digits.append(x - bit.get_sum(x))
    # Least significant digit first: digits[i] has base i + 1.
    digits.reverse()

    # f() only looks at min(K, rest) and at divmod(remaining, fact) with
    # remaining <= K, so any value above K behaves the same.  Capping both
    # at K + 1 keeps the integers small instead of growing like i!.
    cap = max(K, 0) + 1
    rest = 1  # number of codes (current one included) before digit i steps
    fact = 1  # i!: how many consecutive codes share one value of digit i
    answer = 0
    for i, x in enumerate(digits):
        answer += f(K, i + 1, fact, x, rest)
        rest = min(rest + fact * (i - x), cap)
        fact = min(fact * (i + 1), cap)
    return answer % MOD


def main():
    """Read ``N K P_1 ... P_N`` from stdin and print the answer."""
    N, K, *P = map(int, read().split())
    print(solve(N, K, P))


if __name__ == "__main__":
    main()