// Contest prelude: headers, shorthand macros, type aliases and stream
// operators shared by the rest of the file.
// NOTE(review): every #include below has no header name, and several
// template headers / alias targets have no angle-bracket list; that text
// looks like it was lost before the file reached this view - confirm
// against the original source before building.
#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* shorthand for template headers and frequently used identifiers */ #define TE template #define TES template #define Z auto #define ep emplace_back #define eb emplace #define fi first #define se second #define bg begin #define ed end #define all(x) bg(x), ed(x) /* ov yields its fifth argument; FOR and FOR_R rely on it to pick a loop macro by argument count */ #define ov(a, b, c, d, e, ...) e /* ascending loops: FO1 repeats a times, FO2 counts i from 0 to a - 1, FO3 from a to b - 1, FO4 does the same with step c */ #define FO1(a) for (int _ = 0; _ < (a); ++_) #define FO2(i, a) for (int i = 0; i < (a); ++i) #define FO3(i, a, b) for (int i = (a); i < (b); ++i) #define FO4(i, a, b, c) for (int i = (a); i < (b); i += (c)) #define FOR(...) ov(__VA_ARGS__, FO4, FO3, FO2, FO1)(__VA_ARGS__) /* descending counterparts: FF3(i, a, b) starts at b - 1 and stops after a */ #define FF1(a) for (int _ = (a) - 1; _ >= 0; --_) #define FF2(i, a) for (int i = (a) - 1; i >= 0; --i) #define FF3(i, a, b) for (int i = (b) - 1; i >= (a); --i) #define FF4(i, a, b, c) for (int i = (b) - 1; i >= (a); i -= (c)) #define FOR_R(...) ov(__VA_ARGS__, FF4, FF3, FF2, FF1)(__VA_ARGS__) /* FOR_subset starts at t = s, steps to (t - 1) & s, and ends once t == 0 has been visited (t is then set to -1) */ #define FOR_subset(t, s) for (int t = (s); t > -1; t = (t == 0 ? 
-1 : (t - 1) & s)) /* from here on the identifier sort expands to ranges::sort */ #define sort ranges::sort using namespace std; /* container, tuple and heap aliases; their argument lists are not visible in this view */ TE using vc = vector; TE using vvc = vc>; TE using T1 = tuple; TE using T2 = tuple; TE using T3 = tuple; TE using T4 = tuple; TE using max_heap = priority_queue; TE using min_heap = priority_queue, greater>; /* short names for arithmetic types */ using u8 = unsigned char; using uint = unsigned int; using ll = long long; using ull = unsigned long long; using ld = long double; using i128 = __int128; using u128 = __uint128_t; using f128 = __float128; using u16 = uint16_t; using PII = pair; using PLL = pair; /* dbg is 1 only when YRSD is defined */ #ifdef YRSD constexpr bool dbg = 1; #else constexpr bool dbg = 0; #endif /* reads one whitespace-delimited token and parses it as a decimal i128 with an optional leading minus sign; the characters are not validated */ istream &operator>>(istream &I, i128 &x) { static string s; I >> s; int f = s[0] == '-'; x = 0; const int N = (int)s.size(); FOR(i, f, N) x = x * 10 + s[i] - '0'; if (f) x = -x; return I; } /* writes x in decimal: digits are collected least-significant first, then the buffer is reversed; zero is printed as a single 0 */ ostream &operator<<(ostream &O, i128 x) { static string s; s.clear(); bool f = x < 0; if (f) x = -x; while (x) s += '0' + x % 10, x /= 10; if (s.empty()) s += '0'; if (f) s += '-'; reverse(all(s)); return O << s; } /* f128 input goes through stold on the token read, f128 output through a conversion to ld */ istream &operator>>(istream &I, f128 &x) { static string s; I >> s, x = stold(s); return I; } ostream &operator<<(ostream &O, const f128 x) { return O << ld(x); } /* reads every element of the tuple in order */ template istream &operator>>(istream &I, tuple &t) { return apply([&I](Z &...s) { ((I >> s), ...); }, t), I; } /* a pair is read as first then second and printed as the two members separated by one space */ template istream &operator>>(istream &I, pair &x) { return I >> x.fi >> x.se; } template ostream &operator<<(ostream &O, const pair &x) { return O << x.fi << ' ' << x.se; } /* reads each element of any type that supports begin and end, subject to the string exclusion in the constraint */ TE requires requires(T &c) { begin(c); end(c); } and (not is_same_v, string>) istream &operator>>(istream &I, T &c) { for (Z &e : c) I >> e; return I; } /* prints the elements separated by single spaces and prints nothing for an empty range; character-string types are excluded by the constraint */ TE requires requires(const T &c) { begin(c); end(c); } and (not is_same_v, const char*>) and (not is_same_v, string>) and (not is_array_v> or not is_same_v>, char>) ostream &operator<<(ostream &O, const T &a) { if (a.empty()) return O; Z i = a.begin(); O << *i++; for (; i != a.end(); ++i) O << ' ' << *i; return O; } /* IN(x, s...) reads x from cin and then recurses on the remaining arguments; the zero-argument overload ends the recursion */ void IN() {} TE void IN(T &x, Z &...s) { cin >> x, IN(s...); } 
// Output helpers, small numeric/container utilities, the modular integer
// mint_t, the Fenwick tree bit_t, and the solution itself
// (count_inv_of_permutation, Yorisou, main).
// NOTE(review): template headers and angle-bracket argument lists are absent
// throughout this text; names such as mod, of and ck are used without a
// visible declaration - presumably template parameters, confirm against the
// original source.
/* print(...) writes its arguments to cout separated by single spaces and ends with a newline */ void print() { cout << '\n'; } TES void print(T &&x, S &&...y) { cout << x; if constexpr (sizeof...(S)) cout << ' '; print(forward(y)...); } /* put(...) writes its arguments to cout back to back, with no separator and no newline */ void put() {} TES void put(T &&x, S &&...y) { cout << x; put(forward(y)...); } /* declare-and-read macros: each declares the named variables and reads them through IN */ #define INT(...) int __VA_ARGS__; IN(__VA_ARGS__) #define UINT(...) uint __VA_ARGS__; IN(__VA_ARGS__) #define LL(...) ll __VA_ARGS__; IN(__VA_ARGS__) #define ULL(...) ull __VA_ARGS__; IN(__VA_ARGS__) #define I128(...) i128 __VA_ARGS__; IN(__VA_ARGS__) #define STR(...) string __VA_ARGS__; IN(__VA_ARGS__) #define CH(...) char __VA_ARGS__; IN(__VA_ARGS__) /* NOTE(review): re is not declared anywhere in the visible source, so REAL would not compile if it were used - confirm, ld may have been intended */ #define REAL(...) re __VA_ARGS__; IN(__VA_ARGS__) /* VEC declares a sized to n and reads it; T does not appear in the visible expansion, presumably lost with the angle-bracket text */ #define VEC(T, a, n) vc a(n); IN(a) /* answer printers: each prints the first word of its pair when o is true and the second otherwise; the negated variants (NO, BOB, IMPOSSIBLE, NIE and their case variants) forward not o */ void YES(bool o = 1) { print(o ? "YES" : "NO"); } void Yes(bool o = 1) { print(o ? "Yes" : "No"); } void yes(bool o = 1) { print(o ? "yes" : "no"); } void NO(bool o = 1) { YES(not o); } void No(bool o = 1) { Yes(not o); } void no(bool o = 1) { yes(not o); } void ALICE(bool o = 1) { print(o ? "ALICE" : "BOB"); } void Alice(bool o = 1) { print(o ? "Alice" : "Bob"); } void alice(bool o = 1) { print(o ? "alice" : "bob"); } void BOB(bool o = 1) { ALICE(not o); } void Bob(bool o = 1) { Alice(not o); } void bob(bool o = 1) { alice(not o); } void POSSIBLE(bool o = 1) { print(o ? "POSSIBLE" : "IMPOSSIBLE"); } void Possible(bool o = 1) { print(o ? "Possible" : "Impossible"); } void possible(bool o = 1) { print(o ? "possible" : "impossible"); } void IMPOSSIBLE(bool o = 1) { POSSIBLE(not o); } void Impossible(bool o = 1) { Possible(not o); } void impossible(bool o = 1) { possible(not o); } void TAK(bool o = 1) { print(o ? 
"TAK" : "NIE"); } void NIE(bool o = 1) { TAK(not o); } /* pi is defined only when compiling as C++20 or later */ #if (__cplusplus >= 202002L) #include constexpr ld pi = numbers::pi_v; #endif /* inf: the numeric_limits maximum of the type, with a separate i128 specialisation (an inf value times 2 * 10^18) and a pair form built from two inf values */ TE constexpr T inf = numeric_limits::max(); template <> constexpr i128 inf = i128(inf) * 2'000'000'000'000'000'000; template constexpr pair inf> = {inf, inf}; /* pc: popcount of x after conversion through make_unsigned_t */ TE constexpr static inline int pc(T x) { return popcount(make_unsigned_t(x)); } /* si: a.size() returned as ll */ constexpr static inline ll si(const Z &a) { return a.size(); } /* whole-container reverse, and sort followed by removal of adjacent duplicates */ void reverse(Z &a) { reverse(all(a)); } void unique(Z &a) { sort(a); a.erase(unique(all(a)), a.end()); } /* inverse: sets b[a[i]] = i for every a[i] != -1; slots never written stay -1 */ TE vc inverse(const vc &a) { int N = si(a); vc b(N, -1); FOR(i, N) if (a[i] != -1) b[a[i]] = i; return b; } /* QMAX / QMIN: largest / smallest element of a container or iterator range; the result of max_element / min_element is dereferenced without an emptiness check */ Z QMAX(const Z &a) { return *max_element(all(a)); } Z QMIN(const Z &a) { return *min_element(all(a)); } TE Z QMAX(T l, T r) { return *max_element(l, r); } TE Z QMIN(T l, T r) { return *min_element(l, r); } /* chmax / chmin: assign b to a when a < b (respectively a > b) and return whether the assignment happened */ constexpr bool chmax(Z &a, const Z &b) { return (a < b ? a = b, 1 : 0); } constexpr bool chmin(Z &a, const Z &b) { return (a > b ? a = b, 1 : 0); } /* argsort: indices of a ordered by value, ties broken by the smaller index */ vc argsort(const Z &a) { vc I(si(a)); iota(all(I), 0); sort(I, [&](int i, int k) { return a[i] < a[k] or (a[i] == a[k] and i < k); }); return I; } /* rearrange: b[i] = a[I[i]] for every index of I */ TE vc rearrange(const vc &a, const vc &I) { int N = si(I); vc b(N); FOR(i, N) b[i] = a[I[i]]; return b; } /* pre_sum: prefix sums with c[i + 1] = c[i] + a[i]; when of == 0 the leading entry is erased (of has no visible declaration here) */ template vc pre_sum(const vc &a) { int N = si(a); vc c(N + 1); FOR(i, N) c[i + 1] = c[i] + a[i]; if (of == 0) c.erase(c.begin()); return c; } /* topbit / lowbit: index of the highest / lowest set bit, or -1 for x == 0; the 32-bit builtin is chosen when sizeof(T) <= 4 */ TE constexpr static int topbit(T x) { if (x == 0) return - 1; if constexpr (sizeof(T) <= 4) return 31 - __builtin_clz(x); else return 63 - __builtin_clzll(x); } TE constexpr static int lowbit(T x) { if (x == 0) return -1; if constexpr (sizeof(T) <= 4) return __builtin_ctz(x); else return __builtin_ctzll(x); } /* floor: x / y lowered by one when there is a remainder and x ^ y is negative; ceil, bmod and divmod are expressed through floor */ TE constexpr T floor(T x, T y) { return x / y - (x % y and (x ^ y) < 0); } TE constexpr T ceil(T x, T y) { return floor(x + y - 1, y); } TE constexpr T bmod(T x, T y) { return x - floor(x, y) * y; } TE constexpr pair divmod(T x, T y) { T q = floor(x, y); return pair{q, x - q * y}; } /* SUM: accumulate over a container or iterator range starting from T() */ TE T 
SUM(const Z &v) { return accumulate(all(v), T()); } TE T SUM(Z l, Z r) { return accumulate(l, r, T()); } /* lb / ub: offset of lower_bound / upper_bound of x from the start of a container or iterator range */ int lb(const Z &a, Z x) { return lower_bound(all(a), x) - a.begin(); } TE int lb(T l, T r, Z x) { return lower_bound(l, r, x) - l; } int ub(const Z &a, Z x) { return upper_bound(all(a), x) - a.begin(); } TE int ub(T l, T r, Z x) { return upper_bound(l, r, x) - l; } /* bina: integer bisection; while l and r differ by more than 1 the midpoint replaces l when f holds there and r otherwise, and l is returned. ck (no visible declaration here) enables the assert on f(l) */ template ll bina(Z f, ll l, ll r) { if (ck) assert(f(l)); while (abs(l - r) > 1) { ll x = (r + l) >> 1; (f(x) ? l : r) = x; } return l; } /* bina_real: the same bisection for c iterations (default 100); returns the midpoint of the final interval */ TE T bina_real(Z f, T l, T r, int c = 100) { while (c--) { T x = (l + r) / 2; (f(x) ? l : r) = x; } return (l + r) / 2; } /* pop: remove and return the last element (vector, string) or the top element (heaps) */ TE T pop(vc &a) { T x = a.back(); a.pop_back(); return x; } TE T pop(max_heap &q) { T x = q.top(); q.pop(); return x; } TE T pop(min_heap &q) { T x = q.top(); q.pop(); return x; } char pop(string &s) { char x = s.back(); s.pop_back(); return x; } /* setp: switch cout to fixed notation with precision x */ void setp(int x) { cout << fixed << setprecision(x); } /* sh: resize a to N, using b for any new elements */ TE inline void sh(vc &a, int N, T b = {}) { a.resize(N, b); } /* c is a temporary alias for constexpr; it is undefined again right after mint_t */ #define c constexpr /* mint_t: modular integer holding a uint x; the modulus mod has no visible declaration here, presumably a template parameter */ template struct mint_t { using T = mint_t; static c uint m = mod; uint x; c inline uint val() const { return x; } c mint_t() : x(0) {} /* converting constructors reduce modulo the modulus; the signed ones add mod when the remainder is negative */ TE requires(is_unsigned_v) mint_t(T x) : x(x % m) {} mint_t(u128 x) : x(x % m) {} TE requires(is_signed_v) mint_t(T x) : x((x %= mod) < 0 ? x + mod : x) {} mint_t(i128 x) : x((x %= mod) < 0 ? x + mod : x) {} c T &operator+=(T p) { if ((x += p.x) >= m) x -= m; return *this; } /* subtraction is done by adding m - p.x */ c T &operator-=(T p) { if ((x += m - p.x) >= m) x -= m; return *this; } c T operator+(T p) const { return T(*this) += p; } c T operator-(T p) const { return T(*this) -= p; } /* the product is formed in ull before the reduction */ c T &operator*=(T p) { x = ull(x) * p.x % m; return *this; } c T operator*(T p) const { return T(*this) *= p; } /* division multiplies by p.inv() */ c T &operator/=(T p) { return *this *= p.inv(); } c T operator/(T p) const { return T(*this) /= p; } c T operator-() const { return T::gen(x ? 
mod - x : 0); } /* inv: extended Euclidean loop on (x, mod). The local x declared in this statement hides the member for the rest of the function: a is initialised from the member, while the returned T(x) uses the local coefficient */ c T inv() const { int a = x, b = mod, x = 1, y = 0; while (b > 0) { int t = a / b; swap(a -= t * b, b); swap(x -= t * y, y); } return T(x); } /* pow: square-and-multiply over the bits of k; a negative k is applied to inv() */ c T pow(ll k) const { if (k < 0) return inv().pow(-k); T s(1), a(x); for (; k; k >>= 1, a *= a) { if (k & 1) s *= a; } return s; } c bool operator<(T p) const { return x < p.x; } c bool operator==(T p) const { return x == p.x; } c bool operator!=(T p) const { return x != p.x; } /* gen: stores x directly, without any reduction */ static c T gen(uint x) { T s; s.x = x; return s; } /* input is read as ll and converted; output prints the stored x */ friend istream &operator>>(istream &cin, T &p) { ll t; cin >> t; p = t; return cin; } friend ostream &operator<<(ostream &cout, T p) { return cout << p.x; } static c int get_mod() { return mod; } /* ntt_info: a fixed pair of constants for each listed modulus and {-1, -1} for any other; can_ntt tests for that sentinel. NOTE(review): the pairs look like (power-of-two exponent, root) used by a number-theoretic transform - confirm */ static c PII ntt_info() { if (mod == 167772161) return {25, 17}; if (mod == 469762049) return {26, 30}; if (mod == 754974721) return {24, 362}; if (mod == 998244353) return {23, 31}; if (mod == 120586241) return {20, 74066978}; if (mod == 880803841) return {23, 211}; if (mod == 943718401) return {22, 663003469}; if (mod == 1004535809) return {21, 582313106}; if (mod == 1012924417) return {21, 368093570}; return {-1, -1}; } static c bool can_ntt() { return ntt_info().fi != -1; } }; #undef c /* concrete moduli: M99 uses 998244353, M17 uses 1000000007, and M11 is another name for M17 */ using M99 = mint_t<998244353>; using M17 = mint_t<1000000007>; using M11 = M17; /* rd / wt hooks for mint_t, compiled only when FIO is defined */ #ifdef FIO template void rd(mint_t &x) { LL(y); x = y; } template void wt(mint_t x) { wt(x.x); } #endif /* bit_t: Fenwick (binary indexed) tree over the structure T, which supplies X, op, inv and unit and must report commute */ TE requires (T::commute) struct bit_t : T { using X = T::X; using T::op, T::inv, T::unit; int N; vc a; bit_t() {} bit_t(int N) { build(N); } bit_t(int N, Z f) { build(N, f); } bit_t(const vc &a) { build(a); } /* build(M): M entries, all unit() */ void build(int M) { N = M, a.assign(N, unit()); } void build(const vc &a) { build(si(a), [&](int i) { return a[i]; }); } /* build(M, f): fills a[i] = f(i), then folds every node i into node i + (i & -i) in a single pass */ void build(int M, Z f) { N = M; a.resize(N); FOR(i, N) a[i] = f(i); FOR(i, 1, N + 1) { int k = i + (i & -i); if (k <= N) a[k - 1] = op(a[i - 1], a[k - 1]); } } /* multiply: combines x into 0-based position i and every node reached by i |= i + 1 */ void multiply(int i, X x) { for (; i < N; i |= i + 1) a[i] = op(a[i], x); } /* prod(i): combined value of the first i positions, with 0 <= i <= N asserted */ inline X prod(int i) { assert(i >= 0 and i <= N); X s = unit(); for (; 
i > 0; i -= i & -i) s = op(s, a[i - 1]); return s; } /* prod(l, r): prod(r) combined with inv(prod(l)) */ X prod(int l, int r) { assert(l <= r); return op(prod(r), inv(prod(l))); } X prod_all() { return prod(N); } /* set: replaces the value at i by combining in inv(get(i)) and x; get reads a single position as prod(i, i + 1) */ void set(int i, X x) { multiply(i, op(inv(get(i)), x)); } X get(int i) { return prod(i, i + 1); } vc get_all() { vc s(N); FOR(i, N) s[i] = get(i); return s; } /* max_right: descent over the tree starting at L, extending the position while f accepts the accumulated value; f(unit()) is asserted. NOTE(review): presumably returns the largest R such that f(prod(L, R)) holds - confirm */ int max_right(Z f, int L = 0) { assert(f(unit())); X s = unit(); int i = L, k = [&]() -> int { while (1) { if (i & 1) s = op(s, inv(a[i - 1])), --i; if (i == 0) return topbit(N) + 1; int k = lowbit(i) - 1; if (i + (1 << k) > N) return k; X t = op(s, a[i + (1 << k) - 1]); if (not f(t)) return k; s = op(s, inv(a[i - 1])), i -= i & -i; } }(); while (k) { --k; if (i + (1 << k) - 1 < N) { X t = op(s, a[i + (1 << k) - 1]); if (f(t)) i += (1 << k), s = t; } } return i; } /* kth: max_right with the predicate x <= k */ int kth(X k, int L = 0) { return max_right([&](X x) { return x <= k; }, L); } /* min_left: mirror search moving left from R; returns 0 when f still holds after the whole prefix has been absorbed. NOTE(review): exact contract is not stated in the source - confirm before relying on it */ int min_left(Z f, int R) { assert(f(unit())); X s = unit(); int i = R, k = 0; while (i > 0 and f(s)) { s = op(s, a[i - 1]); k = lowbit(i); i -= i & -i; } if (f(s)) return assert(i == 0), 0; while (k) { --k; X t = op(s, inv(a[i + (1 << k) - 1])); if (not f(t)) i += (1 << k), s = t; } return i + 1; } }; /* monoid_add: addition on X with unit X(0), inverse -x, pow as X(n) * x, and commute = 1 */ TE struct monoid_add { using X = T; static constexpr inline X op(const X &x, const X &y) { return x + y; } static constexpr inline X inv(const X &x) { return -x; } static constexpr inline X pow(const X &x, ll n) { return X(n) * x; } static constexpr inline X unit() { return X(0); } static constexpr bool commute = 1; }; /* count_inv_of_permutation: for each x of p, in order, inserts x into a Fenwick tree and records x minus the number of inserted values <= x; the recorded list is reversed and ca(K, i + 1, fa, a[i], re) is added to the result for every position i. re and fa are only updated while they are below in = K + 10. NOTE(review): presumably the total inversion count over K consecutive permutations in lexicographic order starting at p - confirm against the problem statement */ template mint count_inv_of_permutation(const vc &p, ll K) { int N = si(p); bit_t> bit(N + 1); vc a; for (int x : p) bit.multiply(x, 1), a.ep(x - bit.prod(x + 1)); reverse(a); i128 in = i128(K) + 10, re = 1, fa = 1; mint s; /* ke and ca are local helpers; their parameters named mod, re and N hide the outer names of the same spelling */ Z ke = [&](i128 n, i128 mod, i128 x) -> mint { mint s = -x * i128(x - 1) / 2; n += x; x = 0; Z [q, r] = divmod(n, mod); s += mint(q) * (mod - 1) * mod / 2; s += r * (r - 1) / 2; return s; }; Z ca = [&](i128 N, i128 mod, i128 re, i128 x, i128 res) -> mint { mint s; i128 n = min(N, 
res); N -= n; s += mint(x) * n; x = (x + 1) % mod; Z [q, r] = divmod(N, re); s += ke(q, mod, x) * re; x = (x + q) % mod; s += x * r; return s; }; FOR(i, N) { ll x = a[i]; s += ca(K, i + 1, fa, x, re); if (re < in) re += fa * (i - x); if (fa < in) fa *= (i + 1); } return s; } /* the solution computes with M11, i.e. modulo 1000000007 */ using mint = M11; /* Yorisou: reads N and K, then a length-N int vector p, and prints count_inv_of_permutation(p, K) */ void Yorisou() { LL(N, K); VEC(int, p ,N); print(count_inv_of_permutation(p, K)); } /* tests: read a case count first; fl: loop without an upper bound and report progress on cerr every DB iterations */ constexpr int tests = 0, fl = 0, DB = 10; /* main: unties cin and turns off stdio synchronisation, then calls Yorisou T times (T stays 1 unless tests is set and fl is not) */ int main() { cin.tie(0)->sync_with_stdio(0); int T = 1; if (fl) cerr.tie(0); if (tests and not fl) IN(T); for (int i = 0; i < T or fl; ++i) { Yorisou(); if (fl and i % DB == 0) cerr << "Case: " << i << '\n'; } return 0; }