結果

問題 No.1300 Sum of Inversions
ユーザー masayoshi361
提出日時 2020-11-27 22:07:23
言語 C++14
(gcc 12.3.0 + boost 1.83.0)
結果
TLE  
(最新)
AC  
(最初)
実行時間 -
コード長 20,336 bytes
コンパイル時間 3,681 ms
コンパイル使用メモリ 218,540 KB
実行使用メモリ 31,020 KB
最終ジャッジ日時 2023-10-01 06:08:17
合計ジャッジ時間 50,429 ms
ジャッジサーバーID
(参考情報)
judge13 / judge12
このコードへのチャレンジ
(要ログイン)

テストケース

テストケース表示
入力 結果 実行時間
実行使用メモリ
testcase_00 AC 1 ms
4,380 KB
testcase_01 AC 2 ms
4,380 KB
testcase_02 AC 2 ms
4,376 KB
testcase_03 AC 1,522 ms
30,040 KB
testcase_04 AC 1,439 ms
30,004 KB
testcase_05 AC 1,179 ms
17,520 KB
testcase_06 AC 1,703 ms
30,536 KB
testcase_07 AC 1,623 ms
30,448 KB
testcase_08 AC 1,800 ms
30,676 KB
testcase_09 AC 1,804 ms
30,508 KB
testcase_10 AC 966 ms
17,228 KB
testcase_11 AC 963 ms
16,996 KB
testcase_12 AC 1,483 ms
30,120 KB
testcase_13 AC 1,440 ms
30,084 KB
testcase_14 TLE -
testcase_15 AC 1,809 ms
30,612 KB
testcase_16 AC 1,546 ms
30,328 KB
testcase_17 AC 920 ms
16,964 KB
testcase_18 AC 1,101 ms
17,328 KB
testcase_19 AC 1,319 ms
29,868 KB
testcase_20 AC 1,335 ms
29,884 KB
testcase_21 AC 1,365 ms
29,880 KB
testcase_22 AC 1,185 ms
17,512 KB
testcase_23 AC 1,725 ms
30,628 KB
testcase_24 AC 1,250 ms
17,624 KB
testcase_25 AC 995 ms
17,296 KB
testcase_26 AC 992 ms
17,032 KB
testcase_27 AC 1,103 ms
17,392 KB
testcase_28 AC 1,867 ms
30,712 KB
testcase_29 AC 1,248 ms
29,992 KB
testcase_30 AC 1,777 ms
30,492 KB
testcase_31 AC 1,190 ms
17,532 KB
testcase_32 AC 1,202 ms
17,460 KB
testcase_33 AC 419 ms
30,924 KB
testcase_34 AC 573 ms
31,020 KB
testcase_35 AC 972 ms
30,980 KB
testcase_36 AC 1,006 ms
30,908 KB
権限があれば一括ダウンロードができます

ソースコード

diff #

/* #region header */
// Debug helper. When LOCAL is defined, debug(x) streams x to cout (the
// included prettyprint header presumably supplies container printers).
// Otherwise debug(...) expands to the literal 42, so its arguments are
// discarded by the preprocessor and never evaluated.
#ifdef LOCAL
#include "cxx-prettyprint-master/prettyprint.hpp"
#define debug(x) cout << x << endl
#else
#define debug(...) 42
#endif

#pragma GCC optimize("Ofast")
#include <bits/stdc++.h>
using namespace std;
// types
// Short aliases for the integer/floating types and containers used below.
using ll = long long;
using ull = unsigned long long;
using ld = long double;
// Pair aliases: Pl holds two long longs, Pi two ints.
typedef pair<ll, ll> Pl;
typedef pair<int, int> Pi;
// One-dimensional vector aliases.
typedef vector<ll> vl;
typedef vector<int> vi;
typedef vector<char> vc;
// Generic two-dimensional container (vector of vectors).
template <typename T>
using mat = vector<vector<T>>;
// Two-dimensional vector aliases.
typedef vector<vector<int>> vvi;
typedef vector<vector<long long>> vvl;
typedef vector<vector<char>> vvc;
// Integer modulo the compile-time constant `mod`.
// The stored representative `x` is kept in [0, mod) by every operation.
template <int mod>
struct modint {
    int x;

    modint() : x(0) {}

    // Builds the residue of any signed 64-bit value, negative ones included.
    modint(int64_t y) : x(0) {
        if (y >= 0) {
            x = static_cast<int>(y % mod);
        } else {
            x = static_cast<int>((mod - (-y) % mod) % mod);
        }
    }

    modint& operator+=(const modint& p) {
        x += p.x;
        if (x >= mod) x -= mod;
        return *this;
    }

    modint& operator-=(const modint& p) {
        // Add the complement so the intermediate value never goes negative.
        x += mod - p.x;
        if (x >= mod) x -= mod;
        return *this;
    }

    modint& operator*=(const modint& p) {
        const long long product = 1LL * x * p.x;
        x = static_cast<int>(product % mod);
        return *this;
    }

    modint& operator/=(const modint& p) {
        return *this *= p.inverse();
    }

    modint operator-() const { return modint(-x); }

    modint operator+(const modint& p) const {
        modint result(*this);
        result += p;
        return result;
    }

    modint operator-(const modint& p) const {
        modint result(*this);
        result -= p;
        return result;
    }

    modint operator*(const modint& p) const {
        modint result(*this);
        result *= p;
        return result;
    }

    modint operator/(const modint& p) const {
        modint result(*this);
        result /= p;
        return result;
    }

    bool operator==(const modint& p) const { return x == p.x; }

    bool operator!=(const modint& p) const { return x != p.x; }

    // Multiplicative inverse via the extended Euclidean algorithm.
    modint inverse() const {
        int a = x, b = mod, u = 1, v = 0;
        while (b > 0) {
            const int quotient = a / b;
            a -= quotient * b;
            std::swap(a, b);
            u -= quotient * v;
            std::swap(u, v);
        }
        return modint(u);
    }

    // Binary exponentiation; a non-positive exponent yields 1.
    modint pow(int64_t n) const {
        modint ret(1), mul(x);
        for (; n > 0; n >>= 1) {
            if (n & 1) ret *= mul;
            mul *= mul;
        }
        return ret;
    }

    friend ostream& operator<<(ostream& os, const modint& p) {
        os << p.x;
        return os;
    }

    friend istream& operator>>(istream& is, modint& a) {
        int64_t raw;
        is >> raw;
        a = modint<mod>(raw);
        return is;
    }

    static int get_mod() { return mod; }
};
// abbreviations
// all(x) / rall(x): forward / reverse iterator pair of a container.
#define all(x) (x).begin(), (x).end()
#define rall(x) (x).rbegin(), (x).rend()
// rep(i, n) iterates i over [0, n); rep(i, a, b) iterates i over [a, b).
// The argument-shuffling in rep_ selects the right bounds for either arity.
// The loop variable is a long long and the bound is evaluated once.
#define rep_(i, a_, b_, a, b, ...) for (ll i = (a), max_i = (b); i < max_i; i++)
#define rep(i, ...) rep_(i, __VA_ARGS__, __VA_ARGS__, 0, __VA_ARGS__)
// rrep is the descending counterpart: i runs from b - 1 down to a (or 0).
#define rrep_(i, a_, b_, a, b, ...) \
    for (ll i = (b - 1), min_i = (a); i >= min_i; i--)
#define rrep(i, ...) rrep_(i, __VA_ARGS__, __VA_ARGS__, 0, __VA_ARGS__)
// srep(i, a, b, c): i runs over [a, b) in steps of c.
#define srep(i, a, b, c) for (ll i = (a), max_i = (b); i < max_i; i += c)
// SZ(x): container size as a signed int.
#define SZ(x) ((int)(x).size())
// Member-call shorthands, used as v.pb(x) / v.eb(x).
#define pb(x) push_back(x)
#define eb(x) emplace_back(x)
// NOTE: from here on the bare identifier `mp` is replaced by make_pair.
#define mp make_pair
// I/O
// print(x): writes x followed by `endl` to cout.
#define print(x) cout << x << endl
// Streams every element of `v` followed by one space, then ends the line.
// Output goes to `os`, the stream on the left-hand side of <<, so the
// operator also works with file and string streams rather than always
// printing to cout.
template <class T>
std::ostream& operator<<(std::ostream& os, const std::vector<T>& v) {
    for (const auto& e : v) os << e << " ";
    os << std::endl;
    return os;
}
// Whitespace-delimited token readers from standard input, one overload per
// supported scalar type.
void scan(int& a) {
    std::cin >> a;
}
void scan(long long& a) {
    std::cin >> a;
}
void scan(char& a) {
    std::cin >> a;
}
void scan(double& a) {
    std::cin >> a;
}
void scan(std::string& a) {
    std::cin >> a;
}
// Fills an already-sized vector element by element, front to back.
template <class T>
void scan(std::vector<T>& a) {
    for (auto it = a.begin(); it != a.end(); ++it) scan(*it);
}
// vsum(x): sum of all elements, accumulated as long long.
#define vsum(x) accumulate(all(x), 0LL)
// vmax(a) / vmin(a): largest / smallest element (dereferences the iterator,
// so the container must not be empty).
#define vmax(a) *max_element(all(a))
#define vmin(a) *min_element(all(a))
// lb(c, x) / ub(c, x): index of lower_bound / upper_bound of x in c.
// Both use binary search, so c is expected to be sorted.
#define lb(c, x) distance((c).begin(), lower_bound(all(c), (x)))
#define ub(c, x) distance((c).begin(), upper_bound(all(c), (x)))
// functions
// Greatest common divisor by Euclid's algorithm.
// gcd(x, 0) == gcd(0, x) == x, and gcd(0, 0) == 0.
long long gcd(long long a, long long b) { return b ? gcd(b, a % b) : a; }
// Least common multiple. Divides before multiplying to limit overflow.
// Returns 0 when either argument is 0, instead of dividing by gcd(0, 0) == 0.
long long lcm(long long a, long long b) {
    if (a == 0 || b == 0) return 0;
    return a / gcd(a, b) * b;
}
// Raises `a` to `b` when `b` is strictly larger; reports whether `a` changed.
template <class T>
bool chmax(T& a, const T& b) {
    const bool improved = a < b;
    if (improved) a = b;
    return improved;
}
// Lowers `a` to `b` when `b` is strictly smaller; reports whether `a` changed.
template <class T>
bool chmin(T& a, const T& b) {
    const bool improved = b < a;
    if (improved) a = b;
    return improved;
}
// x raised to the n-th power by repeated squaring, using T's own *=.
// A non-positive exponent yields T(1).
template <typename T>
T mypow(T x, long long n) {
    T ret = 1;
    for (; n > 0; n >>= 1) {
        if (n & 1) ret *= x;
        x *= x;
    }
    return ret;
}
// Computes x^n modulo `mod` by repeated squaring.
//
// x   : base, any sign and magnitude; it is reduced into [0, mod) first so
//       the first multiplication cannot overflow.
// n   : exponent; a non-positive exponent yields 1 % mod.
// mod : modulus, expected to be positive and small enough that the product
//       of two residues fits in long long (about 3e9).
// Returns the result in [0, mod).
long long modpow(long long x, long long n, const long long mod) {
    x %= mod;
    if (x < 0) x += mod;
    long long ret = 1 % mod;  // 0 when mod == 1
    while (n > 0) {
        if (n & 1) ret = ret * x % mod;
        x = x * x % mod;
        n >>= 1;
    }
    return ret;
}

// 64-bit xorshift generator with a fixed seed: the sequence is the same on
// every run. The state lives in a function-local static.
uint64_t my_rand(void) {
    static uint64_t state = 88172645463325252ULL;
    state ^= state << 13;
    state ^= state >> 7;
    state ^= state << 17;
    return state;
}
// Number of set bits in a 64-bit value.
int popcnt(unsigned long long x) {
    const int ones = __builtin_popcountll(x);
    return ones;
}
// graph template
// Weighted directed edge. `src` is -1 when only the destination is given.
// The edge converts implicitly to its destination vertex, and assigning an
// int retargets the destination, so it can stand in for a plain vertex id.
template <typename T>
struct edge {
    int src, to;
    T cost;

    edge(int dst, T weight) : src(-1), to(dst), cost(weight) {}

    edge(int from, int dst, T weight) : src(from), to(dst), cost(weight) {}

    // Retargets the destination only; src and cost are untouched.
    edge& operator=(const int& x) {
        this->to = x;
        return *this;
    }

    // Orders edges by cost alone.
    bool operator<(const edge<T>& r) const {
        return this->cost < r.cost;
    }

    operator int() const {
        return this->to;
    }
};
// Edges<T>: a list of weighted edges.
template <typename T>
using Edges = vector<edge<T>>;
// WeightedGraph<T>: one edge list per vertex.
template <typename T>
using WeightedGraph = vector<Edges<T>>;
// UnWeightedGraph: one list of neighbour ids per vertex.
using UnWeightedGraph = vector<vector<int>>;
// Stopwatch built on clock(). Call start() first; lap() then reports the
// time elapsed since that call.
struct Timer {
    clock_t start_time;

    void start() {
        start_time = clock();
    }

    // Elapsed time since start(), in whole milliseconds.
    int lap() {
        const clock_t elapsed = clock() - start_time;
        return elapsed * 1000 / CLOCKS_PER_SEC;
    }
};
/* #endregion*/
// constant
// NOTE: these are object-like macros, so every later occurrence of the bare
// identifiers `inf`, `INF`, `mod` and `endl` is textually replaced.
#define inf 1000000000ll
#define INF 4000000004000000000LL
#define mod 998244353ll
// Modular integer type for the modulus above, plus vector shorthands.
using mint = modint<mod>;
typedef vector<mint> vmint;
typedef vector<vector<mint>> vvmint;
// From here on `endl` is a plain newline character (no stream flush).
#define endl '\n'
// Floating-point tolerance (1e-15) and pi.
const long double eps = 0.000000000000001;
const long double PI = 3.141592653589793;
// Discrete logarithm by baby-step giant-step, O(sqrt(m)) time and memory.
//
// Returns the smallest x >= 0 with a^x = b (mod m), or -1 if there is none.
// Idea: write x = p * s + r with s about sqrt(m) and 0 <= r < s, so that
// a^r = b * (a^-s)^p. All a^r are tabulated, then p is scanned upwards.
//
// Preconditions: m is prime (the inverse of a is taken with Fermat's little
// theorem) and m is small enough that the product of two residues fits in
// long long (about 3e9). a and b may be any integers; they are reduced
// modulo m first. A non-positive m yields -1.
long long modlog(long long a, long long b, long long m) {
    if (m <= 0) return -1;
    a %= m;
    if (a < 0) a += m;
    b %= m;
    if (b < 0) b += m;

    // a == 0 has no inverse: 0^0 = 1 and 0^x = 0 for every x >= 1.
    if (a == 0) {
        if (b == 1 % m) return 0;
        return b == 0 ? 1 : -1;
    }

    // Modular exponentiation with the caller's modulus m.
    const auto power = [m](long long base, long long e) {
        long long result = 1 % m;
        for (base %= m; e > 0; e >>= 1) {
            if (e & 1) result = result * base % m;
            base = base * base % m;
        }
        return result;
    };

    // Block size s, slightly above sqrt(m) so that s * (s + 1) covers [0, m).
    const long long step =
        static_cast<long long>(std::sqrt(static_cast<double>(m))) + 2;
    // a^-s (mod m), the factor applied once per giant step.
    const long long giant = power(power(a, m - 2), step);

    // Baby steps: a^r -> r for r in [0, s). emplace keeps the first (smallest)
    // exponent when the powers of a start repeating.
    std::unordered_map<long long, long long> baby;
    long long cur = 1 % m;
    for (long long r = 0; r < step; ++r) {
        baby.emplace(cur, r);
        cur = cur * a % m;
    }

    // Giant steps: shifted = b * a^(-s * p), always reduced modulo m.
    long long shifted = b;
    for (long long p = 0; p <= step; ++p) {
        const auto hit = baby.find(shifted);
        if (hit != baby.end()) return p * step + hit->second;
        shifted = shifted * giant % m;
    }
    return -1;
}
// Euler's totient of n by trial division up to sqrt(n).
// For each prime factor p found, the running result is scaled by (1 - 1/p).
int64_t euler_phi(int64_t n) {
    int64_t phi = n;
    int64_t rest = n;
    for (int64_t p = 2; p * p <= rest; ++p) {
        if (rest % p != 0) continue;
        phi -= phi / p;
        // Strip every copy of p so later divisors are guaranteed prime.
        do {
            rest /= p;
        } while (rest % p == 0);
    }
    // Whatever is left above 1 is a single remaining prime factor.
    if (rest > 1) phi -= phi / rest;
    return phi;
}
// Fenwick tree (binary indexed tree) over `sz` elements, all initially 0.
// Public indices are 0-based; the internal array `data` is 1-based with
// data[0] unused.
template <typename T>
struct BIT {
    std::vector<T> data;

    BIT(int sz) { data.assign(++sz, 0); }

    // Sum of the half-open prefix [0, k). k is clamped to the tree size, so
    // a k past the end returns the total instead of reading out of bounds.
    T sum(int k) {
        const int n = static_cast<int>(data.size()) - 1;
        if (k > n) k = n;
        T ret = 0;
        for (; k > 0; k -= k & -k) ret += data[k];
        return (ret);
    }

    // Sum of the half-open range [l, r).
    T sum(int l, int r) { return sum(r) - sum(l); }

    // Adds x to element k. An index outside [0, size) is ignored; without
    // the guard k == -1 would loop forever, because the internal index
    // becomes 0 and `k += k & -k` never advances.
    void add(int k, T x) {
        const int n = static_cast<int>(data.size()) - 1;
        if (k < 0 || k >= n) return;
        for (++k; k <= n; k += k & -k) data[k] += x;
    }

    // Treats the stored values as multiplicities and returns the 0-indexed
    // position holding the k-th (0-indexed) item, i.e. the first index whose
    // inclusive prefix sum exceeds k. Returns the tree size when fewer than
    // k + 1 items are stored. Requires non-negative stored values.
    int search(long long k) {
        ++k;
        int res = 0;
        int N = 1;
        while (N < (int)data.size()) N *= 2;
        // Descend from the highest power of two, taking every block whose
        // total is still below the wanted count.
        for (int i = N / 2; i > 0; i /= 2) {
            if (res + i < (int)data.size() && data[res + i] < k) {
                k = k - data[res + i];
                res = res + i;
            }
        }
        return res;
    }

    // for debug: prints every element, space separated, then a newline.
    void show() {
        const int n = static_cast<int>(data.size()) - 1;
        for (int i = 0; i < n; ++i) std::cout << sum(i + 1) - sum(i) << ' ';
        std::cout << '\n';
    }
};
// Counts pairs (i, j) with i < j and x[i] > x[j] using a Fenwick tree.
// Each value y is stored at the mirrored index n - 1 - y, so a prefix sum
// below that index counts the earlier elements strictly greater than y.
// Values are presumably in [0, n) - confirm against callers.
ll inversion_number(vi& x, int n) {
    BIT<int> seen(n);
    ll inversions = 0;
    for (auto it = x.begin(); it != x.end(); ++it) {
        const int mirrored = n - 1 - *it;
        inversions += seen.sum(mirrored);
        seen.add(mirrored, 1);
    }
    return inversions;
}
// Coordinate compression: collect values with add(), call build() once,
// then get() maps a value to its rank among the distinct collected values.
template <typename T>
struct Compress {
    vector<T> xs;  // collected values; sorted and unique after build()

    Compress() = default;

    Compress(const vector<T>& vs) { add(vs); }

    Compress(const initializer_list<vector<T>>& vs) {
        for (auto it = vs.begin(); it != vs.end(); ++it) add(*it);
    }

    // Appends every element of vs.
    void add(const vector<T>& vs) {
        xs.insert(xs.end(), vs.begin(), vs.end());
    }

    // Appends a single value.
    void add(const T& x) { xs.push_back(x); }

    // Sorts the collected values and drops duplicates.
    void build() {
        sort(xs.begin(), xs.end());
        const auto tail = unique(xs.begin(), xs.end());
        xs.erase(tail, xs.end());
    }

    // Rank of every element of vs (see the scalar overload).
    vector<int> get(const vector<T>& vs) const {
        vector<int> ret;
        for (const T& value : vs) {
            ret.push_back(lower_bound(xs.begin(), xs.end(), value) -
                          xs.begin());
        }
        return ret;
    }

    // Index of the first stored value that is not less than x.
    int get(const T& x) const {
        const auto pos = lower_bound(xs.begin(), xs.end(), x);
        return pos - xs.begin();
    }

    // The k-th smallest distinct value (valid after build()).
    const T& operator[](int k) const { return xs[k]; }
};
// Bit vector with rank support.
// Bits are packed into 32-bit words (`bit`), and `sum[i]` caches the number
// of set bits in all words before word i, so rank() needs one table lookup
// and one popcount.
//
// Usage: construct with the number of bits, set() the 1-bits, call build()
// once, then query with operator[] and rank().
struct SuccinctIndexableDictionary {
    std::size_t length = 0;  // number of addressable bits
    std::size_t blocks = 0;  // number of 32-bit words allocated
    std::vector<unsigned> bit, sum;

    SuccinctIndexableDictionary() = default;

    // Allocates length / 32 + 1 words. The extra word keeps rank(length)
    // inside the arrays even when length is a multiple of 32.
    SuccinctIndexableDictionary(std::size_t length)
        : length(length), blocks((length >> 5) + 1) {
        bit.assign(blocks, 0U);
        sum.assign(blocks, 0U);
    }

    // Sets bit k to 1. Must be called before build().
    void set(int k) { bit[k >> 5] |= 1U << (k & 31); }

    // Fills the per-word prefix counts. A default-constructed (storage-less)
    // dictionary is left untouched.
    void build() {
        if (sum.empty()) return;
        sum[0] = 0U;
        for (std::size_t i = 1; i < blocks; i++) {
            sum[i] = sum[i - 1] + __builtin_popcount(bit[i - 1]);
        }
    }

    // Value of bit k.
    bool operator[](int k) { return (bool((bit[k >> 5] >> (k & 31)) & 1)); }

    // Number of 1-bits in [0, k), for 0 <= k <= length.
    int rank(int k) {
        return (sum[k >> 5] +
                __builtin_popcount(bit[k >> 5] & ((1U << (k & 31)) - 1)));
    }

    // Number of bits equal to `val` in [0, k).
    int rank(bool val, int k) { return (val ? rank(k) : k - rank(k)); }
};
// Wavelet matrix over a fixed sequence of integer-like values of type T.
// One rank-capable bit vector is kept per bit position (MAXLOG levels,
// processed from the most significant bit down). Only the low MAXLOG bits of
// each value are inspected, so values are presumably expected to be
// non-negative and below 2^MAXLOG - confirm against callers.
template <typename T, int MAXLOG>
struct WaveletMatrix {
    size_t length;                               // number of elements
    SuccinctIndexableDictionary matrix[MAXLOG];  // bit `level` of each element
    int mid[MAXLOG];                             // count of 0-bits per level

    WaveletMatrix() = default;

    // Builds the matrix from a copy of v. At each level the current order is
    // stably partitioned: elements whose bit is 0 come first, then those
    // whose bit is 1; mid[level] records where the 1-group starts.
    WaveletMatrix(vector<T> v) : length(v.size()) {
        vector<T> l(length), r(length);
        for (int level = MAXLOG - 1; level >= 0; level--) {
            // One extra bit so that rank(length) stays inside the dictionary.
            matrix[level] = SuccinctIndexableDictionary(length + 1);
            int left = 0, right = 0;
            for (int i = 0; i < length; i++) {
                if (((v[i] >> level) & 1)) {
                    matrix[level].set(i);
                    r[right++] = v[i];
                } else {
                    l[left++] = v[i];
                }
            }
            mid[level] = left;
            matrix[level].build();
            // New order for the next level: zeros (l) followed by ones (r).
            v.swap(l);
            for (int i = 0; i < right; i++) {
                v[left + i] = r[i];
            }
        }
    }

    // Maps the half-open range [l, r) at `level` to the range occupied, on
    // the next level, by its elements whose bit equals f.
    pair<int, int> succ(bool f, int l, int r, int level) {
        return {matrix[level].rank(f, l) + mid[level] * f,
                matrix[level].rank(f, r) + mid[level] * f};
    }

    // v[k]
    T access(int k) {
        T ret = 0;
        for (int level = MAXLOG - 1; level >= 0; level--) {
            bool f = matrix[level][k];
            if (f) ret |= T(1) << level;
            // Follow position k into the next level's ordering.
            k = matrix[level].rank(f, k) + mid[level] * f;
        }
        return ret;
    }

    T operator[](const int& k) { return access(k); }

    // count i s.t. (0 <= i < r) && v[i] == x
    int rank(const T& x, int r) {
        int l = 0;
        for (int level = MAXLOG - 1; level >= 0; level--) {
            tie(l, r) = succ((x >> level) & 1, l, r, level);
        }
        return r - l;
    }

    // k-th(0-indexed) smallest number in v[l,r)
    T kth_smallest(int l, int r, int k) {
        assert(0 <= k && k < r - l);
        T ret = 0;
        for (int level = MAXLOG - 1; level >= 0; level--) {
            // Number of elements in the range whose current bit is 0.
            int cnt =
                matrix[level].rank(false, r) - matrix[level].rank(false, l);
            // If the zero group is too small, the answer has this bit set.
            bool f = cnt <= k;
            if (f) {
                ret |= T(1) << level;
                k -= cnt;
            }
            tie(l, r) = succ(f, l, r, level);
        }
        return ret;
    }

    // k-th(0-indexed) largest number in v[l,r)
    T kth_largest(int l, int r, int k) {
        return kth_smallest(l, r, r - l - k - 1);
    }

    // count i s.t. (l <= i < r) && (v[i] < upper)
    int range_freq(int l, int r, T upper) {
        int ret = 0;
        for (int level = MAXLOG - 1; level >= 0; level--) {
            bool f = ((upper >> level) & 1);
            // Where upper has a 1-bit, every element with a 0-bit here (and
            // the same higher bits) is strictly smaller.
            if (f)
                ret +=
                    matrix[level].rank(false, r) - matrix[level].rank(false, l);
            tie(l, r) = succ(f, l, r, level);
        }
        return ret;
    }

    // count i s.t. (l <= i < r) && (lower <= v[i] < upper)
    int range_freq(int l, int r, T lower, T upper) {
        return range_freq(l, r, upper) - range_freq(l, r, lower);
    }

    // max v[i] s.t. (l <= i < r) && (v[i] < upper)
    // Returns T(-1) when no such element exists.
    T prev_value(int l, int r, T upper) {
        int cnt = range_freq(l, r, upper);
        return cnt == 0 ? T(-1) : kth_smallest(l, r, cnt - 1);
    }

    // min v[i] s.t. (l <= i < r) && (lower <= v[i])
    // Returns T(-1) when no such element exists.
    T next_value(int l, int r, T lower) {
        int cnt = range_freq(l, r, lower);
        return cnt == r - l ? T(-1) : kth_smallest(l, r, cnt);
    }
};
/**
 * @brief Lazy-Segment-Tree (segment tree with lazy propagation)
 * @docs docs/lazy-segment-tree.md
 *
 * Non-recursive (bottom-up) implementation over n leaves, padded to the next
 * power of two `sz`. Node k has children 2k and 2k+1; leaves live at
 * [sz, 2 * sz).
 *
 * Monoid is the value type, OperatorMonoid the pending-update type. Both
 * are driven by three callables and two identities:
 *   f(a, b)   merges two values,
 *   g(a, op)  applies an update to a value,
 *   h(p, q)   composes update q on top of an earlier update p,
 *   M1        identity value for f,
 *   OM0       "no pending update" marker (compared with == and !=).
 *
 * NOTE(review): the callables are stored as std::function, which adds an
 * indirect call to every node operation.
 */
template <typename Monoid, typename OperatorMonoid>
struct LazySegmentTree {
    int n, sz, height;            // leaf count, padded size, log2(sz)
    vector<Monoid> data;          // node values (before their own lazy op)
    vector<OperatorMonoid> lazy;  // pending update per node
    using F = function<Monoid(Monoid, Monoid)>;
    using G = function<Monoid(Monoid, OperatorMonoid)>;
    using H = function<OperatorMonoid(OperatorMonoid, OperatorMonoid)>;
    const F f;
    const G g;
    const H h;
    const Monoid M1;
    const OperatorMonoid OM0;

    // Creates a tree of n leaves, every node set to M1 with no pending op.
    LazySegmentTree(int n, const F f, const G g, const H h, const Monoid& M1,
                    const OperatorMonoid OM0)
        : n(n), f(f), g(g), h(h), M1(M1), OM0(OM0) {
        sz = 1;
        height = 0;
        while (sz < n) sz <<= 1, height++;
        data.assign(2 * sz, M1);
        lazy.assign(2 * sz, OM0);
    }

    // Writes leaf k directly; call build() afterwards to refresh ancestors.
    void set(int k, const Monoid& x) { data[k + sz] = x; }

    // Recomputes every internal node from its children, bottom-up.
    void build() {
        for (int k = sz - 1; k > 0; k--) {
            data[k] = f(data[2 * k + 0], data[2 * k + 1]);
        }
    }

    // Pushes node k's pending update down to both children and folds it
    // into data[k]. Only called on internal nodes.
    inline void propagate(int k) {
        if (lazy[k] != OM0) {
            lazy[2 * k + 0] = h(lazy[2 * k + 0], lazy[k]);
            lazy[2 * k + 1] = h(lazy[2 * k + 1], lazy[k]);
            data[k] = apply(k);
            lazy[k] = OM0;
        }
    }

    // Value of node k with its own pending update taken into account.
    inline Monoid apply(int k) {
        return lazy[k] == OM0 ? data[k] : g(data[k], lazy[k]);
    }

    // Recomputes every proper ancestor of node k from its children.
    inline void recalc(int k) {
        while (k >>= 1) data[k] = f(apply(2 * k + 0), apply(2 * k + 1));
    }

    // Propagates pending updates along the path from the root down to the
    // parent of node k (top-down).
    inline void thrust(int k) {
        for (int i = height; i > 0; i--) propagate(k >> i);
    }

    // Applies update x to every element of the half-open range [a, b).
    void update(int a, int b, const OperatorMonoid& x) {
        if (a >= b) return;
        // Clear pending updates above both boundary leaves first.
        thrust(a += sz);
        thrust(b += sz - 1);
        for (int l = a, r = b + 1; l < r; l >>= 1, r >>= 1) {
            if (l & 1) lazy[l] = h(lazy[l], x), ++l;
            if (r & 1) --r, lazy[r] = h(lazy[r], x);
        }
        recalc(a);
        recalc(b);
    }

    // Merged value of the half-open range [a, b); M1 for an empty range.
    Monoid query(int a, int b) {
        if (a >= b) return M1;
        thrust(a += sz);
        thrust(b += sz - 1);
        // L collects pieces left to right, R right to left, so the operand
        // order of f is preserved.
        Monoid L = M1, R = M1;
        for (int l = a, r = b + 1; l < r; l >>= 1, r >>= 1) {
            if (l & 1) L = f(L, apply(l++));
            if (r & 1) R = f(apply(--r), R);
        }
        return f(L, R);
    }

    // Value of the single element k.
    Monoid operator[](const int& k) { return query(k, k + 1); }

    // Descends from node a to the leaf at which `check` first becomes true.
    // M is the value accumulated so far; type == false extends it to the
    // right (used by find_first), type == true to the left (find_last).
    // Returns the leaf's index in [0, sz).
    template <typename C>
    int find_subtree(int a, const C& check, Monoid& M, bool type) {
        while (a < sz) {
            propagate(a);
            Monoid nxt =
                type ? f(apply(2 * a + type), M) : f(M, apply(2 * a + type));
            if (check(nxt))
                a = 2 * a + type;
            else
                M = nxt, a = 2 * a + 1 - type;
        }
        return a - sz;
    }

    // Appears to return the smallest x >= a for which check holds on the
    // merged value of [a, x], or -1 when there is none - confirm against
    // docs/lazy-segment-tree.md.
    template <typename C>
    int find_first(int a, const C& check) {
        Monoid L = M1;
        if (a <= 0) {
            if (check(f(L, apply(1)))) return find_subtree(1, check, L, false);
            return -1;
        }
        thrust(a + sz);
        int b = sz;
        for (a += sz, b += sz; a < b; a >>= 1, b >>= 1) {
            if (a & 1) {
                Monoid nxt = f(L, apply(a));
                if (check(nxt)) return find_subtree(a, check, L, false);
                L = nxt;
                ++a;
            }
        }
        return -1;
    }

    // Mirror image of find_first: appears to return the largest x < b for
    // which check holds on the merged value of [x, b), or -1 when there is
    // none - confirm against docs/lazy-segment-tree.md.
    template <typename C>
    int find_last(int b, const C& check) {
        Monoid R = M1;
        if (b >= sz) {
            if (check(f(apply(1), R))) return find_subtree(1, check, R, true);
            return -1;
        }
        thrust(b + sz - 1);
        int a = sz;
        for (b += sz; a < b; a >>= 1, b >>= 1) {
            if (b & 1) {
                Monoid nxt = f(apply(--b), R);
                if (check(nxt)) return find_subtree(b, check, R, true);
                R = nxt;
            }
        }
        return -1;
    }

    // for debug: prints every element, space separated, then a newline.
    void show() {
        rep(i, n) cout << query(i, i + 1) << ' ';
        cout << endl;
    }
};

// "Assign" update: returns y converted to T, unless y equals
// numeric_limits<F>::max(), which is the "no update" marker and leaves x
// unchanged.
template <class T, class F = T>
T myreplace(T x, F y) {
    const bool is_marker = !(y != std::numeric_limits<F>::max());
    if (is_marker) return x;
    x = y;
    return x;
}
// By-value maximum; x is returned when the two compare equal.
template <class T>
T mymax(T x, T y) {
    return (x < y) ? y : x;
}
// By-value minimum; x is returned when the two compare equal.
template <class T>
T mymin(T x, T y) {
    return (y < x) ? y : x;
}
// "Add" update: x + y, converted to T.
template <class T, class F = T>
T myadd(T x, F y) {
    T total = x + y;
    return total;
}
// Segment-tree node for range sums: `val` is the sum over the node's range
// and `size` the number of leaves it covers. Combining with a scalar treats
// the scalar as a per-element amount and scales it by `size`.
template <class T>
struct segobj {
    T val;
    int size;

    segobj(T x, int y) : val(x), size(y) {}
    segobj() : val(0), size(0) {}

    // Merge with a neighbouring node: sums and sizes add up.
    segobj& operator+=(const segobj& p) {
        this->val += p.val;
        this->size += p.size;
        return *this;
    }

    // Add p to every covered element.
    segobj& operator+=(const T& p) {
        this->val += p * this->size;
        return *this;
    }

    // Set every covered element to p; size is unchanged.
    segobj& operator=(const T& p) {
        this->val = p * this->size;
        return *this;
    }

    friend ostream& operator<<(ostream& os, const segobj& p) {
        os << p.val;
        return os;
    }

    segobj operator+(const segobj& p) const {
        segobj merged(*this);
        merged += p;
        return merged;
    }

    segobj operator+(const T& p) const {
        segobj shifted(*this);
        shifted += p;
        return shifted;
    }
};
// Range-max query with range-assign updates.
// numeric_limits<T>::max() is reserved as the "no pending assignment"
// marker, so it cannot be used as an assigned value.
// The identity for max is numeric_limits<T>::lowest(): for integer T it
// equals min(), while for floating-point T min() is the smallest *positive*
// value and would be wrong for negative data.
template <class T>
struct RMRRQ : LazySegmentTree<T, T> {
    using Seg = LazySegmentTree<T, T>;
    RMRRQ(int n)
        : Seg(n, mymax<T>, myreplace<T>, myreplace<T>,
              std::numeric_limits<T>::lowest(),
              std::numeric_limits<T>::max()) {}
};
// Range-min query with range-assign updates.
// numeric_limits<T>::max() serves both as the identity for min and as the
// "no pending assignment" marker.
template <class T>
struct RmRRQ : LazySegmentTree<T, T> {
    using Seg = LazySegmentTree<T, T>;
    RmRRQ(int n)
        : Seg(n,
              mymin<T>,                        // merge: minimum
              myreplace<T>,                    // apply: assign
              myreplace<T>,                    // compose: later assign wins
              std::numeric_limits<T>::max(),   // identity value
              std::numeric_limits<T>::max()) {}  // "no update" marker
};
// Range-max query with range-add updates.
// The identity for max is lowest() / 2: halving leaves headroom so that
// adding an update to the identity does not overflow. lowest() is used
// rather than min() because min() is a positive value for floating-point T.
// T() (zero) is the "no pending addition" marker.
template <class T>
struct RMRAQ : LazySegmentTree<T, T> {
    using Seg = LazySegmentTree<T, T>;
    RMRAQ(int n)
        : Seg(n, mymax<T>, plus<T>(), plus<T>(),
              std::numeric_limits<T>::lowest() / 2, T()) {}
};
// Range-min query with range-add updates.
// The identity for min is max() / 2, leaving headroom for additions, and
// T() (zero) is the "no pending addition" marker.
template <class T>
struct RmRAQ : LazySegmentTree<T, T> {
    using Seg = LazySegmentTree<T, T>;
    RmRAQ(int n)
        : Seg(n,
              mymin<T>,                           // merge: minimum
              plus<T>(),                          // apply: add
              plus<T>(),                          // compose: add the amounts
              std::numeric_limits<T>::max() / 2,  // identity value
              T()) {}                             // "no update" marker
};
// Range-sum query with range-add updates.
// Nodes are segobj<T>, which carry their leaf count so that adding a scalar
// to a node scales it by the number of covered elements. All n elements
// start at 0.
template <class T>
struct RSRAQ : LazySegmentTree<segobj<T>, T> {
    using Seg = LazySegmentTree<segobj<T>, T>;
    RSRAQ(int n)
        : Seg(n, plus<segobj<T>>(), myadd<segobj<T>, T>, plus<T>(), segobj<T>(),
              T()) {
        // Every real leaf holds value 0 and covers exactly one element.
        for (int leaf = 0; leaf < n; ++leaf) {
            this->set(leaf, segobj<T>(0, 1));
        }
        this->build();
    }
    // Sum of the half-open range [l, r).
    T sum(int l, int r) {
        const segobj<T> total = this->query(l, r);
        return total.val;
    }
};
// Range-sum query with range-assign updates.
// Nodes are segobj<T>; assigning a scalar to a node sets its sum to the
// scalar times its leaf count. numeric_limits<T>::max() is the "no pending
// assignment" marker. All n elements start at 0.
template <class T>
struct RSRRQ : LazySegmentTree<segobj<T>, T> {
    using Seg = LazySegmentTree<segobj<T>, T>;
    using obj = segobj<T>;
    RSRRQ(int n)
        : Seg(n, plus<obj>(), myreplace<obj, T>, myreplace<T>, segobj<T>(),
              numeric_limits<T>::max()) {
        // Every real leaf holds value 0 and covers exactly one element.
        for (int leaf = 0; leaf < n; ++leaf) {
            this->set(leaf, segobj<T>(0, 1));
        }
        this->build();
    }
    // Sum of the half-open range [l, r).
    T sum(int l, int r) {
        const obj total = this->query(l, r);
        return total.val;
    }
};
int main() {
    cin.tie(0);
    ios::sync_with_stdio(0);
    cout << setprecision(30) << fixed;
    int n;
    cin >> n;
    vl a(n);
    scan(a);
    mint ans = 0;
    Compress<ll> cp(a);
    cp.build();
    RSRAQ<mint> segl(n), segr(n);
    RSRAQ<int> cntl(n), cntr(n);
    rep(i, n) {
        int j = cp.get(a[i]);
        segr.update(j + 1, n, a[i]);
        cntr.update(j + 1, n, 1);
    }
    rep(i, n) {
        int j = cp.get(a[i]);
        cntr.update(j + 1, n, -1);
        segr.update(j + 1, n, -a[i]);
        int cl = cntl[j].val;
        int cr = cntr[j].val;
        ans += (mint)a[i] * cl * cr;
        ans += segl[j].val * cr + segr[j].val * cl;
        cntl.update(0, j, 1);
        segl.update(0, j, a[i]);
    }
    print(ans);
}
0