#pragma GCC target("avx2")
#pragma GCC optimize("Ofast")
#pragma GCC optimize("unroll-loops")

// NOTE(review): the header name after `#include` and every template
// parameter/argument list were missing from the source this file was
// recovered from. The standard headers actually used are listed explicitly,
// and template lists were reconstructed from usage; reconstructions that
// could not be confirmed from usage are flagged below.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>
#include <string>
#include <tuple>
#include <utility>
#include <vector>
using namespace std;

#define DEBUG
#ifdef DEBUG
// Streams a pair as "(first,second)".
template <typename T, typename U>
ostream &operator<<(ostream &os, const pair<T, U> &p) {
    os << '(' << p.first << ',' << p.second << ')';
    return os;
}
// Streams a vector as "{a,b,c}".
template <typename T>
ostream &operator<<(ostream &os, const vector<T> &v) {
    os << '{';
    for(int i = 0; i < (int)v.size(); i++) {
        if(i) { os << ','; }
        os << v[i];
    }
    os << '}';
    return os;
}
// Recursion terminator for debugg(): ends the line on cerr.
void debugg() { cerr << endl; }
// Prints each argument to cerr, separated by a leading space.
template <typename T, typename... Args>
void debugg(const T &x, const Args &... args) {
    cerr << " " << x;
    debugg(args...);
}
#define debug(...) \
    cerr << __LINE__ << " [" << #__VA_ARGS__ << "]: ", debugg(__VA_ARGS__)
#define dump(x) cerr << __LINE__ << " " << #x << " = " << (x) << endl
#else
#define debug(...) (void(0))
#define dump(x) (void(0))
#endif

typedef long long ll;
// NOTE(review): the element types of the aliases below were lost in the
// source and are inferred from the alias names. Only `vl` is used by this
// program (as a vector of ll); confirm the others before relying on them.
typedef vector<ll> vl;
typedef vector<vl> vvl;
typedef vector<char> vc;
typedef vector<string> vs;
typedef vector<bool> vb;
typedef vector<double> vd;
typedef pair<ll, ll> P;
typedef pair<int, int> pii;
typedef vector<P> vpl;
typedef tuple<ll, ll, ll> tapu;

#define rep(i,n) for(int i=0; i<(n); i++)
#define REP(i,a,b) for(int i=(a); i<(b); i++)
#define all(x) (x).begin(), (x).end()
#define rall(x) (x).rbegin(), (x).rend()

const int inf = 1<<30;
const ll linf = 1LL<<62;
const int MAX = 510000;
int dy[8] = {0,-1,0,1,1,-1,-1,1};
int dx[8] = {-1,0,1,0,1,-1,1,-1};
const double pi = acos(-1);
const double eps = 1e-7;

// Sets a = b when b is smaller; returns whether a was changed.
template <typename T1, typename T2>
inline bool chmin(T1 &a, T2 b) {
    if(a > b) { a = b; return true; }
    else return false;
}
// Sets a = b when b is larger; returns whether a was changed.
// (The comparison and assignment were truncated in the source and are
// restored here as the mirror image of chmin.)
template <typename T1, typename T2>
inline bool chmax(T1 &a, T2 b) {
    if(a < b) { a = b; return true; }
    else return false;
}
// Prints the elements of a container on one line, space-separated.
template <typename T>
inline void print(T &a) {
    int sz = a.size();
    for(auto itr = a.begin(); itr != a.end(); itr++) {
        cout << *itr;
        sz--;
        if(sz) cout << " ";
    }
    cout << "\n";
}
template <typename T1, typename T2>
inline void print2(T1 a, T2 b) { cout << a << " " << b << "\n"; }
template <typename T1, typename T2, typename T3>
inline void print3(T1 a, T2 b, T3 c) { cout << a << " " << b << " " << c << "\n"; }
void mark() { cout << "#" << "\n"; }
ll pcount(ll x) { return __builtin_popcountll(x); }

//const int mod = 1e9 + 7;
const int mod = 998244353;

// Integer modulo a compile-time constant Modulus.
// The stored value `a` is always kept in [0, Modulus).
// The constructor takes an unsigned 64-bit value, so a negative signed
// argument is converted to unsigned before the reduction and does not yield
// the mathematical residue.
template <std::uint_fast64_t Modulus>
class modint {
    using u64 = std::uint_fast64_t;

public:
    u64 a;

    constexpr modint(const u64 x = 0) noexcept : a(x % Modulus) {}
    constexpr u64 &value() noexcept { return a; }
    constexpr const u64 &value() const noexcept { return a; }
    constexpr modint operator+(const modint rhs) const noexcept {
        return modint(*this) += rhs;
    }
    constexpr modint operator-(const modint rhs) const noexcept {
        return modint(*this) -= rhs;
    }
    constexpr modint operator*(const modint rhs) const noexcept {
        return modint(*this) *= rhs;
    }
    constexpr modint operator/(const modint rhs) const noexcept {
        return modint(*this) /= rhs;
    }
    constexpr modint &operator+=(const modint rhs) noexcept {
        a += rhs.a;
        if (a >= Modulus) { a -= Modulus; }
        return *this;
    }
    constexpr modint &operator-=(const modint rhs) noexcept {
        // Lift a first so the unsigned subtraction cannot wrap.
        if (a < rhs.a) { a += Modulus; }
        a -= rhs.a;
        return *this;
    }
    constexpr modint &operator*=(const modint rhs) noexcept {
        a = a * rhs.a % Modulus;
        return *this;
    }
    // Multiplies by rhs^(Modulus-2) using binary exponentiation; this equals
    // division only when Modulus is prime (Fermat's little theorem).
    constexpr modint &operator/=(modint rhs) noexcept {
        u64 exp = Modulus - 2;
        while (exp) {
            if (exp % 2) { *this *= rhs; }
            rhs *= rhs;
            exp /= 2;
        }
        return *this;
    }
};
using mint = modint<mod>;

// Fenwick tree (binary indexed tree) over `sz` values of type T.
// Callers use 0-based positions; sum/add shift them by one onto the
// 1-based internal array `dat` (dat[0] is unused).
template <typename T>
struct BIT {
    vector<T> dat;
    ll sz; // number of addressable positions (dat.size() - 1)

    // Creates a tree of n positions, all zero.
    BIT(ll n) : dat(static_cast<size_t>(n + 1), T(0)), sz(n) {}

    // Returns the sum of positions 0..k inclusive; sum(-1) is zero.
    T sum(ll k) {
        T ret = 0;
        for(++k; k > 0; k -= k & -k) ret += dat[k];
        return (ret);
    }

    // Adds x to position k.
    void add(ll k, T x) {
        const ll limit = static_cast<ll>(dat.size());
        for(++k; k < limit; k += k & -k) dat[k] += x;
    }

    // Returns the smallest position p with sum(p) >= k, or sz when the total
    // is still below k. Returns 0 for k <= 0. Meaningful only when all stored
    // values are non-negative (prefix sums non-decreasing) and T supports
    // operator<.
    //
    // The previous version bounded the descent by the member `sz`, which the
    // constructor never incremented (the `++sz` there acted on the shadowing
    // parameter), so the last internal node was unreachable: with sz == 1 the
    // result was always 0, and an unreachable target yielded sz-1 instead of
    // sz. The bound is now taken from dat.size().
    ll get(T k) {
        if(k <= 0) return 0;
        const ll limit = static_cast<ll>(dat.size());
        ll step = 1;
        while(step * 2 < limit) step *= 2; // largest power of two <= sz
        ll ret = 0;
        for(; step > 0; step /= 2) {
            if(ret + step < limit && dat[ret + step] < k) {
                k -= dat[ret + step];
                ret += step;
            }
        }
        return ret;
    }
};

// Coordinate compression: maps values onto their rank among the distinct
// values of the vector given at construction.
template <typename T>
struct Compress {
    vector<T> comp; // sorted distinct values

    Compress(vector<T> v) : comp(v) {
        sort(comp.begin(), comp.end());
        comp.erase(unique(comp.begin(), comp.end()), comp.end());
    }
    // Index of the first stored value that is >= x.
    ll get(T x) {
        return lower_bound(comp.begin(), comp.end(), x) - comp.begin();
    }
    // Stored value with rank id.
    T operator[](int id) { return comp[id]; }
    ll siz() { return comp.size(); }
};

// Reads n and a[0..n-1], then prints, modulo 998244353, the sum of
// a[i] + a[j] + a[k] over all index triples i < j < k with
// a[i] > a[j] > a[k].
int main() {
    // Input is read only through cin, so unsynchronised streams are safe.
    ios::sync_with_stdio(false);
    cin.tie(nullptr);

    ll n;
    cin >> n;
    vl a(n);
    rep(i,n) cin >> a[i];

    // Ranks of the values index the trees, so at most n positions are needed.
    Compress<ll> comp(a);
    // Indexed by rank, over the elements already processed (those to the right):
    //   cnt1: number of elements          bit1: sum of their values
    //   cnt2: number of decreasing pairs  bit2: sum of (first + second) of those pairs
    // A pair is stored at the rank of its left (larger) element.
    BIT<mint> bit1(n), bit2(n), cnt1(n), cnt2(n);

    mint ans = 0;
    for(ll i = n - 1; i >= 0; i--) {
        const ll pos = comp.get(a[i]);
        // assumes a[i] >= 0: the conversion to mint goes through an unsigned
        // type — TODO confirm against the input constraints.
        const mint cur = a[i];
        // Elements to the right that are strictly smaller than a[i].
        const mint c1 = cnt1.sum(pos - 1);
        // Sum of (a[i] + a[j]) over those elements j.
        const mint s1 = bit1.sum(pos - 1) + c1 * cur;
        // Extend every decreasing pair whose left element is below a[i].
        ans += bit2.sum(pos - 1) + cnt2.sum(pos - 1) * cur;
        bit1.add(pos, cur);
        bit2.add(pos, s1);
        cnt1.add(pos, 1);
        cnt2.add(pos, c1);
    }
    cout << ans.value() << "\n";
}