#pragma GCC optimize("O3") #pragma GCC optimize("unroll-loops") #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include using namespace std; #define all(a) (a).begin(), (a).end() #define rep(i, n) for (ll i = 0; i < (n); i++) #define For(i, a, b) for (ll i = (a); i < (b); i++) #define debug(...) cerr << __LINE__ << " | ", debug_out(#__VA_ARGS__, __VA_ARGS__) #define test_only(x, y, ...) do { if ((x) != (y)) { debug(x, y, ##__VA_ARGS__); } } while (0) #define test_same(x, y, ...) do { if ((x) != (y)) { debug(x, y, ##__VA_ARGS__); abort(); } } while (0) typedef long long ll; typedef unsigned int uint; typedef unsigned long long ull; typedef long double ld; template using P = pair; template using pri_l = priority_queue; template using pri_s = priority_queue, greater>; constexpr int inf = 1000000010; constexpr ll INF = 1000000000000000010; constexpr int mod1e9 = 1000000007; constexpr int mod998 = 998244353; constexpr ld eps = 1e-12; constexpr ld pi = 3.141592653589793238; constexpr ll ten(int n) { return n ? 10 * ten(n - 1) : 1; }; int dx[] = { 1,0,-1,0,1,1,-1,-1,0 }; int dy[] = { 0,1,0,-1,1,-1,1,-1,0 }; ll mul(ll a, ll b) { return (b != 0 && a > INF / b ? INF : a * b); } void fail() { cout << "-1\n"; exit(0); } void no() { cout << "No\n"; exit(0); } template void er(T a) { cout << a << '\n'; exit(0); } template inline bool chmax(T& a, const U& b) { if (a < b) { a = b; return true; } return false; } template inline bool chmin(T& a, const U& b) { if (a > b) { a = b; return true; } return false; } template istream& operator >>(istream& s, vector& v) { for (auto& e : v) s >> e; return s; } template ostream& operator <<(ostream& s, const vector& v) { for (auto& e : v) s << e << ' '; return s; } template ostream& operator << (ostream& s, const pair& p) { s << p.first << ' ' << p.second; return s; } struct fastio { fastio() { cin.tie(0); cout.tie(0); ios::sync_with_stdio(false); cout << fixed << setprecision(20); cerr << fixed << setprecision(20); } }fastio_; namespace rdv { random_device seed_gen; mt19937_64 engine(seed_gen()); ll rnum(ll r) { return engine() % r; } // [0, r) ll rnum(ll l, ll r) { return rnum(r - l) + l; } // [l, r) ll rng(ll l, ll r) { return rnum(l, r + 1); } // [l, r] double rng01() { return engine() * pow(2, -64); } template void shuf(vector& v) { shuffle(all(v), engine); } void shuf(string& s) { shuffle(all(s), engine); } } using namespace rdv; template vector compress(vector v) { int n = ssize(v); vector tmp = v; sort(tmp.begin(), tmp.end()); tmp.erase(unique(tmp.begin(), tmp.end()), tmp.end()); vector res(n); for (int i = 0; i < n; i++) res[i] = lower_bound(tmp.begin(), tmp.end(), v[i]) - tmp.begin(); return res; } #ifdef _MSC_VER int popcount(int v) { return popcount(uint(v)); } int popcount(ll v) { return popcount(ull(v)); } using lint = ll; #else int popcount(int v) { return __builtin_popcount(v); } int popcount(uint v) { return __builtin_popcount(v); } int popcount(ll v) { return __builtin_popcountll(v); } int popcount(ull v) { return __builtin_popcountll(v); } using lint = __int128_t; #endif #include using namespace atcoder; constexpr ll mod = mod998; using mint = static_modint; istream& operator >>(istream& s, mint& m) { ll y; s >> y; m = y; return s; } istream& operator >>(istream& s, vector& v) { for (auto& e : v) { ll y; s >> y; e = y; } return s; } ostream& operator <<(ostream& s, mint& m) { return s << m.val(); } ostream& operator <<(ostream& s, const vector& v) { for (auto& e : v) s << e.val() << ' '; return s; } void debug_out(const char*) { cerr << "\n"; } template void debug_out(const char* names, T value, Args... args) { while (*names == ' ') ++names; const char* comma = strchr(names, ','); if (!comma) { cerr << names << ":" << value << "\n"; } else { cerr.write(names, comma - names) << ":" << value << ", "; debug_out(comma + 1, args...); } } vector fac, inv, facinv; void modcalc(int n) { assert(fac.empty()); assert(inv.empty()); assert(facinv.empty()); fac.resize(n); inv.resize(n); facinv.resize(n); fac[0] = 1; fac[1] = 1; inv[1] = 1; facinv[0] = 1; facinv[1] = 1; for (ll i = 2; i < n; i++) { fac[i] = fac[i - 1] * i; inv[i] = -inv[mod % i] * (mod / i); facinv[i] = facinv[i - 1] * inv[i]; } } mint comb(ll n, ll k) { if (n < 0 or k < 0 or n < k) return 0; return fac[n] * facinv[k] * facinv[n - k]; } mint perm(ll n, ll k) { if (n < 0 or k < 0 or n < k) return 0; return fac[n] * facinv[n - k]; } mint hom(ll n, ll k) { if (n < 0 or k < 0 or (n == 0 && k > 0)) return 0; if (n == 0 && k == 0) return 1; return fac[n + k - 1] * facinv[k] * facinv[n - 1]; } int main() { [[maybe_unused]] bool DEBUG = false; int TEST = 1; // cin >> TEST; while (TEST--) { ll n, m; cin >> n >> m; modcalc(2 * n + 10); vector a(m); cin >> a; sort(all(a)); a.insert(a.begin(), 0); /*vector dp(m + 1); dp[0] = mint(1) / mint(-2); For(i, 1, m + 1) { rep(j, i) { dp[i] += dp[j] * (-2) * fac[a[i] - a[j] + (j == 0 ? 0 : 1)]; } }*/ vector f(n + 1); for (auto& e : a) f[e] = true; vector dp(n + 1); dp[0] = mint(1) / mint(-2); for (auto& e : a) { if (e != 0) dp[e] += fac[e]; } auto rec = [&](auto rec, int L, int R) -> void { if (L + 1 == R) return; int M = midpoint(L, R); rec(rec, L, M); vector A(M - L); rep(i, M - L) A[i] = dp[L + i]; vector B(R - L); rep(i, R - L) B[i] = fac[i + 1] * (-2); if (L == 0) A[0] = 0; auto C = convolution(A, B); For(i, M, R) { if (!f[i]) continue; dp[i] += C[i - L]; } rec(rec, M, R); }; rec(rec, 0, n + 1); mint sum = 0; // For(i, 1, m + 1) sum += dp[i] * fac[n - a[i] + 1]; For(i, 1, n + 1) sum += dp[i] * fac[n - i + 1]; mint ans = fac[n] - sum; cout << ans << '\n'; } }