結果
問題 | No.1907 DETERMINATION |
ユーザー | kiyoshi0205 |
提出日時 | 2022-04-15 22:34:06 |
言語 | C++23 (gcc 13.3.0 + boost 1.87.0) |
結果 |
TLE
|
実行時間 | - |
コード長 | 20,852 bytes |
コンパイル時間 | 4,050 ms |
コンパイル使用メモリ | 378,280 KB |
実行使用メモリ | 22,144 KB |
最終ジャッジ日時 | 2024-12-25 01:34:04 |
合計ジャッジ時間 | 228,978 ms |
ジャッジサーバーID (参考情報) |
judge3 / judge2 |
(要ログイン)
テストケース
テストケース表示入力 | 結果 | 実行時間 実行使用メモリ |
---|---|---|
testcase_00 | AC | 2 ms
18,460 KB |
testcase_01 | AC | 1 ms
13,632 KB |
testcase_02 | AC | 2 ms
13,640 KB |
testcase_03 | AC | 1 ms
18,168 KB |
testcase_04 | AC | 2 ms
18,348 KB |
testcase_05 | AC | 2 ms
13,768 KB |
testcase_06 | AC | 2 ms
13,636 KB |
testcase_07 | AC | 3,342 ms
22,144 KB |
testcase_08 | AC | 1,185 ms
14,120 KB |
testcase_09 | AC | 2,201 ms
21,460 KB |
testcase_10 | TLE | - |
testcase_11 | TLE | - |
testcase_12 | TLE | - |
testcase_13 | TLE | - |
testcase_14 | TLE | - |
testcase_15 | AC | 1,684 ms
19,284 KB |
testcase_16 | AC | 350 ms
13,768 KB |
testcase_17 | TLE | - |
testcase_18 | TLE | - |
testcase_19 | AC | 90 ms
13,640 KB |
testcase_20 | TLE | - |
testcase_21 | AC | 579 ms
18,292 KB |
testcase_22 | TLE | - |
testcase_23 | TLE | - |
testcase_24 | AC | 2,211 ms
16,640 KB |
testcase_25 | AC | 3 ms
13,768 KB |
testcase_26 | TLE | - |
testcase_27 | TLE | - |
testcase_28 | TLE | - |
testcase_29 | TLE | - |
testcase_30 | AC | 3 ms
18,576 KB |
testcase_31 | TLE | - |
testcase_32 | TLE | - |
testcase_33 | TLE | - |
testcase_34 | TLE | - |
testcase_35 | AC | 3 ms
13,636 KB |
testcase_36 | AC | 3 ms
13,764 KB |
testcase_37 | AC | 3 ms
18,648 KB |
testcase_38 | TLE | - |
testcase_39 | TLE | - |
testcase_40 | TLE | - |
testcase_41 | TLE | - |
testcase_42 | TLE | - |
testcase_43 | TLE | - |
testcase_44 | TLE | - |
testcase_45 | TLE | - |
testcase_46 | TLE | - |
testcase_47 | TLE | - |
testcase_48 | TLE | - |
testcase_49 | TLE | - |
testcase_50 | TLE | - |
testcase_51 | TLE | - |
testcase_52 | AC | 2 ms
12,288 KB |
testcase_53 | TLE | - |
testcase_54 | TLE | - |
testcase_55 | AC | 2 ms
12,160 KB |
testcase_56 | TLE | - |
testcase_57 | TLE | - |
testcase_58 | TLE | - |
testcase_59 | TLE | - |
testcase_60 | TLE | - |
testcase_61 | TLE | - |
testcase_62 | TLE | - |
testcase_63 | TLE | - |
testcase_64 | AC | 2 ms
5,248 KB |
testcase_65 | AC | 2 ms
5,248 KB |
testcase_66 | AC | 2 ms
13,636 KB |
コンパイルメッセージ
main.cpp:557:1: warning: 'always_inline' function might not be inlinable [-Wattributes] 557 | montgomery_sub_256(const __m256i &a, const __m256i &b, const __m256i &m2, | ^~~~~~~~~~~~~~~~~~ main.cpp:549:1: warning: 'always_inline' function might not be inlinable [-Wattributes] 549 | montgomery_add_256(const __m256i &a, const __m256i &b, const __m256i &m2, | ^~~~~~~~~~~~~~~~~~ main.cpp:541:1: warning: 'always_inline' function might not be inlinable [-Wattributes] 541 | montgomery_mul_256(const __m256i &a, const __m256i &b, const __m256i &r, | ^~~~~~~~~~~~~~~~~~ main.cpp:530:1: warning: 'always_inline' function might not be inlinable [-Wattributes] 530 | my256_mulhi_epu32(const __m256i &a, const __m256i &b) { | ^~~~~~~~~~~~~~~~~ main.cpp:525:1: warning: 'always_inline' function might not be inlinable [-Wattributes] 525 | my256_mullo_epu32(const __m256i &a, const __m256i &b) { | ^~~~~~~~~~~~~~~~~ main.cpp:518:1: warning: 'always_inline' function might not be inlinable [-Wattributes] 518 | montgomery_sub_128(const __m128i &a, const __m128i &b, const __m128i &m2, | ^~~~~~~~~~~~~~~~~~ main.cpp:511:1: warning: 'always_inline' function might not be inlinable [-Wattributes] 511 | montgomery_add_128(const __m128i &a, const __m128i &b, const __m128i &m2, | ^~~~~~~~~~~~~~~~~~ main.cpp:503:1: warning: 'always_inline' function might not be inlinable [-Wattributes] 503 | montgomery_mul_128(const __m128i &a, const __m128i &b, const __m128i &r, | ^~~~~~~~~~~~~~~~~~ main.cpp:492:1: warning: 'always_inline' function might not be inlinable [-Wattributes] 492 | my128_mulhi_epu32(const __m128i &a, const __m128i &b) { | ^~~~~~~~~~~~~~~~~ main.cpp:487:1: warning: 'always_inline' function might not be inlinable [-Wattributes] 487 | my128_mullo_epu32(const __m128i &a, const __m128i &b) { | ^~~~~~~~~~~~~~~~~
ソースコード
//拝借、ありがとう……… #define PROBLEM "https://judge.yosupo.jp/problem/matrix_det" #pragma region kyopro_template #define Nyaan_template #include <immintrin.h> #include <bits/stdc++.h> #define pb push_back #define eb emplace_back #define fi first #define se second #define each(x, v) for (auto &x : v) #define all(v) (v).begin(), (v).end() #define sz(v) ((int)(v).size()) #define mem(a, val) memset(a, val, sizeof(a)) #define ini(...) \ int __VA_ARGS__; \ in(__VA_ARGS__) #define inl(...) \ long long __VA_ARGS__; \ in(__VA_ARGS__) #define ins(...) \ string __VA_ARGS__; \ in(__VA_ARGS__) #define inc(...) \ char __VA_ARGS__; \ in(__VA_ARGS__) #define in2(s, t) \ for (int i = 0; i < (int)s.size(); i++) { \ in(s[i], t[i]); \ } #define in3(s, t, u) \ for (int i = 0; i < (int)s.size(); i++) { \ in(s[i], t[i], u[i]); \ } #define in4(s, t, u, v) \ for (int i = 0; i < (int)s.size(); i++) { \ in(s[i], t[i], u[i], v[i]); \ } #define rep(i, N) for (long long i = 0; i < (long long)(N); i++) #define repr(i, N) for (long long i = (long long)(N)-1; i >= 0; i--) #define rep1(i, N) for (long long i = 1; i <= (long long)(N); i++) #define repr1(i, N) for (long long i = (N); (long long)(i) > 0; i--) #define reg(i, a, b) for (long long i = (a); i < (b); i++) #define die(...) \ do { \ out(__VA_ARGS__); \ return; \ } while (0) using namespace std; using ll = long long; template <class T> using V = vector<T>; using vi = vector<int>; using vl = vector<long long>; using vvi = vector<vector<int>>; using vd = V<double>; using vs = V<string>; using vvl = vector<vector<long long>>; using P = pair<long long, long long>; using vp = vector<P>; using pii = pair<int, int>; using vpi = vector<pair<int, int>>; constexpr int inf = 1001001001; constexpr long long infLL = (1LL << 61) - 1; template <typename T, typename U> inline bool amin(T &x, U y) { return (y < x) ? (x = y, true) : false; } template <typename T, typename U> inline bool amax(T &x, U y) { return (x < y) ? (x = y, true) : false; } template <typename T, typename U> ostream &operator<<(ostream &os, const pair<T, U> &p) { os << p.first << " " << p.second; return os; } template <typename T, typename U> istream &operator>>(istream &is, pair<T, U> &p) { is >> p.first >> p.second; return is; } template <typename T> ostream &operator<<(ostream &os, const vector<T> &v) { int s = (int)v.size(); for (int i = 0; i < s; i++) os << (i ? " " : "") << v[i]; return os; } template <typename T> istream &operator>>(istream &is, vector<T> &v) { for (auto &x : v) is >> x; return is; } void in() {} template <typename T, class... U> void in(T &t, U &... u) { cin >> t; in(u...); } void out() { cout << "\n"; } template <typename T, class... U> void out(const T &t, const U &... u) { cout << t; if (sizeof...(u)) cout << " "; out(u...); } #ifdef NyaanDebug #define trc(...) \ do { \ cerr << #__VA_ARGS__ << " = "; \ dbg_out(__VA_ARGS__); \ } while (0) #define trca(v, N) \ do { \ cerr << #v << " = "; \ array_out(v, N); \ } while (0) #define trcc(v) \ do { \ cerr << #v << " = {"; \ each(x, v) { cerr << " " << x << ","; } \ cerr << "}" << endl; \ } while (0) template <typename T> void _cout(const T &c) { cerr << c; } void _cout(const int &c) { if (c == 1001001001) cerr << "inf"; else if (c == -1001001001) cerr << "-inf"; else cerr << c; } void _cout(const unsigned int &c) { if (c == 1001001001) cerr << "inf"; else cerr << c; } void _cout(const long long &c) { if (c == 1001001001 || c == (1LL << 61) - 1) cerr << "inf"; else if (c == -1001001001 || c == -((1LL << 61) - 1)) cerr << "-inf"; else cerr << c; } void _cout(const unsigned long long &c) { if (c == 1001001001 || c == (1LL << 61) - 1) cerr << "inf"; else cerr << c; } template <typename T, typename U> void _cout(const pair<T, U> &p) { cerr << "{ "; _cout(p.fi); cerr << ", "; _cout(p.se); cerr << " } "; } template <typename T> void _cout(const vector<T> &v) { int s = v.size(); cerr << "{ "; for (int i = 0; i < s; i++) { cerr << (i ? ", " : ""); _cout(v[i]); } cerr << " } "; } template <typename T> void _cout(const vector<vector<T>> &v) { cerr << "[ "; for (const auto &x : v) { cerr << endl; _cout(x); cerr << ", "; } cerr << endl << " ] "; } void dbg_out() { cerr << endl; } template <typename T, class... U> void dbg_out(const T &t, const U &... u) { _cout(t); if (sizeof...(u)) cerr << ", "; dbg_out(u...); } template <typename T> void array_out(const T &v, int s) { cerr << "{ "; for (int i = 0; i < s; i++) { cerr << (i ? ", " : ""); _cout(v[i]); } cerr << " } " << endl; } template <typename T> void array_out(const T &v, int H, int W) { cerr << "[ "; for (int i = 0; i < H; i++) { cerr << (i ? ", " : ""); array_out(v[i], W); } cerr << " ] " << endl; } #else #define trc(...) #define trca(...) #define trcc(...) #endif inline int popcnt(unsigned long long a) { return __builtin_popcountll(a); } inline int lsb(unsigned long long a) { return __builtin_ctzll(a); } inline int msb(unsigned long long a) { return 63 - __builtin_clzll(a); } template <typename T> inline int getbit(T a, int i) { return (a >> i) & 1; } template <typename T> inline void setbit(T &a, int i) { a |= (1LL << i); } template <typename T> inline void delbit(T &a, int i) { a &= ~(1LL << i); } template <typename T> int lb(const vector<T> &v, const T &a) { return lower_bound(begin(v), end(v), a) - begin(v); } template <typename T> int ub(const vector<T> &v, const T &a) { return upper_bound(begin(v), end(v), a) - begin(v); } template <typename T> int btw(T a, T x, T b) { return a <= x && x < b; } template <typename T, typename U> T ceil(T a, U b) { return (a + b - 1) / b; } constexpr long long TEN(int n) { long long ret = 1, x = 10; while (n) { if (n & 1) ret *= x; x *= x; n >>= 1; } return ret; } template <typename T> vector<T> mkrui(const vector<T> &v) { vector<T> ret(v.size() + 1); for (int i = 0; i < int(v.size()); i++) ret[i + 1] = ret[i] + v[i]; return ret; }; template <typename T> vector<T> mkuni(const vector<T> &v) { vector<T> ret(v); sort(ret.begin(), ret.end()); ret.erase(unique(ret.begin(), ret.end()), ret.end()); return ret; } template <typename F> vector<int> mkord(int N, F f) { vector<int> ord(N); iota(begin(ord), end(ord), 0); sort(begin(ord), end(ord), f); return ord; } template <typename T = int> vector<T> mkiota(int N) { vector<T> ret(N); iota(begin(ret), end(ret), 0); return ret; } template <typename T> vector<int> mkinv(vector<T> &v) { vector<int> inv(v.size()); for (int i = 0; i < (int)v.size(); i++) inv[v[i]] = i; return inv; } struct IoSetupNya { IoSetupNya() { cin.tie(nullptr); ios::sync_with_stdio(false); cout << fixed << setprecision(15); cerr << fixed << setprecision(7); } } iosetupnya; void solve(); int main() { solve(); } #pragma endregion using namespace std; namespace fastio { static constexpr int SZ = 1 << 17; char ibuf[SZ], obuf[SZ]; int pil = 0, pir = 0, por = 0; struct Pre { char num[40000]; constexpr Pre() : num() { for (int i = 0; i < 10000; i++) { int n = i; for (int j = 3; j >= 0; j--) { num[i * 4 + j] = n % 10 + '0'; n /= 10; } } } } constexpr pre; inline void load() { memcpy(ibuf, ibuf + pil, pir - pil); pir = pir - pil + fread(ibuf + pir - pil, 1, SZ - pir + pil, stdin); pil = 0; } inline void flush() { fwrite(obuf, 1, por, stdout); por = 0; } inline void rd(char& c) { c = ibuf[pil++]; } template <typename T> inline void rd(T& x) { if (pil + 32 > pir) load(); char c; do c = ibuf[pil++]; while (c < '-'); bool minus = 0; if (c == '-') { minus = 1; c = ibuf[pil++]; } x = 0; while (c >= '0') { x = x * 10 + (c & 15); c = ibuf[pil++]; } if (minus) x = -x; } inline void wt(char c) { obuf[por++] = c; } template <typename T> inline void wt(T x) { if (por > SZ - 32) flush(); if (!x) { obuf[por++] = '0'; return; } if (x < 0) { obuf[por++] = '-'; x = -x; } int i = 12; char buf[16]; while (x >= 10000) { memcpy(buf + i, pre.num + (x % 10000) * 4, 4); x /= 10000; i -= 4; } int d = x < 100 ? (x < 10 ? 1 : 2) : (x < 1000 ? 3 : 4); memcpy(obuf + por, pre.num + x * 4 + 4 - d, d); por += d; memcpy(obuf + por, buf + i + 4, 12 - i); por += 12 - i; } struct Dummy { Dummy() { atexit(flush); } } dummy; } // namespace fastio using fastio::rd; using fastio::wt; using namespace std; template <uint32_t mod> struct LazyMontgomeryModInt { using mint = LazyMontgomeryModInt; using i32 = int32_t; using u32 = uint32_t; using u64 = uint64_t; static constexpr u32 get_r() { u32 ret = mod; for (i32 i = 0; i < 4; ++i) ret *= 2 - mod * ret; return ret; } static constexpr u32 r = get_r(); static constexpr u32 n2 = -u64(mod) % mod; static_assert(r * mod == 1, "invalid, r * mod != 1"); static_assert(mod < (1 << 30), "invalid, mod >= 2 ^ 30"); static_assert((mod & 1) == 1, "invalid, mod % 2 == 0"); u32 a; constexpr LazyMontgomeryModInt() : a(0) {} constexpr LazyMontgomeryModInt(const int64_t &b) : a(reduce(u64(b % mod + mod) * n2)){}; static constexpr u32 reduce(const u64 &b) { return (b + u64(u32(b) * u32(-r)) * mod) >> 32; } constexpr mint &operator+=(const mint &b) { if (i32(a += b.a - 2 * mod) < 0) a += 2 * mod; return *this; } constexpr mint &operator-=(const mint &b) { if (i32(a -= b.a) < 0) a += 2 * mod; return *this; } constexpr mint &operator*=(const mint &b) { a = reduce(u64(a) * b.a); return *this; } constexpr mint &operator/=(const mint &b) { *this *= b.inverse(); return *this; } constexpr mint operator+(const mint &b) const { return mint(*this) += b; } constexpr mint operator-(const mint &b) const { return mint(*this) -= b; } constexpr mint operator*(const mint &b) const { return mint(*this) *= b; } constexpr mint operator/(const mint &b) const { return mint(*this) /= b; } constexpr bool operator==(const mint &b) const { return (a >= mod ? a - mod : a) == (b.a >= mod ? b.a - mod : b.a); } constexpr bool operator!=(const mint &b) const { return (a >= mod ? a - mod : a) != (b.a >= mod ? b.a - mod : b.a); } constexpr mint operator-() const { return mint() - mint(*this); } constexpr mint pow(u64 n) const { mint ret(1), mul(*this); while (n > 0) { if (n & 1) ret *= mul; mul *= mul; n >>= 1; } return ret; } constexpr mint inverse() const { return pow(mod - 2); } friend ostream &operator<<(ostream &os, const mint &b) { return os << b.get(); } friend istream &operator>>(istream &is, mint &b) { int64_t t; is >> t; b = LazyMontgomeryModInt<mod>(t); return (is); } constexpr u32 get() const { u32 ret = reduce(a); return ret >= mod ? ret - mod : ret; } static constexpr u32 get_mod() { return mod; } }; using namespace std; using namespace std; __attribute__((target("sse4.2"))) __attribute__((always_inline)) __m128i my128_mullo_epu32(const __m128i &a, const __m128i &b) { return _mm_mullo_epi32(a, b); } __attribute__((target("sse4.2"))) __attribute__((always_inline)) __m128i my128_mulhi_epu32(const __m128i &a, const __m128i &b) { __m128i a13 = _mm_shuffle_epi32(a, 0xF5); __m128i b13 = _mm_shuffle_epi32(b, 0xF5); __m128i prod02 = _mm_mul_epu32(a, b); __m128i prod13 = _mm_mul_epu32(a13, b13); __m128i prod = _mm_unpackhi_epi64(_mm_unpacklo_epi32(prod02, prod13), _mm_unpackhi_epi32(prod02, prod13)); return prod; } __attribute__((target("sse4.2"))) __attribute__((always_inline)) __m128i montgomery_mul_128(const __m128i &a, const __m128i &b, const __m128i &r, const __m128i &m1) { return _mm_sub_epi32( _mm_add_epi32(my128_mulhi_epu32(a, b), m1), my128_mulhi_epu32(my128_mullo_epu32(my128_mullo_epu32(a, b), r), m1)); } __attribute__((target("sse4.2"))) __attribute__((always_inline)) __m128i montgomery_add_128(const __m128i &a, const __m128i &b, const __m128i &m2, const __m128i &m0) { __m128i ret = _mm_sub_epi32(_mm_add_epi32(a, b), m2); return _mm_add_epi32(_mm_and_si128(_mm_cmpgt_epi32(m0, ret), m2), ret); } __attribute__((target("sse4.2"))) __attribute__((always_inline)) __m128i montgomery_sub_128(const __m128i &a, const __m128i &b, const __m128i &m2, const __m128i &m0) { __m128i ret = _mm_sub_epi32(a, b); return _mm_add_epi32(_mm_and_si128(_mm_cmpgt_epi32(m0, ret), m2), ret); } __attribute__((target("avx2"))) __attribute__((always_inline)) __m256i my256_mullo_epu32(const __m256i &a, const __m256i &b) { return _mm256_mullo_epi32(a, b); } __attribute__((target("avx2"))) __attribute__((always_inline)) __m256i my256_mulhi_epu32(const __m256i &a, const __m256i &b) { __m256i a13 = _mm256_shuffle_epi32(a, 0xF5); __m256i b13 = _mm256_shuffle_epi32(b, 0xF5); __m256i prod02 = _mm256_mul_epu32(a, b); __m256i prod13 = _mm256_mul_epu32(a13, b13); __m256i prod = _mm256_unpackhi_epi64(_mm256_unpacklo_epi32(prod02, prod13), _mm256_unpackhi_epi32(prod02, prod13)); return prod; } __attribute__((target("avx2"))) __attribute__((always_inline)) __m256i montgomery_mul_256(const __m256i &a, const __m256i &b, const __m256i &r, const __m256i &m1) { return _mm256_sub_epi32( _mm256_add_epi32(my256_mulhi_epu32(a, b), m1), my256_mulhi_epu32(my256_mullo_epu32(my256_mullo_epu32(a, b), r), m1)); } __attribute__((target("avx2"))) __attribute__((always_inline)) __m256i montgomery_add_256(const __m256i &a, const __m256i &b, const __m256i &m2, const __m256i &m0) { __m256i ret = _mm256_sub_epi32(_mm256_add_epi32(a, b), m2); return _mm256_add_epi32(_mm256_and_si256(_mm256_cmpgt_epi32(m0, ret), m2), ret); } __attribute__((target("avx2"))) __attribute__((always_inline)) __m256i montgomery_sub_256(const __m256i &a, const __m256i &b, const __m256i &m2, const __m256i &m0) { __m256i ret = _mm256_sub_epi32(a, b); return _mm256_add_epi32(_mm256_and_si256(_mm256_cmpgt_epi32(m0, ret), m2), ret); } namespace Gauss { uint32_t a_buf_[4096][4096] __attribute__((aligned(64))); // return value: (rank, (-1) ^ (number of swap time)) template <typename mint> __attribute__((target("avx2"))) pair<int, int> GaussianElimination( const vector<vector<mint>> &m, int LinearEquation = false) { mint(&a)[4096][4096] = *reinterpret_cast<mint(*)[4096][4096]>(a_buf_); int H = m.size(), W = m[0].size(), rank = 0; int det = 1; for (int i = 0; i < H; i++) for (int j = 0; j < W; j++) a[i][j].a = m[i][j].a; __m256i r = _mm256_set1_epi32(mint::r); __m256i m0 = _mm256_set1_epi32(0); __m256i m1 = _mm256_set1_epi32(mint::get_mod()); __m256i m2 = _mm256_set1_epi32(mint::get_mod() << 1); for (int j = 0; j < (LinearEquation ? (W - 1) : W); j++) { // find basis if (rank == H) break; int idx = -1; for (int i = rank; i < H; i++) { if (a[i][j].get() != 0) idx = i; if (idx != -1) break; } if (idx == -1) { if (LinearEquation) continue; else return {0, 0}; } // swap if (rank != idx) { det = -det; for (int l = j; l < W; l++) swap(a[rank][l], a[idx][l]); } // normalize if (LinearEquation) { if (a[rank][j].get() != 1) { mint coeff = a[rank][j].inverse(); __m256i COEFF = _mm256_set1_epi32(coeff.a); for (int i = j / 8 * 8; i < W; i += 8) { __m256i R = _mm256_load_si256((__m256i *)(a[rank] + i)); __m256i RmulC = montgomery_mul_256(R, COEFF, r, m1); _mm256_store_si256((__m256i *)(a[rank] + i), RmulC); } } } // elimination for (int k = (LinearEquation ? 0 : rank + 1); k < H; k++) { if (k == rank) continue; if (a[k][rank].get() != 0) { mint coeff = a[k][j] / a[rank][j]; __m256i COEFF = _mm256_set1_epi32(coeff.a); for (int i = j / 8 * 8; i < W; i += 8) { __m256i R = _mm256_load_si256((__m256i *)(a[rank] + i)); __m256i K = _mm256_load_si256((__m256i *)(a[k] + i)); __m256i RmulC = montgomery_mul_256(R, COEFF, r, m1); __m256i KmnsR = montgomery_sub_256(K, RmulC, m2, m0); _mm256_store_si256((__m256i *)(a[k] + i), KmnsR); } } } rank++; } return {rank, det}; } // calculate determinant template <typename mint> mint determinant(const vector<vector<mint>> &mat) { mint(&a)[4096][4096] = *reinterpret_cast<mint(*)[4096][4096]>(a_buf_); auto p = GaussianElimination(mat); if (p.first != (int)mat.size()) return mint(0); mint det = p.second; for (int i = 0; i < (int)mat.size(); i++) det *= a[i][i]; return det; } // return V<V<mint>> // 0 column ... one of solutions // 1 ~ (W - rank) column ... bases // if not exist, return empty vector template <typename mint> vector<vector<mint>> LinearEquation(vector<vector<mint>> A, vector<mint> B) { int H = A.size(), W = A[0].size(); for (int i = 0; i < H; i++) A[i].push_back(B[i]); auto p = GaussianElimination(A, true); mint(&a)[4096][4096] = *reinterpret_cast<mint(*)[4096][4096]>(a_buf_); int rank = p.first; // check if solutions exist for (int i = rank; i < H; ++i) if (a[i][W] != 0) return vector<vector<mint>>{}; vector<vector<mint>> res(1, vector<mint>(W)); vector<int> pivot(W, -1); for (int i = 0, j = 0; i < rank; ++i) { while (a[i][j] == 0) ++j; res[0][j] = a[i][W], pivot[j] = i; } for (int j = 0; j < W; ++j) { if (pivot[j] == -1) { vector<mint> x(W); x[j] = -1; for (int k = 0; k < j; ++k) if (pivot[k] != -1) x[k] = a[pivot[k]][j]; res.push_back(x); } } return res; } } // namespace Gauss using namespace Gauss; using mint = LazyMontgomeryModInt<998244353>; using vm = vector<mint>; template <typename mint> std::pair<int, mint> GaussElimination(vector<vector<mint>> &a, int pivot_end = -1, bool diagonalize = false) { int H = a.size(), W = a[0].size(); int rank = 0, je = pivot_end; if (je == -1) je = W; mint det = 1; for (int j = 0; j < je; j++) { int idx = -1; for (int i = rank; i < H; i++) { if (a[i][j] != mint(0)) { idx = i; break; } } if (idx == -1) { det = 0; continue; } if (rank != idx) { det = -det; swap(a[rank], a[idx]); } det *= a[rank][j]; if (diagonalize && a[rank][j] != mint(1)) { mint coeff = a[rank][j].inverse(); for (int k = j; k < W; k++) a[rank][k] *= coeff; } int is = diagonalize ? 0 : rank + 1; for (int i = is; i < H; i++) { if (i == rank) continue; if (a[i][j] != mint(0)) { mint coeff = a[i][j] / a[rank][j]; for (int k = j; k < W; k++) a[i][k] -= a[rank][k] * coeff; } } rank++; } return make_pair(rank, det); } #line 4 "matrix/inverse-matrix.hpp" template <typename mint> vector<vector<mint>> inverse_matrix(const vector<vector<mint>>& a) { int N = a.size(); assert(N > 0); assert(N == (int)a[0].size()); vector<vector<mint>> m(N, vector<mint>(2 * N)); for (int i = 0; i < N; i++) { copy(begin(a[i]), end(a[i]), begin(m[i])); m[i][N + i] = 1; } auto [rank, det] = GaussElimination(m, N, true); if (rank != N) return {}; vector<vector<mint>> b(N); for (int i = 0; i < N; i++) { copy(begin(m[i]) + N, end(m[i]), back_inserter(b[i])); } return b; } void solve() { int N; rd(N); V<vm> a(N, vm(N)),b(N,vm(N)); int buf; rep(i, N) rep(j, N) { rd(buf); a[i][j] = buf; } rep(i, N) rep(j, N) { rd(buf); b[i][j] = buf; } vector<vector<mint>> calc(N+1,vector<mint>(N+1)); vector<mint> res(N+1); rep(x,N+1){ calc[x][0]=1; rep(i,N)calc[x][i+1]=calc[x][i]*(x+1); rep(i,N)rep(j,N)a[i][j]+=b[i][j]; res[x]=Gauss::determinant(a).get(); } auto tmp=inverse_matrix(calc); rep(i,N+1){ mint ans=0; rep(j,N+1)ans+=tmp[i][j]*res[j]; cout<<ans<<'\n'; } }