// Competitive-programming solution built on the "Nyaan_template" prelude
// (PROBLEM macro: https://judge.yosupo.jp/problem/matrix_det).
//
// NOTE(review): this copy of the file looks damaged by text extraction: the
// original line breaks are collapsed, whatever stood between angle brackets
// appears to be missing (e.g. "#include #include", "template using V = vector;"),
// and the last statement of solve() stops at "cout<". Restore those parts from
// the upstream source before trying to compile.
//
// Contents, in order of appearance:
//  * kyopro_template region: shorthand macros (pb, eb, each, all, rep*, ini/inl/
//    ins/inc, in2..in4, die), container aliases, amin/amax, stream operators for
//    pair and vector, variadic in()/out(), NyaanDebug-only trc/trca/trcc tracing,
//    bit helpers (popcnt, lsb, msb, getbit, setbit, delbit), lb/ub/btw/ceil/TEN,
//    mkrui (prefix sums), mkuni (sorted, de-duplicated copy), mkord, mkiota,
//    mkinv, IoSetupNya (unties cin, disables stdio sync, sets fixed precision)
//    and main(), which only calls solve().
//  * namespace fastio: rd()/wt() over fread/fwrite using two buffers of 1 << 17
//    bytes; pending output is written by flush(), registered through atexit.
//  * LazyMontgomeryModInt: modular integer stored in Montgomery form. The
//    static_asserts require an odd modulus below 2^30. operator==, operator!=
//    and get() subtract mod once when the value is >= mod before comparing or
//    returning. inverse() is pow(mod - 2).
//  * my128_* / my256_* and montgomery_{mul,add,sub}_{128,256}: lane-wise SSE4.2
//    and AVX2 versions of the Montgomery multiply / add / subtract.
//  * namespace Gauss:
//      - GaussianElimination(m, LinearEquation) copies m into the static
//        4096 x 4096 buffer a_buf_ and eliminates there with AVX2 row updates.
//        It returns {rank, det} where det starts at 1 and is negated on every
//        row swap; without LinearEquation it returns {0, 0} as soon as a column
//        has no pivot. With LinearEquation the last column is not used as a
//        pivot column, pivot rows are scaled to 1 and all other rows are cleared.
//      - determinant(mat) multiplies that sign by the diagonal left in a_buf_,
//        or returns 0 when the rank is not mat.size().
//      - LinearEquation(A, B) appends B as an extra column, eliminates, and
//        returns one solution followed by basis vectors, or an empty vector when
//        a row below the rank keeps a non-zero right-hand side.
//  * GaussElimination(a, pivot_end, diagonalize): scalar in-place elimination
//    on a vector of vectors; returns (rank, det).
//  * inverse_matrix(a): runs GaussElimination on [a | I] with diagonalize = true
//    and returns the right half, or {} when rank != N.
//  * solve(): reads N and two N x N matrices a and b; for x = 0..N it adds b to
//    a, stores determinant(a) in res[x] and the powers (x+1)^i in calc[x], then
//    inverts calc and combines each row with res (presumably interpolating
//    det(a + t*b) as a polynomial in t -- confirm against the truncated tail).
//
// Fix in Gauss::GaussianElimination: the elimination step now tests the pivot
// column entry a[k][j] instead of a[k][rank]. The two indices differ in
// LinearEquation mode once a pivot-less column has been skipped, and the old
// test then left rows un-eliminated (e.g. rows [0 1 | 1], [0 1 | 1] were
// reported as having no solution). The determinant path always has rank == j.
// Borrowed with thanks... #define PROBLEM "https://judge.yosupo.jp/problem/matrix_det" #pragma region kyopro_template #define Nyaan_template #include #include #define pb push_back #define eb emplace_back #define fi first #define se second #define each(x, v) for (auto &x : v) #define all(v) (v).begin(), (v).end() #define sz(v) ((int)(v).size()) #define mem(a, val) memset(a, val, sizeof(a)) #define ini(...) \ int __VA_ARGS__; \ in(__VA_ARGS__) #define inl(...) \ long long __VA_ARGS__; \ in(__VA_ARGS__) #define ins(...) \ string __VA_ARGS__; \ in(__VA_ARGS__) #define inc(...) \ char __VA_ARGS__; \ in(__VA_ARGS__) #define in2(s, t) \ for (int i = 0; i < (int)s.size(); i++) { \ in(s[i], t[i]); \ } #define in3(s, t, u) \ for (int i = 0; i < (int)s.size(); i++) { \ in(s[i], t[i], u[i]); \ } #define in4(s, t, u, v) \ for (int i = 0; i < (int)s.size(); i++) { \ in(s[i], t[i], u[i], v[i]); \ } #define rep(i, N) for (long long i = 0; i < (long long)(N); i++) #define repr(i, N) for (long long i = (long long)(N)-1; i >= 0; i--) #define rep1(i, N) for (long long i = 1; i <= (long long)(N); i++) #define repr1(i, N) for (long long i = (N); (long long)(i) > 0; i--) #define reg(i, a, b) for (long long i = (a); i < (b); i++) #define die(...) \ do { \ out(__VA_ARGS__); \ return; \ } while (0) using namespace std; using ll = long long; template using V = vector; using vi = vector; using vl = vector; using vvi = vector>; using vd = V; using vs = V; using vvl = vector>; using P = pair; using vp = vector

; using pii = pair; using vpi = vector>; constexpr int inf = 1001001001; constexpr long long infLL = (1LL << 61) - 1; template inline bool amin(T &x, U y) { return (y < x) ? (x = y, true) : false; } template inline bool amax(T &x, U y) { return (x < y) ? (x = y, true) : false; } template ostream &operator<<(ostream &os, const pair &p) { os << p.first << " " << p.second; return os; } template istream &operator>>(istream &is, pair &p) { is >> p.first >> p.second; return is; } template ostream &operator<<(ostream &os, const vector &v) { int s = (int)v.size(); for (int i = 0; i < s; i++) os << (i ? " " : "") << v[i]; return os; } template istream &operator>>(istream &is, vector &v) { for (auto &x : v) is >> x; return is; } void in() {} template void in(T &t, U &... u) { cin >> t; in(u...); } void out() { cout << "\n"; } template void out(const T &t, const U &... u) { cout << t; if (sizeof...(u)) cout << " "; out(u...); } #ifdef NyaanDebug #define trc(...) \ do { \ cerr << #__VA_ARGS__ << " = "; \ dbg_out(__VA_ARGS__); \ } while (0) #define trca(v, N) \ do { \ cerr << #v << " = "; \ array_out(v, N); \ } while (0) #define trcc(v) \ do { \ cerr << #v << " = {"; \ each(x, v) { cerr << " " << x << ","; } \ cerr << "}" << endl; \ } while (0) template void _cout(const T &c) { cerr << c; } void _cout(const int &c) { if (c == 1001001001) cerr << "inf"; else if (c == -1001001001) cerr << "-inf"; else cerr << c; } void _cout(const unsigned int &c) { if (c == 1001001001) cerr << "inf"; else cerr << c; } void _cout(const long long &c) { if (c == 1001001001 || c == (1LL << 61) - 1) cerr << "inf"; else if (c == -1001001001 || c == -((1LL << 61) - 1)) cerr << "-inf"; else cerr << c; } void _cout(const unsigned long long &c) { if (c == 1001001001 || c == (1LL << 61) - 1) cerr << "inf"; else cerr << c; } template void _cout(const pair &p) { cerr << "{ "; _cout(p.fi); cerr << ", "; _cout(p.se); cerr << " } "; } template void _cout(const vector &v) { int s = v.size(); cerr << "{ "; for 
(int i = 0; i < s; i++) { cerr << (i ? ", " : ""); _cout(v[i]); } cerr << " } "; } template void _cout(const vector> &v) { cerr << "[ "; for (const auto &x : v) { cerr << endl; _cout(x); cerr << ", "; } cerr << endl << " ] "; } void dbg_out() { cerr << endl; } template void dbg_out(const T &t, const U &... u) { _cout(t); if (sizeof...(u)) cerr << ", "; dbg_out(u...); } template void array_out(const T &v, int s) { cerr << "{ "; for (int i = 0; i < s; i++) { cerr << (i ? ", " : ""); _cout(v[i]); } cerr << " } " << endl; } template void array_out(const T &v, int H, int W) { cerr << "[ "; for (int i = 0; i < H; i++) { cerr << (i ? ", " : ""); array_out(v[i], W); } cerr << " ] " << endl; } #else #define trc(...) #define trca(...) #define trcc(...) #endif inline int popcnt(unsigned long long a) { return __builtin_popcountll(a); } inline int lsb(unsigned long long a) { return __builtin_ctzll(a); } inline int msb(unsigned long long a) { return 63 - __builtin_clzll(a); } template inline int getbit(T a, int i) { return (a >> i) & 1; } template inline void setbit(T &a, int i) { a |= (1LL << i); } template inline void delbit(T &a, int i) { a &= ~(1LL << i); } template int lb(const vector &v, const T &a) { return lower_bound(begin(v), end(v), a) - begin(v); } template int ub(const vector &v, const T &a) { return upper_bound(begin(v), end(v), a) - begin(v); } template int btw(T a, T x, T b) { return a <= x && x < b; } template T ceil(T a, U b) { return (a + b - 1) / b; } constexpr long long TEN(int n) { long long ret = 1, x = 10; while (n) { if (n & 1) ret *= x; x *= x; n >>= 1; } return ret; } template vector mkrui(const vector &v) { vector ret(v.size() + 1); for (int i = 0; i < int(v.size()); i++) ret[i + 1] = ret[i] + v[i]; return ret; }; template vector mkuni(const vector &v) { vector ret(v); sort(ret.begin(), ret.end()); ret.erase(unique(ret.begin(), ret.end()), ret.end()); return ret; } template vector mkord(int N, F f) { vector ord(N); iota(begin(ord), end(ord), 0); 
sort(begin(ord), end(ord), f); return ord; } template vector mkiota(int N) { vector ret(N); iota(begin(ret), end(ret), 0); return ret; } template vector mkinv(vector &v) { vector inv(v.size()); for (int i = 0; i < (int)v.size(); i++) inv[v[i]] = i; return inv; } struct IoSetupNya { IoSetupNya() { cin.tie(nullptr); ios::sync_with_stdio(false); cout << fixed << setprecision(15); cerr << fixed << setprecision(7); } } iosetupnya; void solve(); int main() { solve(); } #pragma endregion using namespace std; namespace fastio { static constexpr int SZ = 1 << 17; char ibuf[SZ], obuf[SZ]; int pil = 0, pir = 0, por = 0; struct Pre { char num[40000]; constexpr Pre() : num() { for (int i = 0; i < 10000; i++) { int n = i; for (int j = 3; j >= 0; j--) { num[i * 4 + j] = n % 10 + '0'; n /= 10; } } } } constexpr pre; inline void load() { memcpy(ibuf, ibuf + pil, pir - pil); pir = pir - pil + fread(ibuf + pir - pil, 1, SZ - pir + pil, stdin); pil = 0; } inline void flush() { fwrite(obuf, 1, por, stdout); por = 0; } inline void rd(char& c) { c = ibuf[pil++]; } template inline void rd(T& x) { if (pil + 32 > pir) load(); char c; do c = ibuf[pil++]; while (c < '-'); bool minus = 0; if (c == '-') { minus = 1; c = ibuf[pil++]; } x = 0; while (c >= '0') { x = x * 10 + (c & 15); c = ibuf[pil++]; } if (minus) x = -x; } inline void wt(char c) { obuf[por++] = c; } template inline void wt(T x) { if (por > SZ - 32) flush(); if (!x) { obuf[por++] = '0'; return; } if (x < 0) { obuf[por++] = '-'; x = -x; } int i = 12; char buf[16]; while (x >= 10000) { memcpy(buf + i, pre.num + (x % 10000) * 4, 4); x /= 10000; i -= 4; } int d = x < 100 ? (x < 10 ? 1 : 2) : (x < 1000 ? 
3 : 4); memcpy(obuf + por, pre.num + x * 4 + 4 - d, d); por += d; memcpy(obuf + por, buf + i + 4, 12 - i); por += 12 - i; } struct Dummy { Dummy() { atexit(flush); } } dummy; } // namespace fastio using fastio::rd; using fastio::wt; using namespace std; template struct LazyMontgomeryModInt { using mint = LazyMontgomeryModInt; using i32 = int32_t; using u32 = uint32_t; using u64 = uint64_t; static constexpr u32 get_r() { u32 ret = mod; for (i32 i = 0; i < 4; ++i) ret *= 2 - mod * ret; return ret; } static constexpr u32 r = get_r(); static constexpr u32 n2 = -u64(mod) % mod; static_assert(r * mod == 1, "invalid, r * mod != 1"); static_assert(mod < (1 << 30), "invalid, mod >= 2 ^ 30"); static_assert((mod & 1) == 1, "invalid, mod % 2 == 0"); u32 a; constexpr LazyMontgomeryModInt() : a(0) {} constexpr LazyMontgomeryModInt(const int64_t &b) : a(reduce(u64(b % mod + mod) * n2)){}; static constexpr u32 reduce(const u64 &b) { return (b + u64(u32(b) * u32(-r)) * mod) >> 32; } constexpr mint &operator+=(const mint &b) { if (i32(a += b.a - 2 * mod) < 0) a += 2 * mod; return *this; } constexpr mint &operator-=(const mint &b) { if (i32(a -= b.a) < 0) a += 2 * mod; return *this; } constexpr mint &operator*=(const mint &b) { a = reduce(u64(a) * b.a); return *this; } constexpr mint &operator/=(const mint &b) { *this *= b.inverse(); return *this; } constexpr mint operator+(const mint &b) const { return mint(*this) += b; } constexpr mint operator-(const mint &b) const { return mint(*this) -= b; } constexpr mint operator*(const mint &b) const { return mint(*this) *= b; } constexpr mint operator/(const mint &b) const { return mint(*this) /= b; } constexpr bool operator==(const mint &b) const { return (a >= mod ? a - mod : a) == (b.a >= mod ? b.a - mod : b.a); } constexpr bool operator!=(const mint &b) const { return (a >= mod ? a - mod : a) != (b.a >= mod ? 
b.a - mod : b.a); } constexpr mint operator-() const { return mint() - mint(*this); } constexpr mint pow(u64 n) const { mint ret(1), mul(*this); while (n > 0) { if (n & 1) ret *= mul; mul *= mul; n >>= 1; } return ret; } constexpr mint inverse() const { return pow(mod - 2); } friend ostream &operator<<(ostream &os, const mint &b) { return os << b.get(); } friend istream &operator>>(istream &is, mint &b) { int64_t t; is >> t; b = LazyMontgomeryModInt(t); return (is); } constexpr u32 get() const { u32 ret = reduce(a); return ret >= mod ? ret - mod : ret; } static constexpr u32 get_mod() { return mod; } }; using namespace std; using namespace std; __attribute__((target("sse4.2"))) __attribute__((always_inline)) __m128i my128_mullo_epu32(const __m128i &a, const __m128i &b) { return _mm_mullo_epi32(a, b); } __attribute__((target("sse4.2"))) __attribute__((always_inline)) __m128i my128_mulhi_epu32(const __m128i &a, const __m128i &b) { __m128i a13 = _mm_shuffle_epi32(a, 0xF5); __m128i b13 = _mm_shuffle_epi32(b, 0xF5); __m128i prod02 = _mm_mul_epu32(a, b); __m128i prod13 = _mm_mul_epu32(a13, b13); __m128i prod = _mm_unpackhi_epi64(_mm_unpacklo_epi32(prod02, prod13), _mm_unpackhi_epi32(prod02, prod13)); return prod; } __attribute__((target("sse4.2"))) __attribute__((always_inline)) __m128i montgomery_mul_128(const __m128i &a, const __m128i &b, const __m128i &r, const __m128i &m1) { return _mm_sub_epi32( _mm_add_epi32(my128_mulhi_epu32(a, b), m1), my128_mulhi_epu32(my128_mullo_epu32(my128_mullo_epu32(a, b), r), m1)); } __attribute__((target("sse4.2"))) __attribute__((always_inline)) __m128i montgomery_add_128(const __m128i &a, const __m128i &b, const __m128i &m2, const __m128i &m0) { __m128i ret = _mm_sub_epi32(_mm_add_epi32(a, b), m2); return _mm_add_epi32(_mm_and_si128(_mm_cmpgt_epi32(m0, ret), m2), ret); } __attribute__((target("sse4.2"))) __attribute__((always_inline)) __m128i montgomery_sub_128(const __m128i &a, const __m128i &b, const __m128i &m2, const __m128i &m0) { 
__m128i ret = _mm_sub_epi32(a, b); return _mm_add_epi32(_mm_and_si128(_mm_cmpgt_epi32(m0, ret), m2), ret); } __attribute__((target("avx2"))) __attribute__((always_inline)) __m256i my256_mullo_epu32(const __m256i &a, const __m256i &b) { return _mm256_mullo_epi32(a, b); } __attribute__((target("avx2"))) __attribute__((always_inline)) __m256i my256_mulhi_epu32(const __m256i &a, const __m256i &b) { __m256i a13 = _mm256_shuffle_epi32(a, 0xF5); __m256i b13 = _mm256_shuffle_epi32(b, 0xF5); __m256i prod02 = _mm256_mul_epu32(a, b); __m256i prod13 = _mm256_mul_epu32(a13, b13); __m256i prod = _mm256_unpackhi_epi64(_mm256_unpacklo_epi32(prod02, prod13), _mm256_unpackhi_epi32(prod02, prod13)); return prod; } __attribute__((target("avx2"))) __attribute__((always_inline)) __m256i montgomery_mul_256(const __m256i &a, const __m256i &b, const __m256i &r, const __m256i &m1) { return _mm256_sub_epi32( _mm256_add_epi32(my256_mulhi_epu32(a, b), m1), my256_mulhi_epu32(my256_mullo_epu32(my256_mullo_epu32(a, b), r), m1)); } __attribute__((target("avx2"))) __attribute__((always_inline)) __m256i montgomery_add_256(const __m256i &a, const __m256i &b, const __m256i &m2, const __m256i &m0) { __m256i ret = _mm256_sub_epi32(_mm256_add_epi32(a, b), m2); return _mm256_add_epi32(_mm256_and_si256(_mm256_cmpgt_epi32(m0, ret), m2), ret); } __attribute__((target("avx2"))) __attribute__((always_inline)) __m256i montgomery_sub_256(const __m256i &a, const __m256i &b, const __m256i &m2, const __m256i &m0) { __m256i ret = _mm256_sub_epi32(a, b); return _mm256_add_epi32(_mm256_and_si256(_mm256_cmpgt_epi32(m0, ret), m2), ret); } namespace Gauss { uint32_t a_buf_[4096][4096] __attribute__((aligned(64))); // return value: (rank, (-1) ^ (number of swap time)) template __attribute__((target("avx2"))) pair GaussianElimination( const vector> &m, int LinearEquation = false) { mint(&a)[4096][4096] = *reinterpret_cast(a_buf_); int H = m.size(), W = m[0].size(), rank = 0; int det = 1; for (int i = 0; i < H; i++) for 
(int j = 0; j < W; j++) a[i][j].a = m[i][j].a; __m256i r = _mm256_set1_epi32(mint::r); __m256i m0 = _mm256_set1_epi32(0); __m256i m1 = _mm256_set1_epi32(mint::get_mod()); __m256i m2 = _mm256_set1_epi32(mint::get_mod() << 1); for (int j = 0; j < (LinearEquation ? (W - 1) : W); j++) { // find basis if (rank == H) break; int idx = -1; for (int i = rank; i < H; i++) { if (a[i][j].get() != 0) idx = i; if (idx != -1) break; } if (idx == -1) { if (LinearEquation) continue; else return {0, 0}; } // swap if (rank != idx) { det = -det; for (int l = j; l < W; l++) swap(a[rank][l], a[idx][l]); } // normalize if (LinearEquation) { if (a[rank][j].get() != 1) { mint coeff = a[rank][j].inverse(); __m256i COEFF = _mm256_set1_epi32(coeff.a); for (int i = j / 8 * 8; i < W; i += 8) { __m256i R = _mm256_load_si256((__m256i *)(a[rank] + i)); __m256i RmulC = montgomery_mul_256(R, COEFF, r, m1); _mm256_store_si256((__m256i *)(a[rank] + i), RmulC); } } } // elimination for (int k = (LinearEquation ? 0 : rank + 1); k < H; k++) { if (k == rank) continue; /* test the pivot column j (rank can be smaller than j in LinearEquation mode) */ if (a[k][j].get() != 0) { mint coeff = a[k][j] / a[rank][j]; __m256i COEFF = _mm256_set1_epi32(coeff.a); for (int i = j / 8 * 8; i < W; i += 8) { __m256i R = _mm256_load_si256((__m256i *)(a[rank] + i)); __m256i K = _mm256_load_si256((__m256i *)(a[k] + i)); __m256i RmulC = montgomery_mul_256(R, COEFF, r, m1); __m256i KmnsR = montgomery_sub_256(K, RmulC, m2, m0); _mm256_store_si256((__m256i *)(a[k] + i), KmnsR); } } } rank++; } return {rank, det}; } // calculate determinant template mint determinant(const vector> &mat) { mint(&a)[4096][4096] = *reinterpret_cast(a_buf_); auto p = GaussianElimination(mat); if (p.first != (int)mat.size()) return mint(0); mint det = p.second; for (int i = 0; i < (int)mat.size(); i++) det *= a[i][i]; return det; } // return V> // 0 column ... one of solutions // 1 ~ (W - rank) column ... 
bases // if not exist, return empty vector template vector> LinearEquation(vector> A, vector B) { int H = A.size(), W = A[0].size(); for (int i = 0; i < H; i++) A[i].push_back(B[i]); auto p = GaussianElimination(A, true); mint(&a)[4096][4096] = *reinterpret_cast(a_buf_); int rank = p.first; // check if solutions exist for (int i = rank; i < H; ++i) if (a[i][W] != 0) return vector>{}; vector> res(1, vector(W)); vector pivot(W, -1); for (int i = 0, j = 0; i < rank; ++i) { while (a[i][j] == 0) ++j; res[0][j] = a[i][W], pivot[j] = i; } for (int j = 0; j < W; ++j) { if (pivot[j] == -1) { vector x(W); x[j] = -1; for (int k = 0; k < j; ++k) if (pivot[k] != -1) x[k] = a[pivot[k]][j]; res.push_back(x); } } return res; } } // namespace Gauss using namespace Gauss; using mint = LazyMontgomeryModInt<998244353>; using vm = vector; template std::pair GaussElimination(vector> &a, int pivot_end = -1, bool diagonalize = false) { int H = a.size(), W = a[0].size(); int rank = 0, je = pivot_end; if (je == -1) je = W; mint det = 1; for (int j = 0; j < je; j++) { int idx = -1; for (int i = rank; i < H; i++) { if (a[i][j] != mint(0)) { idx = i; break; } } if (idx == -1) { det = 0; continue; } if (rank != idx) { det = -det; swap(a[rank], a[idx]); } det *= a[rank][j]; if (diagonalize && a[rank][j] != mint(1)) { mint coeff = a[rank][j].inverse(); for (int k = j; k < W; k++) a[rank][k] *= coeff; } int is = diagonalize ? 
0 : rank + 1; for (int i = is; i < H; i++) { if (i == rank) continue; if (a[i][j] != mint(0)) { mint coeff = a[i][j] / a[rank][j]; for (int k = j; k < W; k++) a[i][k] -= a[rank][k] * coeff; } } rank++; } return make_pair(rank, det); } #line 4 "matrix/inverse-matrix.hpp" template vector> inverse_matrix(const vector>& a) { int N = a.size(); assert(N > 0); assert(N == (int)a[0].size()); vector> m(N, vector(2 * N)); for (int i = 0; i < N; i++) { copy(begin(a[i]), end(a[i]), begin(m[i])); m[i][N + i] = 1; } auto [rank, det] = GaussElimination(m, N, true); if (rank != N) return {}; vector> b(N); for (int i = 0; i < N; i++) { copy(begin(m[i]) + N, end(m[i]), back_inserter(b[i])); } return b; } void solve() { int N; rd(N); V a(N, vm(N)),b(N,vm(N)); int buf; rep(i, N) rep(j, N) { rd(buf); a[i][j] = buf; } rep(i, N) rep(j, N) { rd(buf); b[i][j] = buf; } vector> calc(N+1,vector(N+1)); vector res(N+1); rep(x,N+1){ calc[x][0]=1; rep(i,N)calc[x][i+1]=calc[x][i]*(x+1); rep(i,N)rep(j,N)a[i][j]+=b[i][j]; res[x]=Gauss::determinant(a).get(); } auto tmp=inverse_matrix(calc); rep(i,N+1){ mint ans=0; rep(j,N+1)ans+=tmp[i][j]*res[j]; cout<