#line 1 "test/matrix/yukicoder1907.0.test.cpp" // competitive-verifier: PROBLEM https://yukicoder.me/problems/no/1907 #line 2 "mat_extra.hpp" #line 2 "mat_basic.hpp" #line 2 "poly.hpp" #line 2 "poly_basic.hpp" #line 2 "binomial.hpp" #include #include template class Binomial { std::vector factorial_, invfactorial_; Binomial() : factorial_{Tp(1)}, invfactorial_{Tp(1)} {} void preprocess(int n) { if (const int nn = factorial_.size(); nn < n) { int k = nn; while (k < n) k *= 2; k = std::min(k, Tp::mod()); factorial_.resize(k); invfactorial_.resize(k); for (int i = nn; i < k; ++i) factorial_[i] = factorial_[i - 1] * i; invfactorial_.back() = factorial_.back().inv(); for (int i = k - 2; i >= nn; --i) invfactorial_[i] = invfactorial_[i + 1] * (i + 1); } } public: static const Binomial &get(int n) { static Binomial bin; bin.preprocess(n); return bin; } Tp binom(int n, int m) const { return n < m ? Tp() : factorial_[n] * invfactorial_[m] * invfactorial_[n - m]; } Tp inv(int n) const { return factorial_[n - 1] * invfactorial_[n]; } Tp factorial(int n) const { return factorial_[n]; } Tp inv_factorial(int n) const { return invfactorial_[n]; } }; #line 2 "fft.hpp" #line 4 "fft.hpp" #include #include #include #line 8 "fft.hpp" template class FftInfo { static Tp least_quadratic_nonresidue() { for (int i = 2;; ++i) if (Tp(i).pow((Tp::mod() - 1) / 2) == -1) return Tp(i); } const int ordlog2_; const Tp zeta_; const Tp invzeta_; const Tp imag_; const Tp invimag_; mutable std::vector root_; mutable std::vector invroot_; FftInfo() : ordlog2_(__builtin_ctzll(Tp::mod() - 1)), zeta_(least_quadratic_nonresidue().pow((Tp::mod() - 1) >> ordlog2_)), invzeta_(zeta_.inv()), imag_(zeta_.pow(1LL << (ordlog2_ - 2))), invimag_(-imag_), root_{Tp(1), imag_}, invroot_{Tp(1), invimag_} {} public: static const FftInfo &get() { static FftInfo info; return info; } Tp imag() const { return imag_; } Tp inv_imag() const { return invimag_; } Tp zeta() const { return zeta_; } Tp inv_zeta() const { return invzeta_; } const std::vector &root(int n) const { // [0, n) assert((n & (n - 1)) == 0); if (const int s = root_.size(); s < n) { root_.resize(n); for (int i = __builtin_ctz(s); (1 << i) < n; ++i) { const int j = 1 << i; root_[j] = zeta_.pow(1LL << (ordlog2_ - i - 2)); for (int k = j + 1; k < j * 2; ++k) root_[k] = root_[k - j] * root_[j]; } } return root_; } const std::vector &inv_root(int n) const { // [0, n) assert((n & (n - 1)) == 0); if (const int s = invroot_.size(); s < n) { invroot_.resize(n); for (int i = __builtin_ctz(s); (1 << i) < n; ++i) { const int j = 1 << i; invroot_[j] = invzeta_.pow(1LL << (ordlog2_ - i - 2)); for (int k = j + 1; k < j * 2; ++k) invroot_[k] = invroot_[k - j] * invroot_[j]; } } return invroot_; } }; inline int fft_len(int n) { --n; n |= n >> 1, n |= n >> 2, n |= n >> 4, n |= n >> 8; return (n | n >> 16) + 1; } namespace detail { template inline void butterfly_n(Iterator a, int n, const std::vector::value_type> &root) { assert(n > 0); assert((n & (n - 1)) == 0); const int bn = __builtin_ctz(n); if (bn & 1) { for (int i = 0; i < n / 2; ++i) { const auto a0 = a[i], a1 = a[i + n / 2]; a[i] = a0 + a1, a[i + n / 2] = a0 - a1; } } for (int i = n >> (bn & 1); i >= 4; i /= 4) { const int i4 = i / 4; for (int k = 0; k < i4; ++k) { const auto a0 = a[k + i4 * 0], a1 = a[k + i4 * 1]; const auto a2 = a[k + i4 * 2], a3 = a[k + i4 * 3]; const auto a02p = a0 + a2, a02m = a0 - a2; const auto a13p = a1 + a3, a13m = (a1 - a3) * root[1]; a[k + i4 * 0] = a02p + a13p, a[k + i4 * 1] = a02p - a13p; a[k + i4 * 2] = a02m + a13m, a[k + i4 * 3] = a02m - a13m; } for (int j = i, m = 2; j < n; j += i, m += 2) { const auto r = root[m], r2 = r * r, r3 = r2 * r; for (int k = j; k < j + i4; ++k) { const auto a0 = a[k + i4 * 0], a1 = a[k + i4 * 1] * r; const auto a2 = a[k + i4 * 2] * r2, a3 = a[k + i4 * 3] * r3; const auto a02p = a0 + a2, a02m = a0 - a2; const auto a13p = a1 + a3, a13m = (a1 - a3) * root[1]; a[k + i4 * 0] = a02p + a13p, a[k + i4 * 1] = a02p - a13p; a[k + i4 * 2] = a02m + a13m, a[k + i4 * 3] = a02m - a13m; } } } } template inline void inv_butterfly_n(Iterator a, int n, const std::vector::value_type> &root) { assert(n > 0); assert((n & (n - 1)) == 0); const int bn = __builtin_ctz(n); for (int i = 4; i <= (n >> (bn & 1)); i *= 4) { const int i4 = i / 4; for (int k = 0; k < i4; ++k) { const auto a0 = a[k + i4 * 0], a1 = a[k + i4 * 1]; const auto a2 = a[k + i4 * 2], a3 = a[k + i4 * 3]; const auto a01p = a0 + a1, a01m = a0 - a1; const auto a23p = a2 + a3, a23m = (a2 - a3) * root[1]; a[k + i4 * 0] = a01p + a23p, a[k + i4 * 1] = a01m + a23m; a[k + i4 * 2] = a01p - a23p, a[k + i4 * 3] = a01m - a23m; } for (int j = i, m = 2; j < n; j += i, m += 2) { const auto r = root[m], r2 = r * r, r3 = r2 * r; for (int k = j; k < j + i4; ++k) { const auto a0 = a[k + i4 * 0], a1 = a[k + i4 * 1]; const auto a2 = a[k + i4 * 2], a3 = a[k + i4 * 3]; const auto a01p = a0 + a1, a01m = a0 - a1; const auto a23p = a2 + a3, a23m = (a2 - a3) * root[1]; a[k + i4 * 0] = a01p + a23p, a[k + i4 * 1] = (a01m + a23m) * r; a[k + i4 * 2] = (a01p - a23p) * r2, a[k + i4 * 3] = (a01m - a23m) * r3; } } } if (bn & 1) { for (int i = 0; i < n / 2; ++i) { const auto a0 = a[i], a1 = a[i + n / 2]; a[i] = a0 + a1, a[i + n / 2] = a0 - a1; } } } } // namespace detail // FFT_n: A(x) |-> bit-reversed order of [A(1), A(zeta_n), ..., A(zeta_n^(n-1))] template inline void fft_n(Iterator a, int n) { using Tp = typename std::iterator_traits::value_type; detail::butterfly_n(a, n, FftInfo::get().root(n / 2)); } template inline void fft(std::vector &a) { fft_n(a.begin(), a.size()); } // IFFT_n: bit-reversed order of [A(1), A(zeta_n), ..., A(zeta_n^(n-1))] |-> A(x) template inline void inv_fft_n(Iterator a, int n) { using Tp = typename std::iterator_traits::value_type; detail::inv_butterfly_n(a, n, FftInfo::get().inv_root(n / 2)); const Tp iv = Tp::mod() - (Tp::mod() - 1) / n; for (int i = 0; i < n; ++i) a[i] *= iv; } template inline void inv_fft(std::vector &a) { inv_fft_n(a.begin(), a.size()); } // IFFT_n^T: A(x) |-> 1/n FFT_n((x^n A(x^(-1))) mod (x^n - 1)) template inline void transposed_inv_fft_n(Iterator a, int n) { using Tp = typename std::iterator_traits::value_type; const Tp iv = Tp::mod() - (Tp::mod() - 1) / n; for (int i = 0; i < n; ++i) a[i] *= iv; detail::butterfly_n(a, n, FftInfo::get().inv_root(n / 2)); } template inline void transposed_inv_fft(std::vector &a) { transposed_inv_fft_n(a.begin(), a.size()); } // FFT_n^T : FFT_n((x^n A(x^(-1))) mod (x^n - 1)) |-> n A(x) template inline void transposed_fft_n(Iterator a, int n) { using Tp = typename std::iterator_traits::value_type; detail::inv_butterfly_n(a, n, FftInfo::get().root(n / 2)); } template inline void transposed_fft(std::vector &a) { transposed_fft_n(a.begin(), a.size()); } template inline std::vector convolution_fft(std::vector a, std::vector b) { if (a.empty() || b.empty()) return {}; const int n = a.size(); const int m = b.size(); const int len = fft_len(n + m - 1); a.resize(len); b.resize(len); fft(a); fft(b); for (int i = 0; i < len; ++i) a[i] *= b[i]; inv_fft(a); a.resize(n + m - 1); return a; } template inline std::vector square_fft(std::vector a) { if (a.empty()) return {}; const int n = a.size(); const int len = fft_len(n * 2 - 1); a.resize(len); fft(a); for (int i = 0; i < len; ++i) a[i] *= a[i]; inv_fft(a); a.resize(n * 2 - 1); return a; } template inline std::vector convolution_naive(const std::vector &a, const std::vector &b) { if (a.empty() || b.empty()) return {}; const int n = a.size(); const int m = b.size(); std::vector res(n + m - 1); for (int i = 0; i < n; ++i) for (int j = 0; j < m; ++j) res[i + j] += a[i] * b[j]; return res; } template inline std::vector convolution(const std::vector &a, const std::vector &b) { if (std::min(a.size(), b.size()) < 60) return convolution_naive(a, b); if (std::addressof(a) == std::addressof(b)) return square_fft(a); return convolution_fft(a, b); } #line 2 "fps_basic.hpp" #line 2 "semi_relaxed_conv.hpp" #line 5 "semi_relaxed_conv.hpp" #include #include #line 8 "semi_relaxed_conv.hpp" template inline std::enable_if_t &>, std::vector> semi_relaxed_convolution_naive(const std::vector &A, Closure gen, int n) { std::vector B(n), AB(n); for (int i = 0; i < n; ++i) { for (int j = std::max(0, i - (int)A.size() + 1); j < i; ++j) AB[i] += A[i - j] * B[j]; B[i] = gen(i, AB); if (!A.empty()) AB[i] += A[0] * B[i]; } return B; } // returns coefficients generated by closure // closure: gen(index, current_product) template inline std::enable_if_t &>, std::vector> semi_relaxed_convolution(const std::vector &A, Closure gen, int n) { if (A.size() < 60) return semi_relaxed_convolution_naive(A, gen, n); enum { BaseCaseSize = 32 }; static_assert((BaseCaseSize & (BaseCaseSize - 1)) == 0); static const int Block[] = {16, 16, 16, 16, 16}; static const int BlockSize[] = { BaseCaseSize, BaseCaseSize * Block[0], BaseCaseSize * Block[0] * Block[1], BaseCaseSize * Block[0] * Block[1] * Block[2], BaseCaseSize * Block[0] * Block[1] * Block[2] * Block[3], BaseCaseSize * Block[0] * Block[1] * Block[2] * Block[3] * Block[4], }; // returns (which_block, level) auto blockinfo = [](int ind) { int i = ind / BaseCaseSize, lv = 0; while ((i & (Block[lv] - 1)) == 0) i /= Block[lv++]; return std::make_pair(i & (Block[lv] - 1), lv); }; std::vector B(n), AB(n); std::vector>> dftA, dftB; for (int i = 0; i < n; ++i) { const int s = i & (BaseCaseSize - 1); // block contribution if (i >= BaseCaseSize && s == 0) { const auto [j, lv] = blockinfo(i); const int blocksize = BlockSize[lv]; if (blocksize * j == i) { if ((int)dftA.size() == lv) { dftA.emplace_back(); dftB.emplace_back(Block[lv] - 1); } if ((j - 1) * blocksize < (int)A.size()) { dftA[lv] .emplace_back(A.begin() + (j - 1) * blocksize, A.begin() + std::min((j + 1) * blocksize, A.size())) .resize(blocksize * 2); fft(dftA[lv][j - 1]); } } if (!dftA[lv].empty()) { dftB[lv][j - 1].resize(blocksize * 2); std::copy_n(B.begin() + (i - blocksize), blocksize, dftB[lv][j - 1].begin()); std::fill_n(dftB[lv][j - 1].begin() + blocksize, blocksize, Tp(0)); fft(dftB[lv][j - 1]); // middle product std::vector mp(blocksize * 2); for (int k = 0; k < std::min(j, dftA[lv].size()); ++k) for (int l = 0; l < blocksize * 2; ++l) mp[l] += dftA[lv][k][l] * dftB[lv][j - 1 - k][l]; inv_fft(mp); for (int k = 0; k < blocksize && i + k < n; ++k) AB[i + k] += mp[k + blocksize]; } } // basecase contribution for (int j = std::max(i - s, i - (int)A.size() + 1); j < i; ++j) AB[i] += A[i - j] * B[j]; B[i] = gen(i, AB); if (!A.empty()) AB[i] += A[0] * B[i]; } return B; } #line 8 "fps_basic.hpp" template inline int order(const std::vector &a) { for (int i = 0; i < (int)a.size(); ++i) if (a[i] != 0) return i; return -1; } template inline std::vector fps_inv(const std::vector &a, int n) { assert(order(a) == 0); if (n <= 0) return {}; if (std::min(a.size(), n) < 60) return semi_relaxed_convolution( a, [v = a[0].inv()](int n, auto &&c) { return n == 0 ? v : -c[n] * v; }, n); enum { Threshold = 32 }; const int len = fft_len(n); std::vector invA, shopA(len), shopB(len); invA = semi_relaxed_convolution( a, [v = a[0].inv()](int n, auto &&c) { return n == 0 ? v : -c[n] * v; }, Threshold); invA.resize(len); for (int i = Threshold * 2; i <= len; i *= 2) { std::fill(std::copy_n(a.begin(), std::min(a.size(), i), shopA.begin()), shopA.begin() + i, Tp(0)); std::copy_n(invA.begin(), i, shopB.begin()); fft_n(shopA.begin(), i); fft_n(shopB.begin(), i); for (int j = 0; j < i; ++j) shopA[j] *= shopB[j]; inv_fft_n(shopA.begin(), i); std::fill_n(shopA.begin(), i / 2, Tp(0)); fft_n(shopA.begin(), i); for (int j = 0; j < i; ++j) shopA[j] *= shopB[j]; inv_fft_n(shopA.begin(), i); for (int j = i / 2; j < i; ++j) invA[j] = -shopA[j]; } invA.resize(n); return invA; } template inline std::vector fps_div(const std::vector &a, const std::vector &b, int n) { assert(order(b) == 0); if (n <= 0) return {}; return semi_relaxed_convolution( b, [&, v = b[0].inv()](int n, auto &&c) { if (n < (int)a.size()) return (a[n] - c[n]) * v; return -c[n] * v; }, n); } template inline std::vector deriv(const std::vector &a) { const int n = (int)a.size() - 1; if (n <= 0) return {}; std::vector res(n); for (int i = 1; i <= n; ++i) res[i - 1] = a[i] * i; return res; } template inline std::vector integr(const std::vector &a, Tp c = {}) { const int n = a.size() + 1; auto &&bin = Binomial::get(n); std::vector res(n); res[0] = c; for (int i = 1; i < n; ++i) res[i] = a[i - 1] * bin.inv(i); return res; } template inline std::vector fps_log(const std::vector &a, int n) { return integr(fps_div(deriv(a), a, n - 1)); } template inline std::vector fps_exp(const std::vector &a, int n) { if (n <= 0) return {}; assert(a.empty() || a[0] == 0); return semi_relaxed_convolution( deriv(a), [bin = Binomial::get(n)](int n, auto &&c) { return n == 0 ? Tp(1) : c[n - 1] * bin.inv(n); }, n); } template inline std::vector fps_pow(std::vector a, long long e, int n) { if (n <= 0) return {}; if (e == 0) { std::vector res(n); res[0] = 1; return res; } const int o = order(a); if (o < 0 || o > n / e || (o == n / e && n % e == 0)) return std::vector(n); if (o != 0) a.erase(a.begin(), a.begin() + o); const Tp ia0 = a[0].inv(); const Tp a0e = a[0].pow(e); const Tp me = e; for (int i = 0; i < (int)a.size(); ++i) a[i] *= ia0; a = fps_log(a, n - o * e); for (int i = 0; i < (int)a.size(); ++i) a[i] *= me; a = fps_exp(a, n - o * e); for (int i = 0; i < (int)a.size(); ++i) a[i] *= a0e; a.insert(a.begin(), o * e, Tp(0)); return a; } #line 7 "poly_basic.hpp" #include #line 10 "poly_basic.hpp" template inline int degree(const std::vector &a) { int n = (int)a.size() - 1; while (n >= 0 && a[n] == 0) --n; return n; } template inline void shrink(std::vector &a) { a.resize(degree(a) + 1); } template inline std::vector taylor_shift(std::vector a, Tp c) { const int n = a.size(); auto &&bin = Binomial::get(n); for (int i = 0; i < n; ++i) a[i] *= bin.factorial(i); Tp cc = 1; std::vector b(n); for (int i = 0; i < n; ++i) { b[i] = cc * bin.inv_factorial(i); cc *= c; } std::reverse(a.begin(), a.end()); auto ab = convolution(a, b); ab.resize(n); std::reverse(ab.begin(), ab.end()); for (int i = 0; i < n; ++i) ab[i] *= bin.inv_factorial(i); return ab; } // returns (quotient, remainder) // O(deg(Q)deg(B)) template inline std::array, 2> euclidean_div_naive(const std::vector &A, const std::vector &B) { const int degA = degree(A); const int degB = degree(B); assert(degB >= 0); const int degQ = degA - degB; if (degQ < 0) return {std::vector{Tp(0)}, A}; std::vector Q(degQ + 1), R = A; const auto inv = B[degB].inv(); for (int i = degQ, n = degA; i >= 0; --i) if ((Q[i] = R[n--] * inv) != 0) for (int j = 0; j <= degB; ++j) R[i + j] -= Q[i] * B[j]; R.resize(degB); return {Q, R}; } // O(min(deg(Q)^2,deg(Q)deg(B))) template inline std::vector euclidean_div_quotient_naive(const std::vector &A, const std::vector &B) { const int degA = degree(A); const int degB = degree(B); assert(degB >= 0); const int degQ = degA - degB; if (degQ < 0) return {Tp(0)}; const auto inv = B[degB].inv(); std::vector Q(degQ + 1); for (int i = 0; i <= degQ; ++i) { for (int j = 1; j <= std::min(i, degB); ++j) Q[degQ - i] += B[degB - j] * Q[degQ - i + j]; Q[degQ - i] = (A[degA - i] - Q[degQ - i]) * inv; } return Q; } // returns (quotient, remainder) template inline std::array, 2> euclidean_div(const std::vector &A, const std::vector &B) { const int degA = degree(A); const int degB = degree(B); assert(degB >= 0); // A = Q*B + R => A/B = Q + R/B in R((x^(-1))) const int degQ = degA - degB; if (degQ < 0) return {std::vector{Tp(0)}, A}; if (degQ < 60 || degB < 60) return euclidean_div_naive(A, B); auto Q = fps_div(std::vector(A.rend() - (degA + 1), A.rend()), std::vector(B.rend() - (degB + 1), B.rend()), degQ + 1); std::reverse(Q.begin(), Q.end()); // returns a mod (x^n-1) auto make_cyclic = [](const std::vector &a, int n) { assert((n & (n - 1)) == 0); std::vector b(n); for (int i = 0; i < (int)a.size(); ++i) b[i & (n - 1)] += a[i]; return b; }; const int len = fft_len(std::max(degB, 1)); const auto cyclicA = make_cyclic(A, len); auto cyclicB = make_cyclic(B, len); auto cyclicQ = make_cyclic(Q, len); fft(cyclicQ); fft(cyclicB); for (int i = 0; i < len; ++i) cyclicQ[i] *= cyclicB[i]; inv_fft(cyclicQ); // R = A - QB mod (x^n-1) (n >= degB) std::vector R(degB); for (int i = 0; i < degB; ++i) R[i] = cyclicA[i] - cyclicQ[i]; return {Q, R}; } template inline std::vector euclidean_div_quotient(const std::vector &A, const std::vector &B) { const int degA = degree(A); const int degB = degree(B); assert(degB >= 0); // A = Q*B + R => A/B = Q + R/B in R((x^(-1))) const int degQ = degA - degB; if (degQ < 0) return {Tp(0)}; if (std::min(degQ, degB) < 60) return euclidean_div_quotient_naive(A, B); auto Q = fps_div(std::vector(A.rend() - (degA + 1), A.rend()), std::vector(B.rend() - (degB + 1), B.rend()), degQ + 1); std::reverse(Q.begin(), Q.end()); return Q; } #line 7 "poly.hpp" #include #line 10 "poly.hpp" template class Poly : public std::vector { using Base = std::vector; public: using Base::Base; static Poly from(std::vector v) { Poly res; static_cast &>(res) = std::move(v); return res; } static Poly zero() { return Poly(); } static Poly one() { return Poly{Tp::one()}; } bool is_zero() const { return deg() < 0; } bool is_one() const { return deg() == 0 && lc() == Tp::one(); } int deg() const { return degree(*this); } int ord() const { return order(*this); } Poly rev() const { const int d = deg(); Poly res(d + 1); for (int i = d; i >= 0; --i) res[i] = Base::operator[](d - i); return res; } Poly slice(int L, int R) const { Poly res(R - L); for (int i = L; i < std::min(R, Base::size()); ++i) res[i - L] = Base::operator[](i); return res; } Poly trunc(int D) const { Poly res(D); for (int i = 0; i < std::min(D, Base::size()); ++i) res[i] = Base::operator[](i); return res; } Poly &shrink() { Base::resize(deg() + 1); return *this; } Tp lc() const { const int d = deg(); return d == -1 ? Tp() : Base::operator[](d); } Poly monic() const { const int d = deg(); assert(d >= 0); const auto iv = Base::operator[](d).inv(); Poly res(*this); for (int i = 0; i <= d; ++i) res[i] *= iv; return res; } Poly taylor_shift(Tp c) const { return from(::taylor_shift(*this, c)); } Poly operator-() const { const int d = deg(); Poly res(d + 1); for (int i = 0; i <= d; ++i) res[i] = -Base::operator[](i); res.shrink(); return res; } Poly deriv() const { return from(::deriv(*this)); } Poly integr(Tp c = {}) const { return from(::integr(*this, c)); } std::array divmod(const Poly &R) const { auto [q, r] = euclidean_div(*this, R); return {from(std::move(q)), from(std::move(r))}; } Poly &operator+=(const Poly &R) { if (Base::size() < R.size()) Base::resize(R.size()); for (int i = 0; i < (int)R.size(); ++i) Base::operator[](i) += R[i]; return shrink(); } Poly &operator-=(const Poly &R) { if (Base::size() < R.size()) Base::resize(R.size()); for (int i = 0; i < (int)R.size(); ++i) Base::operator[](i) -= R[i]; return shrink(); } Poly &operator*=(const Poly &R) { Base::operator=(convolution(*this, R)); return shrink(); } Poly &operator/=(const Poly &R) { Base::operator=(euclidean_div_quotient(*this, R)); return shrink(); } Poly &operator%=(const Poly &R) { Base::operator=(std::get<1>(divmod(R))); return shrink(); } Poly &operator<<=(int D) { if (D > 0) { Base::insert(Base::begin(), D, Tp(0)); } else if (D < 0) { if (-D < (int)Base::size()) { Base::erase(Base::begin(), Base::begin() + (-D)); } else { Base::clear(); } } return shrink(); } Poly &operator>>=(int D) { return operator<<=(-D); } friend Poly operator+(const Poly &L, const Poly &R) { return Poly(L) += R; } friend Poly operator-(const Poly &L, const Poly &R) { return Poly(L) -= R; } friend Poly operator*(const Poly &L, const Poly &R) { return Poly(L) *= R; } friend Poly operator/(const Poly &L, const Poly &R) { return Poly(L) /= R; } friend Poly operator%(const Poly &L, const Poly &R) { return Poly(L) %= R; } friend Poly operator<<(const Poly &L, int D) { return Poly(L) <<= D; } friend Poly operator>>(const Poly &L, int D) { return Poly(L) >>= D; } friend std::ostream &operator<<(std::ostream &L, const Poly &R) { L << '['; const int d = R.deg(); if (d < 0) { L << '0'; } else { for (int i = 0; i <= d; ++i) { L << R[i]; if (i == 1) L << "*x"; if (i > 1) L << "*x^" << i; if (i != d) L << " + "; } } return L << ']'; } }; // 2x2 matrix for Euclidean algorithm template class GCDMatrix : public std::array, 2> { public: GCDMatrix(const Tp &x00, const Tp &x01, const Tp &x10, const Tp &x11) : std::array, 2>{std::array{x00, x01}, std::array{x10, x11}} {} GCDMatrix operator*(const GCDMatrix &R) const { return {(*this)[0][0] * R[0][0] + (*this)[0][1] * R[1][0], (*this)[0][0] * R[0][1] + (*this)[0][1] * R[1][1], (*this)[1][0] * R[0][0] + (*this)[1][1] * R[1][0], (*this)[1][0] * R[0][1] + (*this)[1][1] * R[1][1]}; } std::array operator*(const std::array &R) const { return {(*this)[0][0] * R[0] + (*this)[0][1] * R[1], (*this)[1][0] * R[0] + (*this)[1][1] * R[1]}; } Tp det() const { return (*this)[0][0] * (*this)[1][1] - (*this)[0][1] * (*this)[1][0]; } GCDMatrix adj() const { return {(*this)[1][1], -(*this)[0][1], -(*this)[1][0], (*this)[0][0]}; } }; // returns M s.t. deg(M) <= d and deg(M21*A+M22*B) < max(deg(A),deg(B)) - d // det(M) in {-1, 1} // see: // [1]: Daniel J. Bernstein. Fast multiplication and its applications. template inline GCDMatrix> hgcd(const Poly &A, const Poly &B, int d) { using Mat = GCDMatrix>; assert(!(A.deg() < 0 && B.deg() < 0)); if (A.deg() < B.deg()) return hgcd(B, A, d) * Mat({}, {Tp(1)}, {Tp(1)}, {}); if (A.deg() < d) return hgcd(A, B, A.deg()); if (B.deg() < A.deg() - d) return Mat({Tp(1)}, {}, {}, {Tp(1)}); if (int dd = A.deg() - d * 2; dd > 0) return hgcd(A >> dd, B >> dd, d); if (d == 0) return Mat({}, {Tp(1)}, {Tp(1)}, -(A / B)); const auto M = hgcd(A, B, d / 2); const auto D = M[1][0] * A + M[1][1] * B; if (D.deg() < A.deg() - d) return M; const auto C = M[0][0] * A + M[0][1] * B; const auto [Q, R] = C.divmod(D); return hgcd(D, R, D.deg() - (A.deg() - d)) * Mat({}, {Tp(1)}, {Tp(1)}, -Q) * M; } template inline std::array, 3> xgcd(const Poly &A, const Poly &B) { const auto M = hgcd(A, B, std::max(A.deg(), B.deg())); return {M[0][0], M[0][1], M[0][0] * A + M[0][1] * B}; } template inline std::array, 2> inv_gcd(const Poly &A, const Poly &B) { const auto M = hgcd(A, B, std::max(A.deg(), B.deg())); return {M[0][0], M[0][0] * A + M[0][1] * B}; } // returns P, Q s.t. [x^[-k, 0)] P/Q = [x^[-k, 0)] A/B // where P, Q in F[x], deg(Q) is minimized // requires deg(A) < deg(B) template inline std::array, 2> rational_approximation(const Poly &A, const Poly &B, int k) { auto M = hgcd(B, A, k / 2); const auto [C, D] = M * std::array{B, A}; if (D.deg() >= 0 && D.deg() - C.deg() >= -(k - (B.deg() - C.deg()) * 2)) M = GCDMatrix>({}, {Tp(1)}, {Tp(1)}, -(C / D)) * M; return {M.adj()[1][0], M.adj()[0][0]}; } template inline std::array, 2> rational_reconstruction(const std::vector &A) { return rational_approximation(Poly(A.rbegin(), A.rend()), Poly{Tp(1)} << A.size(), A.size()); } // returns [x^[-k, 0)] A/B // requires deg(A) < deg(B) template inline std::vector fraction_to_series(const Poly &A, const Poly &B, int k) { return (((A << k) / B).rev() << (B.deg() - A.deg() - 1)).slice(0, k); } #line 2 "random.hpp" #line 2 "rng.hpp" #include #include // see: https://prng.di.unimi.it/xoshiro256starstar.c // original license CC0 1.0 class xoshiro256starstar { using u64 = std::uint64_t; static inline u64 rotl(const u64 x, int k) { return (x << k) | (x >> (64 - k)); } u64 s_[4]; u64 next() { const u64 res = rotl(s_[1] * 5, 7) * 9; const u64 t = s_[1] << 17; s_[2] ^= s_[0]; s_[3] ^= s_[1]; s_[1] ^= s_[2]; s_[0] ^= s_[3]; s_[2] ^= t; s_[3] = rotl(s_[3], 45); return res; } public: // see: https://prng.di.unimi.it/splitmix64.c // original license CC0 1.0 explicit xoshiro256starstar(u64 seed) { for (int i = 0; i < 4; ++i) { u64 z = (seed += 0x9e3779b97f4a7c15); z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9; z = (z ^ (z >> 27)) * 0x94d049bb133111eb; s_[i] = z ^ (z >> 31); } } // see: https://en.cppreference.com/w/cpp/named_req/UniformRandomBitGenerator using result_type = u64; static constexpr u64 min() { return std::numeric_limits::min(); } static constexpr u64 max() { return std::numeric_limits::max(); } u64 operator()() { return next(); } }; #line 4 "random.hpp" #include #line 6 "random.hpp" template inline std::vector random_vector(int n) { std::vector res(n); xoshiro256starstar rng(std::random_device{}()); std::uniform_int_distribution dis(0, Tp::mod() - 1); for (int i = 0; i < n; ++i) res[i] = dis(rng); return res; } template inline std::vector random_vector_without_zero(int n) { std::vector res(n); xoshiro256starstar rng(std::random_device{}()); std::uniform_int_distribution dis(1, Tp::mod() - 1); for (int i = 0; i < n; ++i) res[i] = dis(rng); return res; } #line 6 "mat_basic.hpp" #include #line 9 "mat_basic.hpp" template using Matrix = std::vector>; template inline int width(const Matrix &A) { return A.empty() ? 0 : (int)A[0].size(); } template inline int height(const Matrix &A) { return A.size(); } template inline bool is_square_matrix(const Matrix &A) { return width(A) == height(A); } template inline Matrix transpose(const Matrix &A) { const int w = width(A); const int h = height(A); Matrix TA(w, std::vector(h)); for (int i = 0; i < h; ++i) for (int j = 0; j < w; ++j) TA[j][i] = A[i][j]; return TA; } template inline std::vector mat_apply(const Matrix &A, const std::vector &b) { const int w = width(A); const int h = height(A); assert((int)b.size() == w); std::vector Ab(h); for (int i = 0; i < h; ++i) for (int j = 0; j < w; ++j) Ab[i] += A[i][j] * b[j]; return Ab; } template inline Matrix mat_mul(const Matrix &A, const Matrix &B) { const int wA = width(A); const int hA = height(A); assert(height(B) == wA); const int wB = width(B); Matrix res(hA, std::vector(wB)); for (int i = 0; i < hA; ++i) for (int k = 0; k < wA; ++k) for (int j = 0; j < wB; ++j) res[i][j] += A[i][k] * B[k][j]; return res; } template inline std::optional> mat_inv(Matrix A) { assert(is_square_matrix(A)); const int n = height(A); for (int i = 0; i < n; ++i) { A[i].resize(n * 2); A[i][n + i] = 1; } for (int i = 0; i < n; ++i) { int pivot = i; for (; pivot < n; ++pivot) if (A[pivot][i] != 0) break; if (pivot == n) return {}; if (pivot != i) A[pivot].swap(A[i]); if (A[i][i] != 1) { const auto iv = A[i][i].inv(); for (int j = i; j < n * 2; ++j) A[i][j] *= iv; } for (int j = i + 1; j < n; ++j) { const auto p = A[j][i]; if (p == 0) continue; for (int k = i + 1; k < n * 2; ++k) A[j][k] -= p * A[i][k]; } } for (int i = n - 1; i > 0; --i) { for (int j = i - 1; j >= 0; --j) { const auto p = A[j][i]; if (p == 0) continue; for (int k = n; k < n * 2; ++k) A[j][k] -= p * A[i][k]; } } for (int i = 0; i < n; ++i) A[i].erase(A[i].begin(), A[i].begin() + n); return A; } template inline Tp det(Matrix A) { assert(is_square_matrix(A)); const int n = height(A); Tp det = 1; for (int i = 0; i < n; ++i) { int pivot = i; for (; pivot < n; ++pivot) if (A[pivot][i] != 0) break; if (pivot == n) return 0; if (pivot != i) { A[pivot].swap(A[i]); det = -det; } det *= A[i][i]; const auto iv = A[i][i].inv(); for (int j = i + 1; j < n; ++j) { const auto p = A[j][i] * iv; if (p == 0) continue; for (int k = i; k < n; ++k) A[j][k] -= p * A[i][k]; } } return det; } template inline Matrix to_upper_hessenberg(Matrix A) { assert(is_square_matrix(A)); const int n = height(A); for (int i = 0; i < n - 1; ++i) { int pivot = i + 1; for (; pivot < n; ++pivot) if (A[pivot][i] != 0) break; if (pivot == n) continue; if (pivot != i + 1) { A[pivot].swap(A[i + 1]); for (int j = 0; j < n; ++j) std::swap(A[j][pivot], A[j][i + 1]); } const auto iv = A[i + 1][i].inv(); for (int j = i + 2; j < n; ++j) { if (A[j][i] == 0) continue; const auto v = A[j][i] * iv; for (int k = i; k < n; ++k) A[j][k] -= v * A[i + 1][k]; for (int k = 0; k < n; ++k) A[k][i + 1] += v * A[k][j]; } } return A; } // returns det(xI - A) template inline std::vector charpoly(const Matrix &A) { const auto H = to_upper_hessenberg(A); const int n = height(A); std::vector> P(n + 1); P[0] = {Tp(1)}; for (int i = 1; i <= n; ++i) { P[i].resize(i + 1); for (int j = 0; j < i; ++j) P[i][j] -= H[i - 1][i - 1] * P[i - 1][j], P[i][j + 1] += P[i - 1][j]; Tp t = 1; for (int j = 1; j < i; ++j) { t *= H[i - j][i - j - 1]; const auto prod = t * H[i - j - 1][i - 1]; if (prod == 0) continue; for (int k = 0; k < i - j; ++k) P[i][k] -= prod * P[i - j - 1][k]; } } return P[n]; } template inline std::vector minpoly(const Matrix &A) { assert(is_square_matrix(A)); const int n = height(A); const auto u = random_vector(n); auto v = random_vector(n); // u^T A^([0..2n)) v std::vector proj(n * 2); for (int i = 0; i < n * 2; v = mat_apply(A, v), ++i) for (int j = 0; j < n; ++j) proj[i] += u[j] * v[j]; const auto [P, Q] = rational_reconstruction(proj); assert(Q.deg() <= n); return Q.monic(); } #line 7 "mat_extra.hpp" /* test case generator: ```sage import random R. = GF(998244353)[] N = 5 d = 4 if __name__ == '__main__': print(N, d) A = zero_matrix(GF(998244353), N) L = [] for _ in range(d): B = random_matrix(GF(998244353), N, algorithm='echelonizable', rank=random.randint(1,N)) L.append(B) A = x*A + B L.reverse() for i in range(len(L)): for j in range(N): print(' '.join([str(k) for k in L[i][j].list()]), end='\n') print(det(A).list()) ``` */ // returns det(A0 + xA1 + ... + x^(d-1) Ad) // test: https://qoj.ac/contest/1536/problem/59 // see: // [1]: Elegia's comment. // https://codeforces.com/blog/entry/92248?#comment-818786 template inline std::vector det_d(Matrix> A) { assert(is_square_matrix(A)); auto sub = [](auto &a, const auto &b, Tp v, int n, int d) { if (v == 0) return; for (int i = 0; i < n; ++i) for (int j = 0; j < d; ++j) a[i][j] -= v * b[i][j]; }; const int n = height(A); const int d = (n == 0 ? 0 : (int)A[0][0].size()); Tp m = 1; for (int i = 0; i < n; ++i) { int pivot = i; for (; pivot < n; ++pivot) if (A[pivot][i][d - 1] != 0) break; if (pivot == n) continue; if (pivot != i) { A[pivot].swap(A[i]); m = -m; } m *= A[i][i][d - 1]; const auto iv = A[i][i][d - 1].inv(); for (int j = 0; j < n; ++j) for (int k = 0; k < d; ++k) A[i][j][k] *= iv; for (int j = 0; j < i; ++j) sub(A[j], A[i], A[j][i][d - 1], n, d); for (int j = i + 1; j < n; ++j) sub(A[j], A[i], A[j][i][d - 1], n, d); } int t = 0; for (; t <= n * (d - 1); ++t) { int s = 0; for (; s < n; ++s) if (std::all_of(A[s].begin(), A[s].end(), [d](const auto &a) { return a[d - 1] == 0; })) break; if (s == n) break; for (int i = 0; i < n; ++i) std::rotate(A[s][i].rbegin(), A[s][i].rbegin() + 1, A[s][i].rend()); for (int i = 0; i < s; ++i) sub(A[s], A[i], A[s][i][d - 1], n, d); for (int i = s + 1; i < n; ++i) sub(A[s], A[i], A[s][i][d - 1], n, d); int pivot = 0; for (; pivot < n; ++pivot) if (A[s][pivot][d - 1] != 0) break; if (pivot == n) continue; m *= A[s][pivot][d - 1]; auto iv = A[s][pivot][d - 1].inv(); for (int i = 0; i < n; ++i) for (int j = 0; j < d; ++j) A[s][i][j] *= iv; for (int i = 0; i < s; ++i) sub(A[i], A[s], A[i][pivot][d - 1], n, d); for (int i = s + 1; i < n; ++i) sub(A[i], A[s], A[i][pivot][d - 1], n, d); if (pivot == s) continue; A[pivot].swap(A[s]); m = -m; if (A[s][s][d - 1] == 0) continue; m *= A[s][s][d - 1]; iv = A[s][s][d - 1].inv(); for (int i = 0; i < n; ++i) for (int j = 0; j < d; ++j) A[s][i][j] *= iv; for (int i = 0; i < s; ++i) sub(A[i], A[s], A[i][s][d - 1], n, d); for (int i = s + 1; i < n; ++i) sub(A[i], A[s], A[i][s][d - 1], n, d); } if (t > n * (d - 1)) return {}; // [ I ] // [ ... ] // B = [ ... ] // [ I] // [C_0 C_1 ... C_(d-2)] // charpoly(B) = det(x^(d-1)I - ... - C_0) (Elegia, zx2003, mayaohua2003). Matrix B(n * (d - 1), std::vector(n * (d - 1))); for (int i = 0; i < d - 1; ++i) for (int j = 0; j < n; ++j) for (int k = 0; k < n; ++k) B[(d - 2) * n + j][i * n + k] = -A[j][k][i]; for (int i = 0; i < d - 2; ++i) for (int j = 0; j < n; ++j) B[i * n + j][(i + 1) * n + j] = 1; auto res = charpoly(B); res.erase(res.begin(), res.begin() + t); for (int i = 0; i < (int)res.size(); ++i) res[i] *= m; return res; } #line 2 "modint.hpp" #line 5 "modint.hpp" // clang-format off template class ModInt { static_assert((Mod >> 31) == 0, "`Mod` must less than 2^(31)"); template static std::enable_if_t, unsigned> safe_mod(Int v) { using D = std::common_type_t; return (v %= (int)Mod) < 0 ? (D)(v + (int)Mod) : (D)v; } struct PrivateConstructor {} static inline private_constructor{}; ModInt(PrivateConstructor, unsigned v) : v_(v) {} unsigned v_; public: static unsigned mod() { return Mod; } static ModInt from_raw(unsigned v) { return ModInt(private_constructor, v); } static ModInt zero() { return from_raw(0); } static ModInt one() { return from_raw(1); } bool is_zero() const { return v_ == 0; } bool is_one() const { return v_ == 1; } ModInt() : v_() {} template, int> = 0> ModInt(Int v) : v_(safe_mod(v)) {} template, int> = 0> ModInt(Int v) : v_(v % Mod) {} unsigned val() const { return v_; } ModInt operator-() const { return from_raw(v_ == 0 ? v_ : Mod - v_); } ModInt pow(long long e) const { if (e < 0) return inv().pow(-e); for (ModInt x(*this), res(from_raw(1));; x *= x) { if (e & 1) res *= x; if ((e >>= 1) == 0) return res; }} ModInt inv() const { int x1 = 1, x3 = 0, a = val(), b = Mod; while (b) { const int q = a / b, x1_old = x1, a_old = a; x1 = x3, x3 = x1_old - x3 * q, a = b, b = a_old - b * q; } return from_raw(x1 < 0 ? x1 + (int)Mod : x1); } template std::enable_if_t div_by_2() const { if (v_ & 1) return from_raw((v_ + Mod) >> 1); return from_raw(v_ >> 1); } ModInt &operator+=(const ModInt &a) { if ((v_ += a.v_) >= Mod) v_ -= Mod; return *this; } ModInt &operator-=(const ModInt &a) { if ((v_ += Mod - a.v_) >= Mod) v_ -= Mod; return *this; } ModInt &operator*=(const ModInt &a) { v_ = (unsigned long long)v_ * a.v_ % Mod; return *this; } ModInt &operator/=(const ModInt &a) { return *this *= a.inv(); } ModInt &operator++() { return *this += one(); } ModInt operator++(int) { ModInt o(*this); *this += one(); return o; } ModInt &operator--() { return *this -= one(); } ModInt operator--(int) { ModInt o(*this); *this -= one(); return o; } friend ModInt operator+(const ModInt &a, const ModInt &b) { return ModInt(a) += b; } friend ModInt operator-(const ModInt &a, const ModInt &b) { return ModInt(a) -= b; } friend ModInt operator*(const ModInt &a, const ModInt &b) { return ModInt(a) *= b; } friend ModInt operator/(const ModInt &a, const ModInt &b) { return ModInt(a) /= b; } friend bool operator==(const ModInt &a, const ModInt &b) { return a.v_ == b.v_; } friend bool operator!=(const ModInt &a, const ModInt &b) { return a.v_ != b.v_; } friend std::istream &operator>>(std::istream &a, ModInt &b) { int v; a >> v; b.v_ = safe_mod(v); return a; } friend std::ostream &operator<<(std::ostream &a, const ModInt &b) { return a << b.val(); } }; // clang-format on #line 7 "test/matrix/yukicoder1907.0.test.cpp" int main() { std::ios::sync_with_stdio(false); std::cin.tie(nullptr); using mint = ModInt<998244353>; int n; std::cin >> n; Matrix> A(n, std::vector(n, std::vector(2))); for (int i = 0; i < n; ++i) { for (int j = 0; j < n; ++j) { std::cin >> A[i][j][0]; } } for (int i = 0; i < n; ++i) { for (int j = 0; j < n; ++j) { std::cin >> A[i][j][1]; } } auto d = det_d(A); d.resize(n + 1); for (int i = 0; i <= n; ++i) std::cout << d[i] << '\n'; return 0; }