結果
| 問題 | No.3285 Chorus with Friends |
| コンテスト | |
| ユーザー | hitonanode |
| 提出日時 | 2025-09-26 22:40:44 |
| 言語 | C++23 (gcc 13.3.0 + boost 1.87.0) |
| 結果 | AC |
| 実行時間 | 864 ms / 3,000 ms |
| コード長 | 23,702 bytes |
| コンパイル時間 | 3,102 ms |
| コンパイル使用メモリ | 251,076 KB |
| 実行使用メモリ | 8,320 KB |
| 最終ジャッジ日時 | 2025-09-26 22:40:52 |
| 合計ジャッジ時間 | 7,713 ms |
| ジャッジサーバーID (参考情報) | judge1 / judge5 |
(要ログイン)
| ファイルパターン | 結果 |
|---|---|
| sample | AC * 3 |
| other | AC * 40 |
ソースコード
#include <algorithm>
#include <array>
#include <bitset>
#include <cassert>
#include <chrono>
#include <cmath>
#include <complex>
#include <deque>
#include <forward_list>
#include <fstream>
#include <functional>
#include <iomanip>
#include <ios>
#include <iostream>
#include <limits>
#include <list>
#include <map>
#include <memory>
#include <numeric>
#include <optional>
#include <queue>
#include <random>
#include <set>
#include <sstream>
#include <stack>
#include <string>
#include <tuple>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
using namespace std;
using lint = long long;
using pint = pair<int, int>;
using plint = pair<lint, lint>;
// Stream setup that runs once, when the global object `fast_ios_` is constructed.
struct fast_ios {
    fast_ios() {
        std::cin.tie(nullptr);             // cin no longer flushes cout before each read
        std::ios::sync_with_stdio(false);  // stop synchronising with C stdio
        std::cout << std::fixed << std::setprecision(20);
    }
} fast_ios_;
// Iterator-pair shorthand: expands to `(x).begin(), (x).end()`.
#define ALL(x) (x).begin(), (x).end()
// Ascending int loop, i = begin .. end-1; `end` is evaluated once into the helper `i##_end_`.
#define FOR(i, begin, end) for(int i=(begin),i##_end_=(end);i<i##_end_;i++)
// Descending int loop, i = end-1 .. begin; `begin` is evaluated once into the helper `i##_begin_`.
#define IFOR(i, begin, end) for(int i=(end)-1,i##_begin_=(begin);i>=i##_begin_;i--)
// FOR over 0 .. n-1.
#define REP(i, n) FOR(i,0,n)
// IFOR over n-1 .. 0.
#define IREP(i, n) IFOR(i,0,n)
// Raises `m` to `q` when `q` is strictly greater; reports whether `m` was changed.
template <typename T> bool chmax(T &m, const T q) {
    if (m < q) {
        m = q;
        return true;
    }
    return false;
}
// Lowers `m` to `q` when `q` is strictly smaller; reports whether `m` was changed.
template <typename T> bool chmin(T &m, const T q) {
    if (m > q) {
        m = q;
        return true;
    }
    return false;
}
// The four unit offsets (1,0), (-1,0), (0,1), (0,-1).
const std::vector<std::pair<int, int>> grid_dxs = {
    std::make_pair(1, 0),
    std::make_pair(-1, 0),
    std::make_pair(0, 1),
    std::make_pair(0, -1),
};
// Index of the highest set bit of x, i.e. floor(log2(x)); -1 when x <= 0.
int floor_lg(long long x) {
    if (x <= 0) return -1;
    const int leading_zeros = __builtin_clzll(x);
    return 63 - leading_zeros;
}
// floor(num / den) for integer operands, rounding toward negative infinity.
// Works for either sign of `den` (den != 0). The quotient is taken with the built-in
// truncating division and lowered by one when the remainder is non-zero and the
// operands have opposite signs, so no negation of `num` (which could overflow) is needed.
template <class T1, class T2> T1 floor_div(T1 num, T2 den) {
    auto quot = num / den;
    const auto rem = num % den;
    // a non-zero remainder carries the sign of `num`; it differs from the sign of
    // `den` exactly when truncation rounded the quotient upwards
    if (rem != 0 && ((rem < 0) != (den < 0))) --quot;
    return static_cast<T1>(quot);
}
// Component-wise sum of two pairs.
template <class T1, class T2> std::pair<T1, T2> operator+(const std::pair<T1, T2> &l, const std::pair<T1, T2> &r) {
    return std::pair<T1, T2>(l.first + r.first, l.second + r.second);
}
// Component-wise difference of two pairs.
template <class T1, class T2> std::pair<T1, T2> operator-(const std::pair<T1, T2> &l, const std::pair<T1, T2> &r) {
    return std::pair<T1, T2>(l.first - r.first, l.second - r.second);
}
// Returns the elements of `vec` in ascending order with duplicates removed.
template <class T> std::vector<T> sort_unique(std::vector<T> vec) {
    std::sort(vec.begin(), vec.end());
    const auto dup_begin = std::unique(vec.begin(), vec.end());
    vec.erase(dup_begin, vec.end());
    return vec;
}
// Index of the first element of the sorted vector `v` that is not less than `x`
// (v.size() when there is none).
template <class T> int arglb(const std::vector<T> &v, const T &x) {
    const auto pos = std::lower_bound(v.begin(), v.end(), x);
    return static_cast<int>(pos - v.begin());
}
// Index of the first element of the sorted vector `v` that is greater than `x`
// (v.size() when there is none).
template <class T> int argub(const std::vector<T> &v, const T &x) {
    const auto pos = std::upper_bound(v.begin(), v.end(), x);
    return static_cast<int>(pos - v.begin());
}
// Reads vec.size() values from `is`, filling the vector front to back.
template <class IStream, class T> IStream &operator>>(IStream &is, std::vector<T> &vec) {
    for (std::size_t idx = 0; idx < vec.size(); ++idx) is >> vec[idx];
    return is;
}
template <class OStream, class T> OStream &operator<<(OStream &os, const std::vector<T> &vec);
template <class OStream, class T, size_t sz> OStream &operator<<(OStream &os, const std::array<T, sz> &arr);
template <class OStream, class T, class TH> OStream &operator<<(OStream &os, const std::unordered_set<T, TH> &vec);
template <class OStream, class T, class U> OStream &operator<<(OStream &os, const pair<T, U> &pa);
template <class OStream, class T> OStream &operator<<(OStream &os, const std::deque<T> &vec);
template <class OStream, class T> OStream &operator<<(OStream &os, const std::set<T> &vec);
template <class OStream, class T> OStream &operator<<(OStream &os, const std::multiset<T> &vec);
template <class OStream, class T> OStream &operator<<(OStream &os, const std::unordered_multiset<T> &vec);
template <class OStream, class T, class U> OStream &operator<<(OStream &os, const std::pair<T, U> &pa);
template <class OStream, class TK, class TV> OStream &operator<<(OStream &os, const std::map<TK, TV> &mp);
template <class OStream, class TK, class TV, class TH> OStream &operator<<(OStream &os, const std::unordered_map<TK, TV, TH> &mp);
template <class OStream, class... T> OStream &operator<<(OStream &os, const std::tuple<T...> &tpl);
template <class OStream, class T> OStream &operator<<(OStream &os, const std::vector<T> &vec) { os << '['; for (auto v : vec) os << v << ','; os << ']'; return os; }
template <class OStream, class T, size_t sz> OStream &operator<<(OStream &os, const std::array<T, sz> &arr) { os << '['; for (auto v : arr) os << v << ','; os << ']'; return os; }
template <class... T> std::istream &operator>>(std::istream &is, std::tuple<T...> &tpl) { std::apply([&is](auto &&... args) { ((is >> args), ...);}, tpl); return is; }
template <class OStream, class... T> OStream &operator<<(OStream &os, const std::tuple<T...> &tpl) { os << '('; std::apply([&os](auto &&... args) { ((os << args << ','), ...);}, tpl); return os << ')'; }
template <class OStream, class T, class TH> OStream &operator<<(OStream &os, const std::unordered_set<T, TH> &vec) { os << '{'; for (auto v : vec) os << v << ','; os << '}'; return os; }
template <class OStream, class T> OStream &operator<<(OStream &os, const std::deque<T> &vec) { os << "deq["; for (auto v : vec) os << v << ','; os << ']'; return os; }
template <class OStream, class T> OStream &operator<<(OStream &os, const std::set<T> &vec) { os << '{'; for (auto v : vec) os << v << ','; os << '}'; return os; }
template <class OStream, class T> OStream &operator<<(OStream &os, const std::multiset<T> &vec) { os << '{'; for (auto v : vec) os << v << ','; os << '}'; return os; }
template <class OStream, class T> OStream &operator<<(OStream &os, const std::unordered_multiset<T> &vec) { os << '{'; for (auto v : vec) os << v << ','; os << '}'; return os; }
template <class OStream, class T, class U> OStream &operator<<(OStream &os, const std::pair<T, U> &pa) { return os << '(' << pa.first << ',' << pa.second << ')'; }
template <class OStream, class TK, class TV> OStream &operator<<(OStream &os, const std::map<TK, TV> &mp) { os << '{'; for (auto v : mp) os << v.first << "=>" << v.second << ','; os << '}'; return os; }
template <class OStream, class TK, class TV, class TH> OStream &operator<<(OStream &os, const std::unordered_map<TK, TV, TH> &mp) { os << '{'; for (auto v : mp) os << v.first << "=>" << v.second << ','; os << '}'; return os; }
// Debug printing. When HITONANODE_LOCAL is defined, dbg(x) writes the expression text,
// its value, the line number and the file name to std::cerr using ANSI colour escapes;
// dbgif(cond, x) does the same only when `cond` holds. Otherwise both expand to a no-op,
// so their arguments are not evaluated.
#ifdef HITONANODE_LOCAL
const string COLOR_RESET = "\033[0m", BRIGHT_GREEN = "\033[1;32m", BRIGHT_RED = "\033[1;31m", BRIGHT_CYAN = "\033[1;36m", NORMAL_CROSSED = "\033[0;9;37m", RED_BACKGROUND = "\033[1;41m", NORMAL_FAINT = "\033[0;2m";
#define dbg(x) std::cerr << BRIGHT_CYAN << #x << COLOR_RESET << " = " << (x) << NORMAL_FAINT << " (L" << __LINE__ << ") " << __FILE__ << COLOR_RESET << std::endl
#define dbgif(cond, x) ((cond) ? std::cerr << BRIGHT_CYAN << #x << COLOR_RESET << " = " << (x) << NORMAL_FAINT << " (L" << __LINE__ << ") " << __FILE__ << COLOR_RESET << std::endl : std::cerr)
#else
#define dbg(x) ((void)0)
#define dbgif(cond, x) ((void)0)
#endif
#include <cassert>
#include <iostream>
#include <set>
#include <vector>
// Integer modulo the compile-time constant `md`; the residue is stored in val_ in [0, md).
// inv(), the factorial-inverse tables and sqrt() raise to the power md - 2 or (md - 1) / 2,
// so they presumably require md to be prime — the code itself does not check this.
template <int md> struct ModInt {
static_assert(md > 1);
using lint = long long;
constexpr static int mod() { return md; }
// Computes once (cached in a function-local static) the smallest g in [1, md) such that
// g^((md-1)/p) != 1 for every prime factor p of md - 1; yields -1 when no g qualifies.
static int get_primitive_root() {
static int primitive_root = 0;
if (!primitive_root) {
primitive_root = [&]() {
// fac = set of distinct prime factors of md - 1, found by trial division
std::set<int> fac;
int v = md - 1;
for (lint i = 2; i * i <= v; i++)
while (v % i == 0) fac.insert(i), v /= i;
if (v > 1) fac.insert(v);
for (int g = 1; g < md; g++) {
bool ok = true;
for (auto i : fac)
if (ModInt(g).pow((md - 1) / i) == 1) {
ok = false;
break;
}
if (ok) return g;
}
return -1;
}();
}
return primitive_root;
}
// The residue.
int val_;
int val() const noexcept { return val_; }
constexpr ModInt() : val_(0) {}
// Stores v after a single conditional subtraction of md, so v must already lie in [0, 2*md).
constexpr ModInt &_setval(lint v) { return val_ = (v >= md ? v - md : v), *this; }
// Accepts any long long, negative values included: v % md + md lies in (0, 2*md).
constexpr ModInt(lint v) { _setval(v % md + md); }
// True when the residue is non-zero.
constexpr explicit operator bool() const { return val_ != 0; }
constexpr ModInt operator+(const ModInt &x) const {
return ModInt()._setval((lint)val_ + x.val_);
}
constexpr ModInt operator-(const ModInt &x) const {
return ModInt()._setval((lint)val_ - x.val_ + md);
}
constexpr ModInt operator*(const ModInt &x) const {
return ModInt()._setval((lint)val_ * x.val_ % md);
}
// Division is multiplication by x.inv().
constexpr ModInt operator/(const ModInt &x) const {
return ModInt()._setval((lint)val_ * x.inv().val() % md);
}
constexpr ModInt operator-() const { return ModInt()._setval(md - val_); }
constexpr ModInt &operator+=(const ModInt &x) { return *this = *this + x; }
constexpr ModInt &operator-=(const ModInt &x) { return *this = *this - x; }
constexpr ModInt &operator*=(const ModInt &x) { return *this = *this * x; }
constexpr ModInt &operator/=(const ModInt &x) { return *this = *this / x; }
// Mixed forms with a plain integer on the left-hand side.
friend constexpr ModInt operator+(lint a, const ModInt &x) { return ModInt(a) + x; }
friend constexpr ModInt operator-(lint a, const ModInt &x) { return ModInt(a) - x; }
friend constexpr ModInt operator*(lint a, const ModInt &x) { return ModInt(a) * x; }
friend constexpr ModInt operator/(lint a, const ModInt &x) { return ModInt(a) / x; }
constexpr bool operator==(const ModInt &x) const { return val_ == x.val_; }
constexpr bool operator!=(const ModInt &x) const { return val_ != x.val_; }
// Orders by residue.
constexpr bool operator<(const ModInt &x) const {
return val_ < x.val_;
} // To use std::map<ModInt, T>
// Reads a long long and reduces it modulo md.
friend std::istream &operator>>(std::istream &is, ModInt &x) {
lint t;
return is >> t, x = ModInt(t), is;
}
// Writes the residue.
constexpr friend std::ostream &operator<<(std::ostream &os, const ModInt &x) {
return os << x.val_;
}
// (*this)^n by repeated squaring. The loop runs until n becomes 0 under n >>= 1,
// so n is expected to be non-negative.
constexpr ModInt pow(lint n) const {
ModInt ans = 1, tmp = *this;
while (n) {
if (n & 1) ans *= tmp;
tmp *= tmp, n >>= 1;
}
return ans;
}
// Residues below this bound get their inverse from the `invs` table; larger ones use pow().
static constexpr int cache_limit = std::min(md, 1 << 21);
// Lazily grown tables: facs[i] = i!, facinvs[i] = 1 / i!, invs[i] = 1 / i (invs[0] = 0).
static std::vector<ModInt> facs, facinvs, invs;
// Extends the three tables to length N (clamped to md); does nothing when they already
// hold at least N entries. Requires the tables to be non-empty (it reads facs[i - 1]).
constexpr static void _precalculation(int N) {
const int l0 = facs.size();
if (N > md) N = md;
if (N <= l0) return;
facs.resize(N), facinvs.resize(N), invs.resize(N);
for (int i = l0; i < N; i++) facs[i] = facs[i - 1] * i;
// one modular exponentiation for the last entry, then fill downwards
facinvs[N - 1] = facs.back().pow(md - 2);
for (int i = N - 2; i >= l0; i--) facinvs[i] = facinvs[i + 1] * (i + 1);
for (int i = N - 1; i >= l0; i--) invs[i] = facinvs[i] * facs[i - 1];
}
// Multiplicative inverse. Small residues are looked up in `invs` (the tables are doubled
// until they cover val_); others are computed as pow(md - 2). The table entry for 0 is 0.
constexpr ModInt inv() const {
if (this->val_ < cache_limit) {
// re-seed the tables if they are still empty when this is called
if (facs.empty()) facs = {1}, facinvs = {1}, invs = {0};
while (this->val_ >= int(facs.size())) _precalculation(facs.size() * 2);
return invs[this->val_];
} else {
return this->pow(md - 2);
}
}
// n! from the table; 0 when n >= md.
constexpr static ModInt fac(int n) {
assert(n >= 0);
if (n >= md) return ModInt(0);
while (n >= int(facs.size())) _precalculation(facs.size() * 2);
return facs[n];
}
// 1 / n! from the table; returns 0 when n >= md.
constexpr static ModInt facinv(int n) {
assert(n >= 0);
if (n >= md) return ModInt(0);
while (n >= int(facs.size())) _precalculation(facs.size() * 2);
return facinvs[n];
}
// Double factorial n!!. With k = (n + 1) / 2: odd n gives (2k)! / (2^k k!),
// even n gives k! * 2^k.
constexpr static ModInt doublefac(int n) {
assert(n >= 0);
if (n >= md) return ModInt(0);
long long k = (n + 1) / 2;
return (n & 1) ? ModInt::fac(k * 2) / (ModInt(2).pow(k) * ModInt::fac(k))
: ModInt::fac(k) * ModInt(2).pow(k);
}
// Binomial coefficient n! / (r! (n-r)!) via the tables; 0 when r < 0 or r > n.
constexpr static ModInt nCr(int n, int r) {
assert(n >= 0);
if (r < 0 or n < r) return ModInt(0);
return ModInt::fac(n) * ModInt::facinv(r) * ModInt::facinv(n - r);
}
// Falling factorial n! / (n-r)!; 0 when r < 0 or r > n.
constexpr static ModInt nPr(int n, int r) {
assert(n >= 0);
if (r < 0 or n < r) return ModInt(0);
return ModInt::fac(n) * ModInt::facinv(n - r);
}
// Binomial coefficient that can skip growing the tables up to n: when n is already
// covered by the tables, or does not exceed the accumulated bruteforce_times, it defers
// to nCr; otherwise it multiplies n (n-1) ... (n-r+1) by 1 / r! directly (with
// r = min(r, n-r)) and adds r to bruteforce_times.
static ModInt binom(int n, int r) {
static long long bruteforce_times = 0;
if (r < 0 or n < r) return ModInt(0);
if (n <= bruteforce_times or n < (int)facs.size()) return ModInt::nCr(n, r);
r = std::min(r, n - r);
ModInt ret = ModInt::facinv(r);
for (int i = 0; i < r; ++i) ret *= n - i;
bruteforce_times += r;
return ret;
}
// Multinomial coefficient, (k_1 + k_2 + ... + k_m)! / (k_1! k_2! ... k_m!)
// Complexity: O(sum(ks))
template <class Vec> static ModInt multinomial(const Vec &ks) {
ModInt ret{1};
int sum = 0;
for (int k : ks) {
assert(k >= 0);
ret *= ModInt::facinv(k), sum += k;
}
return ret * ModInt::fac(sum);
}
// Same coefficient with the k_i passed as separate arguments.
template <class... Args> static ModInt multinomial(Args... args) {
int sum = (0 + ... + args);
ModInt result = (1 * ... * ModInt::facinv(args));
return ModInt::fac(sum) * result;
}
// Catalan number, C_n = binom(2n, n) / (n + 1) = # of Dyck words of length 2n
// C_0 = 1, C_1 = 1, C_2 = 2, C_3 = 5, C_4 = 14, ...
// https://oeis.org/A000108
// Complexity: O(n)
static ModInt catalan(int n) {
if (n < 0) return ModInt(0);
return ModInt::fac(n * 2) * ModInt::facinv(n + 1) * ModInt::facinv(n);
}
// Modular square root. Returns 0 when the value is 0 or when pow((md-1)/2) != 1
// (the test used here for "no root exists"); otherwise returns a root x, choosing the
// smaller of x and md - x.
ModInt sqrt() const {
if (val_ == 0) return 0;
if (md == 2) return val_;
if (pow((md - 1) / 2) != 1) return 0;
// b: first value (counting up from 1) whose (md-1)/2-th power is not 1
ModInt b = 1;
while (b.pow((md - 1) / 2) == 1) b += 1;
// write md - 1 = m * 2^e with m odd
int e = 0, m = md - 1;
while (m % 2 == 0) m >>= 1, e++;
ModInt x = pow((m - 1) / 2), y = (*this) * x * x;
x *= (*this);
ModInt z = b.pow(m);
// x * x == (*this) * y holds throughout; iterate until y == 1
while (y != 1) {
// j = number of squarings needed to bring y to 1
int j = 0;
ModInt t = y;
while (t != 1) j++, t *= t;
z = z.pow(1LL << (e - j - 1));
x *= z, z *= z, y *= z;
e = j;
}
return ModInt(std::min(x.val_, md - x.val_));
}
};
// Initial table contents: 0! = 1, 1 / 0! = 1, and a 0 placeholder for the inverse of 0.
template <int md> std::vector<ModInt<md>> ModInt<md>::facs = {1};
template <int md> std::vector<ModInt<md>> ModInt<md>::facinvs = {1};
template <int md> std::vector<ModInt<md>> ModInt<md>::invs = {0};
// Modulus used by the rest of this file.
using mint = ModInt<998244353>;
#include <algorithm>
#include <cassert>
#include <vector>
// Subset-sum (zeta) transform, in place: afterwards f[S] is the sum of the old f[T] over
// all T that are subsets of S.
// f.size() must be a power of two. Complexity: O(N 2^N) for size 2^N.
template <typename T> void subset_sum(std::vector<T> &f) {
    const int len = static_cast<int>(f.size());
    assert(__builtin_popcount(len) == 1);
    for (int bit = 1; bit < len; bit <<= 1) {
        // blocks of 2*bit indices: the upper half of each block has `bit` set
        for (int base = 0; base < len; base += bit << 1) {
            for (int lo = base; lo < base + bit; ++lo) f[lo + bit] += f[lo];
        }
    }
}
// Inverse of the subset-sum transform (Moebius transform), in place.
// g.size() must be a power of two. Complexity: O(N 2^N) for size 2^N.
template <typename T> void subset_sum_inv(std::vector<T> &g) {
    const int len = static_cast<int>(g.size());
    assert(__builtin_popcount(len) == 1);
    for (int bit = 1; bit < len; bit <<= 1) {
        // blocks of 2*bit indices: the upper half of each block has `bit` set
        for (int base = 0; base < len; base += bit << 1) {
            for (int lo = base; lo < base + bit; ++lo) g[lo + bit] -= g[lo];
        }
    }
}
// Superset-sum (zeta) transform, in place: afterwards f[S] is the sum of the old f[T] over
// all T that contain S.
// f.size() must be a power of two. Complexity: O(N 2^N) for size 2^N.
template <typename T> void superset_sum(std::vector<T> &f) {
    const int len = static_cast<int>(f.size());
    assert(__builtin_popcount(len) == 1);
    for (int bit = 1; bit < len; bit <<= 1) {
        // blocks of 2*bit indices: the lower half of each block lacks `bit`
        for (int base = 0; base < len; base += bit << 1) {
            for (int lo = base; lo < base + bit; ++lo) f[lo] += f[lo + bit];
        }
    }
}
// Inverse of the superset-sum transform (Moebius transform), in place.
// g.size() must be a power of two. Complexity: O(N 2^N) for size 2^N.
template <typename T> void superset_sum_inv(std::vector<T> &g) {
    const int len = static_cast<int>(g.size());
    assert(__builtin_popcount(len) == 1);
    for (int bit = 1; bit < len; bit <<= 1) {
        // blocks of 2*bit indices: the lower half of each block lacks `bit`
        for (int base = 0; base < len; base += bit << 1) {
            for (int lo = base; lo < base + bit; ++lo) g[lo] -= g[lo + bit];
        }
    }
}
// Splits f by popcount of the index into D layers (layer c holds f[i] at position i when
// popcount(i) == c, zero elsewhere) and applies the subset-sum transform to every layer.
template <typename T> std::vector<std::vector<T>> build_zeta_(int D, const std::vector<T> &f) {
    const int len = static_cast<int>(f.size());
    std::vector<std::vector<T>> layers(D, std::vector<T>(len));
    for (int mask = 0; mask < len; ++mask) {
        layers[__builtin_popcount(mask)][mask] += f[mask];
    }
    for (int c = 0; c < D; ++c) subset_sum(layers[c]);
    return layers;
}
// Combines two layered zeta tables (as produced by build_zeta_): for each rank d the
// layers are multiplied pointwise and summed over e + (d - e) = d, the sum is transformed
// back with subset_sum_inv, and the entries whose index has popcount d are kept.
template <typename T>
std::vector<T> get_moebius_of_prod_(const std::vector<std::vector<T>> &mat1,
                                    const std::vector<std::vector<T>> &mat2) {
    const int layers = mat1.size();
    const int len = mat1[0].size();
    std::vector<T> result(len), rank_sum;
    for (int d = 0; d < layers; ++d) {
        rank_sum.assign(mat1[d].size(), 0);
        for (int i = 0; i < int(rank_sum.size()); ++i) {
            for (int e = 0; e <= d; ++e) rank_sum[i] += mat1[e][i] * mat2[d - e][i];
        }
        subset_sum_inv(rank_sum);
        for (int i = 0; i < len; ++i) {
            if (__builtin_popcount(i) == d) result[i] = rank_sum[i];
        }
    }
    return result;
}
// Subset convolution:
// h[S] = \sum_{T subset of S} f[T] * g[S - T]
// f and g must have the same power-of-two size. Complexity: O(N^2 2^N) for size 2^N.
template <typename T> std::vector<T> subset_convolution(std::vector<T> f, std::vector<T> g) {
    const int len = f.size();
    const int layers = __builtin_ctz(len) + 1;
    assert(__builtin_popcount(len) == 1 and f.size() == g.size());
    const auto zeta_f = build_zeta_(layers, f);
    const auto zeta_g = build_zeta_(layers, g);
    return get_moebius_of_prod_(zeta_f, zeta_g);
}
// Applies `func` to a set function through its layered zeta table: for every index the
// column of layer values (length log2(size) + 1) is handed to `func`, which rewrites it in
// place; the layers are then transformed back and f[i] is read from layer popcount(i).
// https://hos-lyric.hatenablog.com/entry/2021/01/14/201231
template <class T, class Function> void subset_func(std::vector<T> &f, const Function &func) {
    const int len = f.size();
    const int layers = __builtin_ctz(len) + 1;
    assert(__builtin_popcount(len) == 1);
    auto table = build_zeta_(layers, f);
    std::vector<T> column(layers);
    for (int i = 0; i < len; ++i) {
        for (int c = 0; c < layers; ++c) column[c] = table[c][i];
        func(column);
        for (int c = 0; c < layers; ++c) table[c][i] = column[c];
    }
    for (int c = 0; c < layers; ++c) subset_sum_inv(table[c]);
    for (int i = 0; i < len; ++i) f[i] = table[__builtin_popcount(i)][i];
}
// Replaces f with the coefficients of log(f(x)), truncated to the same length.
// Requires f[0] == 1 and T::inv(). Complexity: O(m^2) for m coefficients.
template <class T> void poly_log(std::vector<T> &f) {
    assert(f.at(0) == T(1));
    // invs[i] = 1 / i, shared between calls and grown on demand (slot 0 is a placeholder)
    static std::vector<T> invs{0};
    const int len = f.size();
    // recip = 1 / f as a power series
    std::vector<T> recip(len);
    for (int d = 0; d < len; ++d) {
        recip[d] = (d == 0);
        if (int(invs.size()) <= d) invs.push_back(T(d).inv());
        for (int e = 0; e < d; ++e) recip[d] -= recip[e] * f[d - e];
    }
    // log f = integral of f' / f: the term d * f[d] * recip[e] lands on degree d + e
    std::vector<T> out(len);
    for (int d = 1; d < len; ++d) {
        for (int deg = d; deg < len; ++deg) out[deg] += f[d] * d * recip[deg - d] * invs[deg];
    }
    f.swap(out);
}
// log of a set function f(S) with f(empty set) == 1, computed in place.
// Requires inv()
// Complexity: O(n^2 2^n)
// https://atcoder.jp/contests/abc213/tasks/abc213_g
template <class T> void subset_log(std::vector<T> &f) {
    const auto log_of_column = [](std::vector<T> &column) { poly_log(column); };
    subset_func(f, log_of_column);
}
// exp of a set function f(S) with f(empty set) == 0, computed in place.
// The result is built one element at a time: the block of new entries for element d is
// the subset convolution of f restricted to sets whose top element is d with the result so far.
// Complexity: O(n^2 2^n)
// https://codeforces.com/blog/entry/92183
template <class T> void subset_exp(std::vector<T> &f) {
    const int total = f.size();
    const int bits = __builtin_ctz(total);
    assert(total == 1 << bits);
    assert(f.at(0) == 0);
    std::vector<T> acc;
    acc.reserve(total);
    acc.push_back(T(1));
    for (int half = 1; half < total; half <<= 1) {
        const std::vector<T> grown =
            subset_convolution(std::vector<T>(f.begin() + half, f.begin() + 2 * half), acc);
        acc.insert(acc.end(), grown.begin(), grown.end());
    }
    f.swap(acc);
}
// sqrt(f(x)) as a power series, computed in place; requires f(0) == 1.
// Requires inv of 2
// Complexity: O(n^2)
//
// With g = sqrt(f) and g[0] = 1, comparing coefficients of g^2 = f gives
//   g[d] = (f[d] - [d even] g[d/2]^2) / 2 - sum_{1 <= e < d - e} g[e] g[d - e].
// Entries below d already hold g when position d is processed.
template <class T> void poly_sqrt(std::vector<T> &f) {
    assert(f.at(0) == T(1));
    const int m = f.size();
    static const auto inv2 = T(2).inv();
    for (int d = 1; d < m; d++) {
        // the unpaired middle term g[d/2]^2 exists only when d is even
        if ((d & 1) == 0) f[d] -= f[d / 2] * f[d / 2];
        f[d] *= inv2;
        // the remaining terms come in symmetric pairs (e, d - e), e < d - e
        for (int e = 1; e < d - e; e++) f[d] -= f[e] * f[d - e];
    }
}
// sqrt of a set function f(S) with f(empty set) == 1, computed in place.
// Requires inv()
// https://atcoder.jp/contests/xmascon20/tasks/xmascon20_h
template <class T> void subset_sqrt(std::vector<T> &f) {
    const auto sqrt_of_column = [](std::vector<T> &column) { poly_sqrt(column); };
    subset_func(f, sqrt_of_column);
}
// Replaces P with the coefficients of exp(P(x)), truncated to the same length.
// Requires P[0] == 0 and T::inv(). The series is built one coefficient at a time while
// its reciprocal and its logarithm are kept up to date, so that log(result) == P.
template <class T> void poly_exp(std::vector<T> &P) {
    const int len = P.size();
    assert(len and P[0] == 0);
    // invs[i] = 1 / i, shared between calls and grown on demand (slot 0 is a placeholder)
    static std::vector<T> invs{0};
    std::vector<T> expo(len), lg(len), recip(len);
    expo[0] = recip[0] = T(1);
    // Recomputes recip[d] (coefficient of 1 / expo) and lg[d] (coefficient of log expo)
    // from the current coefficients of expo.
    const auto refresh = [&](int d) {
        T r = 0;
        for (int e = 0; e < d; ++e) r -= recip[e] * expo[d - e];
        recip[d] = r;
        for (int nxt = invs.size(); nxt <= d; ++nxt) invs.push_back(T(nxt).inv());
        T l = 0;
        for (int e = 1; e <= d; ++e) l += expo[e] * e * recip[d - e];
        l *= invs[d];
        lg[d] = l;
    };
    for (int d = 1; d < len; ++d) {
        // lg[d] was computed with expo[d] still 0; the shortfall is exactly the new coefficient
        expo[d] += P[d] - lg[d];
        refresh(d);
        assert(lg[d] == P[d]);
        if (d + 1 < len) refresh(d + 1);
    }
    P.swap(expo);
}
// f(S)^k for set function f(S), computed in place.
// Requires inv() and pow() on T.
// k == 0 yields the identity (1 at index 0, 0 elsewhere) and k == 1 leaves f unchanged;
// negative k is not handled and also leaves f unchanged.
// For k >= 2 each layered column p (see subset_func) is raised to the k-th power as a
// truncated power series: strip the leading zeros, normalise the first non-zero
// coefficient to 1, take exp(k * log(.)), then restore the scale and the shift.
template <class T> void subset_pow(std::vector<T> &f, long long k) {
    auto poly_pow = [&](std::vector<T> &p) {
        const int m = p.size();
        if (k == 0) p[0] = 1, std::fill(p.begin() + 1, p.end(), T(0));
        if (k <= 1) return;
        int nzero = 0;
        while (nzero < m and p[nzero] == T(0)) nzero++;
        // The lowest non-zero term of p^k has degree nzero * k. `k >= m` is tested first
        // so that the product nzero * k is only formed when k < m and cannot overflow.
        if (nzero > 0 and (k >= m or nzero * k >= m)) {
            std::fill(p.begin(), p.end(), 0);
            return;
        }
        // number of coefficients that survive the shift by nzero * k
        const int rem = m - static_cast<int>(nzero * k);
        p.erase(p.begin(), p.begin() + nzero);
        p.resize(rem);
        const T f0 = p.at(0), f0inv = f0.inv(), f0pow = f0.pow(k);
        for (auto &x : p) x *= f0inv;
        poly_log(p);
        for (auto &x : p) x *= k;
        poly_exp(p);
        for (auto &x : p) x *= f0pow;
        p.resize(rem, 0);
        // shift back up by nzero * k positions
        p.insert(p.begin(), m - int(p.size()), T(0));
    };
    subset_func(f, poly_pow);
}
// Reads an N x M integer matrix, groups its rows by their first entry, runs one of two
// counting DPs per group (chosen by M) and prints the sum of the per-group counts as a mint.
int main() {
int N, M;
cin >> N >> M;
vector A(N, vector<int>(M));
cin >> A;
dbg(A);
// Distinct first-column values; only used by the debug print below.
vector<int> cands;
REP(i, N) cands.push_back(A.at(i).front());
cands = sort_unique(cands);
dbg(cands);
// groups[a] = all rows whose first entry equals a.
map<int, vector<vector<int>>> groups;
for (const auto &a : A) { groups[a.front()].push_back(a); }
mint ret = 0;
for (const auto &[a, mat] : groups) {
dbg(a);
dbg(mat);
const int rows = mat.size();
if (M < 16) {
// Fewer than 16 columns: dp is indexed by a bitmask of columns (bit j = column j).
// Each row contributes a 0/1 table `trans` over column sets, and the rows are combined
// by subset convolution, i.e. the rows receive pairwise disjoint column sets.
vector<mint> dp(1 << M);
dp.at(0) = 1;
for (const auto &row : mat) {
// oks = set of columns in which this row holds the value a
int oks = 0;
REP(j, M) if (row.at(j) == a) oks |= 1 << j;
dbg(oks);
vector<mint> trans(1 << M);
REP(mask, 1 << M) {
// Walk the chosen columns in increasing order. A chosen column j is "bad" when bit
// j + 1 of oks is clear (row[j + 1] != a, or j + 1 == M). The mask is rejected if any
// chosen column follows a bad one, so only the largest chosen column may be bad.
bool fail = false;
bool last = false;
REP(j, M) {
if (mask & (1 << j)) {
const bool bad = !(oks & (1 << (j + 1)));
if (last) {
fail = true;
} else if (bad) {
last = true;
}
}
}
if (!fail) trans.at(mask) += 1;
}
dp = subset_convolution(dp, trans);
dbgif(M <= 3, dp);
}
// dp.back() is the entry for the full column set
ret += dp.back();
} else {
// 16 or more columns: dp is indexed by a bitmask of this group's rows and starts with
// all weight on the full set.
// NOTE(review): this allocates 2^rows entries — presumably the input limits keep
// `rows` small whenever M >= 16; confirm against the problem constraints.
vector<mint> dp(1 << rows);
dp.back() = 1;
REP(i, M) {
vector<mint> dpnxt(dp.size());
REP(S, 1 << rows) {
const mint dpnow = dp.at(S);
if (dpnow == 0) continue;
// Pick any row nxt that is still in S; it is removed from the set when a next
// column exists and the row's entry there differs from a.
REP(nxt, rows) {
if (!(S & (1 << nxt))) continue;
int nxtS = S;
if (i + 1 < M and mat.at(nxt).at(i + 1) != a) nxtS -= 1 << nxt;
dpnxt.at(nxtS) += dpnow;
}
}
dp = dpnxt;
}
// every remaining row set counts
ret += accumulate(ALL(dp), mint(0));
}
}
cout << ret << '\n';
}
hitonanode