
問題 No.2231 Surprising Flash!
ユーザー 👑 rin204
提出日時 2023-03-06 00:16:12
言語 C++17
(gcc 13.3.0 + boost 1.87.0)
実行時間 879 ms / 4,000 ms
コード長 41,095 bytes
コンパイル時間 4,989 ms
コンパイル使用メモリ 273,172 KB
最終ジャッジ日時 2025-02-11 05:37:45
judge3 / judge4
ファイルパターン 結果
sample AC * 1
other AC * 44


diff #

#line 1 "A.cpp"
// #pragma GCC target("avx2")
// #pragma GCC optimize("O3")
// #pragma GCC optimize("unroll-loops")
using namespace std;
using ll = long long;
using ull = unsigned long long;
template <class T>
using pq = priority_queue<T>;
template <class T>
using qp = priority_queue<T, vector<T>, greater<T>>;
#define vec(T, A, ...) vector<T> A(__VA_ARGS__);
#define vvec(T, A, h, ...) vector<vector<T>> A(h, vector<T>(__VA_ARGS__));
#define vvvec(T, A, h1, h2, ...) vector<vector<vector<T>>> A(h1, vector<vector<T>>(h2, vector<T>(__VA_ARGS__)));
#ifndef RIN__LOCAL
#define endl "\n"
#define spa ' '
#define len(A) A.size()
#define all(A) begin(A), end(A)
#define fori1(a) for(ll _ = 0; _ < (a); _++)
#define fori2(i, a) for(ll i = 0; i < (a); i++)
#define fori3(i, a, b) for(ll i = (a); i < (b); i++)
#define fori4(i, a, b, c) for(ll i = (a); ((c) > 0 || i > (b)) && ((c) < 0 || i < (b)); i += (c))
#define overload4(a, b, c, d, e, ...) e
#define fori(...) overload4(__VA_ARGS__, fori4, fori3, fori2, fori1)(__VA_ARGS__)
// Pairs each element of A with its index, like Python's enumerate().
// @param A  source vector (elements are copied into the result)
// @param s  value added to every index (default 0)
// @return   vector of (index + s, element), one entry per element of A
template <typename T>
std::vector<std::tuple<long long, T>> ENUMERATE(std::vector<T> &A, long long s = 0){
    std::vector<std::tuple<long long, T>> ret;
    ret.reserve(A.size());
    for(std::size_t i = 0; i < A.size(); i++){
        ret.emplace_back(static_cast<long long>(i) + s, A[i]);
    }
    return ret;
}
// String overload of ENUMERATE: pairs each character of A with its index.
// @param A  source string
// @param s  value added to every index (default 0)
// @return   vector of (index + s, character)
std::vector<std::tuple<long long, char>> ENUMERATE(std::string &A, long long s = 0){
    std::vector<std::tuple<long long, char>> ret;
    ret.reserve(A.size());
    for(std::size_t i = 0; i < A.size(); i++){
        ret.emplace_back(static_cast<long long>(i) + s, A[i]);
    }
    return ret;
}
#define enum1(A) fori(A.size())
#define enum2(a, A) for(auto a:A)
#define enum3(i, a, A) for(auto&& [i, a]: ENUMERATE(A))
#define enum4(i, a, A, s) for(auto&& [i, a]: ENUMERATE(A, s))
#define enum(...) overload4(__VA_ARGS__, enum4, enum3, enum2, enum1)(__VA_ARGS__)
// Zips two vectors element-wise, like Python's zip().
// @return vector of (A[i], B[i]) for i in [0, min(A.size(), B.size()))
template <typename T, typename S>
std::vector<std::tuple<T, S>> ZIP(std::vector<T> &A, std::vector<S> &B){
    const std::size_t n = std::min(A.size(), B.size());
    std::vector<std::tuple<T, S>> ret;
    ret.reserve(n);
    for(std::size_t i = 0; i < n; i++) ret.emplace_back(A[i], B[i]);
    return ret;
}
// Combination of ENUMERATE and ZIP.
// @param s  value added to every index (default 0)
// @return   vector of (i + s, A[i], B[i]) for i in [0, min(A.size(), B.size()))
template <typename T, typename S>
std::vector<std::tuple<long long, T, S>> ENUMZIP(std::vector<T> &A, std::vector<S> &B, long long s = 0){
    const std::size_t n = std::min(A.size(), B.size());
    std::vector<std::tuple<long long, T, S>> ret;
    ret.reserve(n);
    for(std::size_t i = 0; i < n; i++){
        ret.emplace_back(static_cast<long long>(i) + s, A[i], B[i]);
    }
    return ret;
}
#define zip4(a, b, A, B) for(auto&& [a, b]: ZIP(A, B))
#define enumzip5(i, a, b, A, B) for(auto&& [i, a, b]: ENUMZIP(A, B))
#define enumzip6(i, a, b, A, B, s) for(auto&& [i, a, b]: ENUMZIP(A, B, s))
#define overload6(a, b, c, d, e, f, g, ...) g
#define zip(...) overload6(__VA_ARGS__, enumzip6, enumzip5, zip4, _, _, _)(__VA_ARGS__)
// Converts a string into a vector holding the same characters in order.
std::vector<char> stoc(std::string &S){
    return std::vector<char>(S.begin(), S.end());
}
#define INT(...) int __VA_ARGS__; inp(__VA_ARGS__);
#define LL(...) ll __VA_ARGS__; inp(__VA_ARGS__);
#define STRING(...) string __VA_ARGS__; inp(__VA_ARGS__);
#define CHAR(...) char __VA_ARGS__; inp(__VA_ARGS__);
#define VEC(T, A, n) vector<T> A(n); inp(A);
#define VVEC(T, A, n, m) vector<vector<T>> A(n, vector<T>(m)); inp(A);
const ll MOD1 = 1000000007;
const ll MOD9 = 998244353;
// Returns (a copy of) the smallest element of a container.
// Precondition: `a` is non-empty (dereferences the result of min_element).
template<class T> auto min(const T& a){
    return *std::min_element(std::begin(a), std::end(a));
}
// Returns (a copy of) the largest element of a container.
// Precondition: `a` is non-empty (dereferences the result of max_element).
template<class T> auto max(const T& a){
    return *std::max_element(std::begin(a), std::end(a));
}
// Raises `a` to `b` when `a < b`.
// @return true if `a` was changed, false otherwise
template <class T, class S>
inline bool chmax(T &a, const S &b) {
    if (a < b) {
        a = b;
        return true;
    }
    return false;
}
// Lowers `a` to `b` when `a > b`.
// @return true if `a` was changed, false otherwise
template <class T, class S>
inline bool chmin(T &a, const S &b) {
    if (a > b) {
        a = b;
        return true;
    }
    return false;
}
// Flushes std::cout.
void FLUSH(){ std::cout << std::flush; }

// print(): writes a bare newline (terminates the variadic recursion below).
void print(){ std::cout << "\n"; }

// print(a, b, ...): writes the arguments separated by single spaces,
// followed by a newline.
template <class Head, class... Tail>
void print(Head &&head, Tail &&... tail) {
    std::cout << head;
    if (sizeof...(Tail)) std::cout << ' ';
    print(std::forward<Tail>(tail)...);
}

// print(vector): writes the elements space-separated on one line.
template<typename T>
void print(std::vector<T> &A){
    const int n = static_cast<int>(A.size());
    for(int i = 0; i < n; i++){
        std::cout << A[i];
        if(i != n - 1) std::cout << ' ';
    }
    std::cout << "\n";
}

// print(vector of vectors): one row per line.
template<typename T>
void print(std::vector<std::vector<T>> &A){
    for(auto &row: A) print(row);
}

// print(pair): "first second" followed by a newline.
template<typename T, typename S>
void print(std::pair<T, S> &A){
    std::cout << A.first << ' ' << A.second << "\n";
}

// print(vector of pairs): one pair per line.
template<typename T, typename S>
void print(std::vector<std::pair<T, S>> &A){
    for(auto &row: A) print(row);
}
// Writes the elements of A separated by `sep`, with a newline after the last
// element. Writes nothing at all for an empty vector.
template<typename T, typename S>
void prisep(std::vector<T> &A, S sep){
    const int n = static_cast<int>(A.size());
    for(int i = 0; i < n; i++){
        std::cout << A[i];
        if(i == n - 1) std::cout << "\n";
        else std::cout << sep;
    }
}
// Writes A followed by the terminator `end` (no newline is added).
template<typename T, typename S>
void priend(T A, S end){
    std::cout << A << end;
}

// Writes A followed by a single space.
template<typename T>
void priend(T A){
    priend(A, ' ');
}
// inp(a, b, ...): reads each argument from std::cin in order.
template<class... T>
void inp(T&... a){
    (std::cin >> ... >> a);
}

// inp(vector): fills every existing element of A from std::cin
// (the vector must already have its final size).
template<typename T>
void inp(std::vector<T> &A){
    for(auto &a:A) std::cin >> a;
}

// inp(vector of vectors): fills row by row.
template<typename T>
void inp(std::vector<std::vector<T>> &A){
    for(auto &row:A) inp(row);
}

// inp(pair): reads first, then second.
template<typename T, typename S>
void inp(std::pair<T, S> &A){
    inp(A.first, A.second);
}

// inp(vector of pairs): reads each pair in order.
template<typename T, typename S>
void inp(std::vector<std::pair<T, S>> &A){
    for(auto &row: A) inp(row.first, row.second);
}
// Returns the sum of all elements of A, accumulated in type T
// (0 for an empty vector).
template<typename T>
T sum(std::vector<T> &A){
    T tot = 0;
    for(const auto &a : A) tot += a;
    return tot;
}
// Coordinate compression.
// @param X  values to compress (taken by value; the caller's vector is untouched)
// @return   pair of (sorted distinct values, map from value to its rank in that list)
template<typename T>
std::pair<std::vector<T>, std::map<T, int>> compression(std::vector<T> X){
    // std::unique only removes *adjacent* duplicates, so the values must be
    // sorted first; otherwise repeats survive and the ranks are meaningless.
    std::sort(X.begin(), X.end());
    X.erase(std::unique(X.begin(), X.end()), X.end());
    std::map<T, int> mp;
    for(std::size_t i = 0; i < X.size(); i++) mp[X[i]] = static_cast<int>(i);
    return {X, mp};
}
#line 1 "atcoder/convolution.hpp"
#line 7 "atcoder/convolution.hpp"
#include <type_traits>
#line 9 "atcoder/convolution.hpp"
#line 1 "atcoder/internal_bit.hpp"
#ifdef _MSC_VER
#include <intrin.h>
namespace atcoder {
namespace internal {
// @param n `0 <= n`
// @return minimum non-negative `x` s.t. `n <= 2**x`
int ceil_pow2(int n) {
    int x = 0;
    while ((1U << x) < (unsigned int)(n)) x++;
    return x;
}
// Compile-time "bit scan forward": index of the lowest set bit.
// @param n `1 <= n` (the loop never terminates within range for n == 0)
// @return minimum non-negative `x` s.t. `(n & (1 << x)) != 0`
constexpr int bsf_constexpr(unsigned int n) {
    int x = 0;
    while (!(n & (1 << x))) x++;
    return x;
}
// Run-time "bit scan forward": index of the lowest set bit.
// @param n `1 <= n`
// @return minimum non-negative `x` s.t. `(n & (1 << x)) != 0`
int bsf(unsigned int n) {
#ifdef _MSC_VER
    unsigned long index;
    _BitScanForward(&index, n);
    return index;
#else
    return __builtin_ctz(n);
#endif
}
} // namespace internal
} // namespace atcoder
#line 1 "atcoder/modint.hpp"
#line 7 "atcoder/modint.hpp"
#ifdef _MSC_VER
#include <intrin.h>
#line 1 "atcoder/internal_math.hpp"
#line 5 "atcoder/internal_math.hpp"
#ifdef _MSC_VER
#include <intrin.h>
namespace atcoder {
namespace internal {
// Non-negative remainder (C++ `%` may return a negative value).
// @param m `1 <= m`
// @return x mod m, in the range [0, m)
constexpr long long safe_mod(long long x, long long m) {
    x %= m;
    if (x < 0) x += m;
    return x;
}
// Fast modular multiplication by barrett reduction
// Reference: https://en.wikipedia.org/wiki/Barrett_reduction
// NOTE: reconsider after Ice Lake
struct barrett {
    unsigned int _m;        // the modulus
    unsigned long long im;  // ceil(2^64 / _m), truncated to 64 bits (0 when _m == 1)

    // @param m `1 <= m < 2^31`
    explicit barrett(unsigned int m) : _m(m), im((unsigned long long)(-1) / m + 1) {}

    // @return m
    unsigned int umod() const { return _m; }

    // @param a `0 <= a < m`
    // @param b `0 <= b < m`
    // @return `a * b % m`
    unsigned int mul(unsigned int a, unsigned int b) const {
        // [1] m = 1
        // a = b = im = 0, so okay

        // [2] m >= 2
        // im = ceil(2^64 / m)
        // -> im * m = 2^64 + r (0 <= r < m)
        // let z = a*b = c*m + d (0 <= c, d < m)
        // a*b * im = (c*m + d) * im = c*(im*m) + d*im = c*2^64 + c*r + d*im
        // c*r + d*im < m * m + m * im < m * m + 2^64 + m <= 2^64 + m * (m + 1) < 2^64 * 2
        // ((ab * im) >> 64) == c or c + 1
        unsigned long long z = a;
        z *= b;
#ifdef _MSC_VER
        unsigned long long x;
        _umul128(z, im, &x);
#else
        unsigned long long x =
            (unsigned long long)(((unsigned __int128)(z)*im) >> 64);
#endif
        // x is c or c + 1; in the latter case the subtraction wraps below zero
        // (as unsigned), which the comparison detects and corrects.
        unsigned int v = (unsigned int)(z - x * _m);
        if (_m <= v) v += _m;
        return v;
    }
};
// Compile-time modular exponentiation by repeated squaring.
// @param n `0 <= n`
// @param m `1 <= m`
// @return `(x ** n) % m`
constexpr long long pow_mod_constexpr(long long x, long long n, int m) {
    if (m == 1) return 0;
    unsigned int _m = (unsigned int)(m);
    unsigned long long r = 1;
    unsigned long long y = safe_mod(x, m);
    while (n) {
        if (n & 1) r = (r * y) % _m;
        y = (y * y) % _m;
        n >>= 1;
    }
    return r;
}
// Reference:
// M. Forisek and J. Jancina,
// Fast Primality Testing for Integers That Fit into a Machine Word
// Miller-Rabin test using the bases {2, 7, 61}.
// @param n `0 <= n`
constexpr bool is_prime_constexpr(int n) {
    if (n <= 1) return false;
    if (n == 2 || n == 7 || n == 61) return true;
    if (n % 2 == 0) return false;
    long long d = n - 1;
    while (d % 2 == 0) d /= 2;
    constexpr long long bases[3] = {2, 7, 61};
    for (long long a : bases) {
        long long t = d;
        long long y = pow_mod_constexpr(a, t, n);
        while (t != n - 1 && y != 1 && y != n - 1) {
            y = y * y % n;
            t <<= 1;
        }
        if (y != n - 1 && t % 2 == 0) {
            return false;
        }
    }
    return true;
}
// Compile-time primality flag for the constant `n`.
template <int n> constexpr bool is_prime = is_prime_constexpr(n);
// Extended Euclid restricted to the coefficient of `a`.
// @param b `1 <= b`
// @return pair(g, x) s.t. g = gcd(a, b), xa = g (mod b), 0 <= x < b/g
constexpr std::pair<long long, long long> inv_gcd(long long a, long long b) {
    a = safe_mod(a, b);
    if (a == 0) return {b, 0};

    // Contracts:
    // [1] s - m0 * a = 0 (mod b)
    // [2] t - m1 * a = 0 (mod b)
    // [3] s * |m1| + t * |m0| <= b
    long long s = b, t = a;
    long long m0 = 0, m1 = 1;

    while (t) {
        long long u = s / t;
        s -= t * u;
        m0 -= m1 * u;  // |m1 * u| <= |m1| * s <= b

        // [3]:
        // (s - t * u) * |m1| + t * |m0 - m1 * u|
        // <= s * |m1| - t * u * |m1| + t * (|m0| + |m1| * u)
        // = s * |m1| + t * |m0| <= b

        auto tmp = s;
        s = t;
        t = tmp;
        tmp = m0;
        m0 = m1;
        m1 = tmp;
    }
    // by [3]: |m0| <= b/g
    // by g != b: |m0| < b/g
    if (m0 < 0) m0 += b / s;
    return {s, m0};
}
// Compile time primitive root
// @param m must be prime
// @return primitive root (and minimum in now)
constexpr int primitive_root_constexpr(int m) {
    // Hard-coded answers for the moduli handled explicitly.
    if (m == 2) return 1;
    if (m == 167772161) return 3;
    if (m == 469762049) return 3;
    if (m == 754974721) return 11;
    if (m == 998244353) return 3;
    // Collect the distinct prime divisors of m - 1.
    int divs[20] = {};
    divs[0] = 2;
    int cnt = 1;
    int x = (m - 1) / 2;
    while (x % 2 == 0) x /= 2;
    for (int i = 3; (long long)(i)*i <= x; i += 2) {
        if (x % i == 0) {
            divs[cnt++] = i;
            while (x % i == 0) {
                x /= i;
            }
        }
    }
    if (x > 1) {
        divs[cnt++] = x;
    }
    // g is a primitive root iff g^((m-1)/p) != 1 for every prime p | m - 1.
    for (int g = 2;; g++) {
        bool ok = true;
        for (int i = 0; i < cnt; i++) {
            if (pow_mod_constexpr(g, (m - 1) / divs[i], m) == 1) {
                ok = false;
                break;
            }
        }
        if (ok) return g;
    }
}
// Compile-time primitive root of the constant prime `m`.
template <int m> constexpr int primitive_root = primitive_root_constexpr(m);
// @param n `n < 2^32`
// @param m `1 <= m < 2^32`
// @return sum_{i=0}^{n-1} floor((ai + b) / m) (mod 2^64)
unsigned long long floor_sum_unsigned(unsigned long long n,
                                      unsigned long long m,
                                      unsigned long long a,
                                      unsigned long long b) {
    unsigned long long ans = 0;
    while (true) {
        // Peel off the whole multiples of m contained in a and b.
        if (a >= m) {
            ans += n * (n - 1) / 2 * (a / m);
            a %= m;
        }
        if (b >= m) {
            ans += n * (b / m);
            b %= m;
        }

        unsigned long long y_max = a * n + b;
        if (y_max < m) break;
        // y_max < m * (n + 1)
        // floor(y_max / m) <= n
        // Swap the roles of the axes (Euclid-like step) and continue.
        n = (unsigned long long)(y_max / m);
        b = (unsigned long long)(y_max % m);
        std::swap(m, a);
    }
    return ans;
}
} // namespace internal
} // namespace atcoder
#line 1 "atcoder/internal_type_traits.hpp"
#line 7 "atcoder/internal_type_traits.hpp"
namespace atcoder {
namespace internal {
// Integer type traits that also recognise the 128-bit extension types, which
// the <type_traits> primitives may not classify as integral.
#ifndef _MSC_VER
template <class T>
using is_signed_int128 =
    typename std::conditional<std::is_same<T, __int128_t>::value ||
                                  std::is_same<T, __int128>::value,
                              std::true_type,
                              std::false_type>::type;

template <class T>
using is_unsigned_int128 =
    typename std::conditional<std::is_same<T, __uint128_t>::value ||
                                  std::is_same<T, unsigned __int128>::value,
                              std::true_type,
                              std::false_type>::type;

template <class T>
using make_unsigned_int128 =
    typename std::conditional<std::is_same<T, __int128_t>::value,
                              __uint128_t,
                              unsigned __int128>;

template <class T>
using is_integral = typename std::conditional<std::is_integral<T>::value ||
                                                  is_signed_int128<T>::value ||
                                                  is_unsigned_int128<T>::value,
                                              std::true_type,
                                              std::false_type>::type;

template <class T>
using is_signed_int = typename std::conditional<(is_integral<T>::value &&
                                                 std::is_signed<T>::value) ||
                                                    is_signed_int128<T>::value,
                                                std::true_type,
                                                std::false_type>::type;

template <class T>
using is_unsigned_int =
    typename std::conditional<(is_integral<T>::value &&
                               std::is_unsigned<T>::value) ||
                                  is_unsigned_int128<T>::value,
                              std::true_type,
                              std::false_type>::type;

template <class T>
using to_unsigned = typename std::conditional<
    is_signed_int128<T>::value,
    make_unsigned_int128<T>,
    typename std::conditional<std::is_signed<T>::value,
                              std::make_unsigned<T>,
                              std::common_type<T>>::type>::type;

#else

template <class T> using is_integral = typename std::is_integral<T>;

template <class T>
using is_signed_int =
    typename std::conditional<is_integral<T>::value && std::is_signed<T>::value,
                              std::true_type,
                              std::false_type>::type;

template <class T>
using is_unsigned_int =
    typename std::conditional<is_integral<T>::value &&
                                  std::is_unsigned<T>::value,
                              std::true_type,
                              std::false_type>::type;

template <class T>
using to_unsigned = typename std::conditional<is_signed_int<T>::value,
                                              std::make_unsigned<T>,
                                              std::common_type<T>>::type;

#endif

// SFINAE helpers: well-formed only for signed / unsigned integer types.
template <class T>
using is_signed_int_t = std::enable_if_t<is_signed_int<T>::value>;

template <class T>
using is_unsigned_int_t = std::enable_if_t<is_unsigned_int<T>::value>;

// Unsigned counterpart of T (T itself when it is already unsigned).
template <class T> using to_unsigned_t = typename to_unsigned<T>::type;
} // namespace internal
} // namespace atcoder
#line 14 "atcoder/modint.hpp"
namespace atcoder {
namespace internal {
// Empty tag base: every modint type in this file derives from it.
struct modint_base {};
// Empty tag base for the compile-time-modulus variant (static_modint).
struct static_modint_base : modint_base {};
// Trait: true when T derives from modint_base.
template <class T> using is_modint = std::is_base_of<modint_base, T>;
// SFINAE helper: well-formed only when T is a modint type.
template <class T> using is_modint_t = std::enable_if_t<is_modint<T>::value>;
} // namespace internal
// Integer modulo a compile-time constant `m` (1 <= m). The stored value is
// always kept in the range [0, m).
template <int m, std::enable_if_t<(1 <= m)>* = nullptr>
struct static_modint : internal::static_modint_base {
    using mint = static_modint;

  public:
    // @return the modulus
    static constexpr int mod() { return m; }
    // Builds a value WITHOUT reducing v; the caller guarantees 0 <= v < m.
    static mint raw(int v) {
        mint x;
        x._v = v;
        return x;
    }

    static_modint() : _v(0) {}
    // Construction from a signed integer: reduced into [0, m).
    template <class T, internal::is_signed_int_t<T>* = nullptr>
    static_modint(T v) {
        long long x = (long long)(v % (long long)(umod()));
        if (x < 0) x += umod();
        _v = (unsigned int)(x);
    }
    // Construction from an unsigned integer: reduced into [0, m).
    template <class T, internal::is_unsigned_int_t<T>* = nullptr>
    static_modint(T v) {
        _v = (unsigned int)(v % umod());
    }

    // @return the representative in [0, m)
    unsigned int val() const { return _v; }

    mint& operator++() {
        _v++;
        if (_v == umod()) _v = 0;
        return *this;
    }
    mint& operator--() {
        if (_v == 0) _v = umod();
        _v--;
        return *this;
    }
    mint operator++(int) {
        mint result = *this;
        ++*this;
        return result;
    }
    mint operator--(int) {
        mint result = *this;
        --*this;
        return result;
    }

    mint& operator+=(const mint& rhs) {
        _v += rhs._v;
        if (_v >= umod()) _v -= umod();
        return *this;
    }
    mint& operator-=(const mint& rhs) {
        // Unsigned wrap-around on underflow makes _v >= umod(), which is
        // corrected by adding the modulus back.
        _v -= rhs._v;
        if (_v >= umod()) _v += umod();
        return *this;
    }
    mint& operator*=(const mint& rhs) {
        unsigned long long z = _v;
        z *= rhs._v;
        _v = (unsigned int)(z % umod());
        return *this;
    }
    mint& operator/=(const mint& rhs) { return *this = *this * rhs.inv(); }

    mint operator+() const { return *this; }
    mint operator-() const { return mint() - *this; }

    // @param n `0 <= n`
    // @return this value raised to the n-th power
    mint pow(long long n) const {
        assert(0 <= n);
        mint x = *this, r = 1;
        while (n) {
            if (n & 1) r *= x;
            x *= x;
            n >>= 1;
        }
        return r;
    }
    // Multiplicative inverse: Fermat's little theorem when m is prime,
    // extended Euclid otherwise (asserts that the inverse exists).
    mint inv() const {
        if (prime) {
            assert(_v);
            return pow(umod() - 2);
        } else {
            auto eg = internal::inv_gcd(_v, m);
            assert(eg.first == 1);
            return eg.second;
        }
    }

    friend mint operator+(const mint& lhs, const mint& rhs) {
        return mint(lhs) += rhs;
    }
    friend mint operator-(const mint& lhs, const mint& rhs) {
        return mint(lhs) -= rhs;
    }
    friend mint operator*(const mint& lhs, const mint& rhs) {
        return mint(lhs) *= rhs;
    }
    friend mint operator/(const mint& lhs, const mint& rhs) {
        return mint(lhs) /= rhs;
    }
    friend bool operator==(const mint& lhs, const mint& rhs) {
        return lhs._v == rhs._v;
    }
    friend bool operator!=(const mint& lhs, const mint& rhs) {
        return lhs._v != rhs._v;
    }

  private:
    unsigned int _v;
    static constexpr unsigned int umod() { return m; }
    static constexpr bool prime = internal::is_prime<m>;
};
template <int id> struct dynamic_modint : internal::modint_base {
using mint = dynamic_modint;
static int mod() { return (int)(bt.umod()); }
static void set_mod(int m) {
assert(1 <= m);
bt = internal::barrett(m);
static mint raw(int v) {
mint x;
x._v = v;
return x;
dynamic_modint() : _v(0) {}
template <class T, internal::is_signed_int_t<T>* = nullptr>
dynamic_modint(T v) {
long long x = (long long)(v % (long long)(mod()));
if (x < 0) x += mod();
_v = (unsigned int)(x);
template <class T, internal::is_unsigned_int_t<T>* = nullptr>
dynamic_modint(T v) {
_v = (unsigned int)(v % mod());
unsigned int val() const { return _v; }
mint& operator++() {
if (_v == umod()) _v = 0;
return *this;
mint& operator--() {
if (_v == 0) _v = umod();
return *this;
mint operator++(int) {
mint result = *this;
return result;
mint operator--(int) {
mint result = *this;
return result;
mint& operator+=(const mint& rhs) {
_v += rhs._v;
if (_v >= umod()) _v -= umod();
return *this;
mint& operator-=(const mint& rhs) {
_v += mod() - rhs._v;
if (_v >= umod()) _v -= umod();
return *this;
mint& operator*=(const mint& rhs) {
_v = bt.mul(_v, rhs._v);
return *this;
mint& operator/=(const mint& rhs) { return *this = *this * rhs.inv(); }
mint operator+() const { return *this; }
mint operator-() const { return mint() - *this; }
mint pow(long long n) const {
assert(0 <= n);
mint x = *this, r = 1;
while (n) {
if (n & 1) r *= x;
x *= x;
n >>= 1;
return r;
mint inv() const {
auto eg = internal::inv_gcd(_v, mod());
assert(eg.first == 1);
return eg.second;
friend mint operator+(const mint& lhs, const mint& rhs) {
return mint(lhs) += rhs;
friend mint operator-(const mint& lhs, const mint& rhs) {
return mint(lhs) -= rhs;
friend mint operator*(const mint& lhs, const mint& rhs) {
return mint(lhs) *= rhs;
friend mint operator/(const mint& lhs, const mint& rhs) {
return mint(lhs) /= rhs;
friend bool operator==(const mint& lhs, const mint& rhs) {
return lhs._v == rhs._v;
friend bool operator!=(const mint& lhs, const mint& rhs) {
return lhs._v != rhs._v;
unsigned int _v;
static internal::barrett bt;
static unsigned int umod() { return bt.umod(); }
template <int id> internal::barrett dynamic_modint<id>::bt(998244353);
// Convenience aliases: the two fixed moduli, plus the run-time-modulus type
// (dynamic_modint instantiated with id -1).
using modint998244353 = static_modint<998244353>;
using modint1000000007 = static_modint<1000000007>;
using modint = dynamic_modint<-1>;
namespace internal {
// Trait: true when T derives from static_modint_base.
template <class T>
using is_static_modint = std::is_base_of<internal::static_modint_base, T>;
// SFINAE helper: well-formed only when T is a static modint.
template <class T>
using is_static_modint_t = std::enable_if_t<is_static_modint<T>::value>;
// Trait: false in general, true for any dynamic_modint<id> (specialisation below).
template <class> struct is_dynamic_modint : public std::false_type {};
template <int id>
struct is_dynamic_modint<dynamic_modint<id>> : public std::true_type {};
// SFINAE helper: well-formed only when T is a dynamic_modint<id>.
template <class T>
using is_dynamic_modint_t = std::enable_if_t<is_dynamic_modint<T>::value>;
} // namespace internal
} // namespace atcoder
#line 12 "atcoder/convolution.hpp"
namespace atcoder {
namespace internal {
// Precomputed roots of unity for the NTT over `mint`, built from the primitive
// root g. The rate tables hold the twiddle-factor ratios that butterfly() and
// butterfly_inv() multiply in between consecutive blocks.
template <class mint,
          int g = internal::primitive_root<mint::mod()>,
          internal::is_static_modint_t<mint>* = nullptr>
struct fft_info {
    // Largest k such that 2^k divides mod - 1.
    static constexpr int rank2 = bsf_constexpr(mint::mod() - 1);
    std::array<mint, rank2 + 1> root;   // root[i]^(2^i) == 1
    std::array<mint, rank2 + 1> iroot;  // root[i] * iroot[i] == 1

    std::array<mint, std::max(0, rank2 - 2 + 1)> rate2;
    std::array<mint, std::max(0, rank2 - 2 + 1)> irate2;

    std::array<mint, std::max(0, rank2 - 3 + 1)> rate3;
    std::array<mint, std::max(0, rank2 - 3 + 1)> irate3;

    fft_info() {
        root[rank2] = mint(g).pow((mint::mod() - 1) >> rank2);
        iroot[rank2] = root[rank2].inv();
        for (int i = rank2 - 1; i >= 0; i--) {
            root[i] = root[i + 1] * root[i + 1];
            iroot[i] = iroot[i + 1] * iroot[i + 1];
        }

        {
            mint prod = 1, iprod = 1;
            for (int i = 0; i <= rank2 - 2; i++) {
                rate2[i] = root[i + 2] * prod;
                irate2[i] = iroot[i + 2] * iprod;
                prod *= iroot[i + 2];
                iprod *= root[i + 2];
            }
        }
        {
            mint prod = 1, iprod = 1;
            for (int i = 0; i <= rank2 - 3; i++) {
                rate3[i] = root[i + 3] * prod;
                irate3[i] = iroot[i + 3] * iprod;
                prod *= iroot[i + 3];
                iprod *= root[i + 3];
            }
        }
    }
};
// In-place forward NTT of `a`, processed two levels at a time (radix 4) with a
// single radix-2 step when one level is left over.
// NOTE(review): indexing assumes a.size() is a power of two (callers in this
// file resize to 1 << ceil_pow2(...) first) — confirm for any new caller.
template <class mint, internal::is_static_modint_t<mint>* = nullptr>
void butterfly(std::vector<mint>& a) {
    int n = int(a.size());
    int h = internal::ceil_pow2(n);

    static const fft_info<mint> info;

    int len = 0;  // a[i, i+(n>>len), i+2*(n>>len), ..] is transformed
    while (len < h) {
        if (h - len == 1) {
            int p = 1 << (h - len - 1);
            mint rot = 1;
            for (int s = 0; s < (1 << len); s++) {
                int offset = s << (h - len);
                for (int i = 0; i < p; i++) {
                    auto l = a[i + offset];
                    auto r = a[i + offset + p] * rot;
                    a[i + offset] = l + r;
                    a[i + offset + p] = l - r;
                }
                if (s + 1 != (1 << len))
                    rot *= info.rate2[bsf(~(unsigned int)(s))];
            }
            len++;
        } else {
            // 4-base
            int p = 1 << (h - len - 2);
            mint rot = 1, imag = info.root[2];
            for (int s = 0; s < (1 << len); s++) {
                mint rot2 = rot * rot;
                mint rot3 = rot2 * rot;
                int offset = s << (h - len);
                for (int i = 0; i < p; i++) {
                    // Work in unsigned 64-bit; multiples of mod^2 are added so
                    // that every subtraction stays non-negative.
                    auto mod2 = 1ULL * mint::mod() * mint::mod();
                    auto a0 = 1ULL * a[i + offset].val();
                    auto a1 = 1ULL * a[i + offset + p].val() * rot.val();
                    auto a2 = 1ULL * a[i + offset + 2 * p].val() * rot2.val();
                    auto a3 = 1ULL * a[i + offset + 3 * p].val() * rot3.val();
                    auto a1na3imag =
                        1ULL * mint(a1 + mod2 - a3).val() * imag.val();
                    auto na2 = mod2 - a2;
                    a[i + offset] = a0 + a2 + a1 + a3;
                    a[i + offset + 1 * p] = a0 + a2 + (2 * mod2 - (a1 + a3));
                    a[i + offset + 2 * p] = a0 + na2 + a1na3imag;
                    a[i + offset + 3 * p] = a0 + na2 + (mod2 - a1na3imag);
                }
                if (s + 1 != (1 << len))
                    rot *= info.rate3[bsf(~(unsigned int)(s))];
            }
            len += 2;
        }
    }
}
template <class mint, internal::is_static_modint_t<mint>* = nullptr>
void butterfly_inv(std::vector<mint>& a) {
int n = int(a.size());
int h = internal::ceil_pow2(n);
static const fft_info<mint> info;
int len = h; // a[i, i+(n>>len), i+2*(n>>len), ..] is transformed
while (len) {
if (len == 1) {
int p = 1 << (h - len);
mint irot = 1;
for (int s = 0; s < (1 << (len - 1)); s++) {
int offset = s << (h - len + 1);
for (int i = 0; i < p; i++) {
auto l = a[i + offset];
auto r = a[i + offset + p];
a[i + offset] = l + r;
a[i + offset + p] =
(unsigned long long)(mint::mod() + l.val() - r.val()) *
if (s + 1 != (1 << (len - 1)))
irot *= info.irate2[bsf(~(unsigned int)(s))];
} else {
// 4-base
int p = 1 << (h - len);
mint irot = 1, iimag = info.iroot[2];
for (int s = 0; s < (1 << (len - 2)); s++) {
mint irot2 = irot * irot;
mint irot3 = irot2 * irot;
int offset = s << (h - len + 2);
for (int i = 0; i < p; i++) {
auto a0 = 1ULL * a[i + offset + 0 * p].val();
auto a1 = 1ULL * a[i + offset + 1 * p].val();
auto a2 = 1ULL * a[i + offset + 2 * p].val();
auto a3 = 1ULL * a[i + offset + 3 * p].val();
auto a2na3iimag =
1ULL *
mint((mint::mod() + a2 - a3) * iimag.val()).val();
a[i + offset] = a0 + a1 + a2 + a3;
a[i + offset + 1 * p] =
(a0 + (mint::mod() - a1) + a2na3iimag) * irot.val();
a[i + offset + 2 * p] =
(a0 + a1 + (mint::mod() - a2) + (mint::mod() - a3)) *
a[i + offset + 3 * p] =
(a0 + (mint::mod() - a1) + (mint::mod() - a2na3iimag)) *
if (s + 1 != (1 << (len - 2)))
irot *= info.irate3[bsf(~(unsigned int)(s))];
len -= 2;
// Schoolbook O(n * m) convolution; the longer operand drives the outer loop.
// Precondition: both inputs are non-empty (the result has n + m - 1 entries).
template <class mint, internal::is_static_modint_t<mint>* = nullptr>
std::vector<mint> convolution_naive(const std::vector<mint>& a,
                                    const std::vector<mint>& b) {
    int n = int(a.size()), m = int(b.size());
    std::vector<mint> ans(n + m - 1);
    if (n < m) {
        for (int j = 0; j < m; j++) {
            for (int i = 0; i < n; i++) {
                ans[i + j] += a[i] * b[j];
            }
        }
    } else {
        for (int i = 0; i < n; i++) {
            for (int j = 0; j < m; j++) {
                ans[i + j] += a[i] * b[j];
            }
        }
    }
    return ans;
}
// NTT-based convolution: transform both operands, multiply point-wise,
// transform back and divide by the transform size.
// Precondition: both inputs are non-empty.
template <class mint, internal::is_static_modint_t<mint>* = nullptr>
std::vector<mint> convolution_fft(std::vector<mint> a, std::vector<mint> b) {
    int n = int(a.size()), m = int(b.size());
    int z = 1 << internal::ceil_pow2(n + m - 1);
    a.resize(z);
    internal::butterfly(a);
    b.resize(z);
    internal::butterfly(b);
    for (int i = 0; i < z; i++) {
        a[i] *= b[i];
    }
    internal::butterfly_inv(a);
    a.resize(n + m - 1);
    // butterfly_inv leaves every entry scaled by z; undo that here.
    mint iz = mint(z).inv();
    for (int i = 0; i < n + m - 1; i++) a[i] *= iz;
    return a;
}
} // namespace internal
// Convolution of two modint sequences (rvalue overload).
// Returns an empty vector if either input is empty; uses the schoolbook
// method when the shorter input has at most 60 elements, the NTT otherwise.
template <class mint, internal::is_static_modint_t<mint>* = nullptr>
std::vector<mint> convolution(std::vector<mint>&& a, std::vector<mint>&& b) {
    int n = int(a.size()), m = int(b.size());
    if (!n || !m) return {};
    if (std::min(n, m) <= 60) return internal::convolution_naive(a, b);
    return internal::convolution_fft(a, b);
}
// Convolution of two modint sequences (const-reference overload).
// Returns an empty vector if either input is empty; uses the schoolbook
// method when the shorter input has at most 60 elements, the NTT otherwise.
template <class mint, internal::is_static_modint_t<mint>* = nullptr>
std::vector<mint> convolution(const std::vector<mint>& a,
                              const std::vector<mint>& b) {
    int n = int(a.size()), m = int(b.size());
    if (!n || !m) return {};
    if (std::min(n, m) <= 60) return internal::convolution_naive(a, b);
    return internal::convolution_fft(a, b);
}
template <unsigned int mod = 998244353,
class T,
std::enable_if_t<internal::is_integral<T>::value>* = nullptr>
std::vector<T> convolution(const std::vector<T>& a, const std::vector<T>& b) {
int n = int(a.size()), m = int(b.size());
if (!n || !m) return {};
using mint = static_modint<mod>;
std::vector<mint> a2(n), b2(m);
for (int i = 0; i < n; i++) {
a2[i] = mint(a[i]);
for (int i = 0; i < m; i++) {
b2[i] = mint(b[i]);
auto c2 = convolution(move(a2), move(b2));
std::vector<T> c(n + m - 1);
for (int i = 0; i < n + m - 1; i++) {
c[i] = c2[i].val();
return c;
// Exact convolution of two long long sequences: convolves modulo three
// NTT-friendly primes and recombines the residues into a 64-bit value.
// Returns an empty vector if either input is empty.
std::vector<long long> convolution_ll(const std::vector<long long>& a,
                                      const std::vector<long long>& b) {
    int n = int(a.size()), m = int(b.size());
    if (!n || !m) return {};

    static constexpr unsigned long long MOD1 = 754974721;  // 2^24
    static constexpr unsigned long long MOD2 = 167772161;  // 2^25
    static constexpr unsigned long long MOD3 = 469762049;  // 2^26
    static constexpr unsigned long long M2M3 = MOD2 * MOD3;
    static constexpr unsigned long long M1M3 = MOD1 * MOD3;
    static constexpr unsigned long long M1M2 = MOD1 * MOD2;
    static constexpr unsigned long long M1M2M3 = MOD1 * MOD2 * MOD3;

    static constexpr unsigned long long i1 =
        internal::inv_gcd(MOD2 * MOD3, MOD1).second;
    static constexpr unsigned long long i2 =
        internal::inv_gcd(MOD1 * MOD3, MOD2).second;
    static constexpr unsigned long long i3 =
        internal::inv_gcd(MOD1 * MOD2, MOD3).second;

    auto c1 = convolution<MOD1>(a, b);
    auto c2 = convolution<MOD2>(a, b);
    auto c3 = convolution<MOD3>(a, b);

    std::vector<long long> c(n + m - 1);
    for (int i = 0; i < n + m - 1; i++) {
        unsigned long long x = 0;
        x += (c1[i] * i1) % MOD1 * M2M3;
        x += (c2[i] * i2) % MOD2 * M1M3;
        x += (c3[i] * i3) % MOD3 * M1M2;
        // B = 2^63, -B <= x, r(real value) < B
        // (x, x - M, x - 2M, or x - 3M) = r (mod 2B)
        // r = c1[i] (mod MOD1)
        // focus on MOD1
        // r = x, x - M', x - 2M', x - 3M' (M' = M % 2^64) (mod 2B)
        // r = x,
        //     x - M' + (0 or 2B),
        //     x - 2M' + (0, 2B or 4B),
        //     x - 3M' + (0, 2B, 4B or 6B) (without mod!)
        // (r - x) = 0, (0)
        //           - M' + (0 or 2B), (1)
        //           -2M' + (0 or 2B or 4B), (2)
        //           -3M' + (0 or 2B or 4B or 6B) (3) (mod MOD1)
        // we checked that
        //   ((1) mod MOD1) mod 5 = 2
        //   ((2) mod MOD1) mod 5 = 3
        //   ((3) mod MOD1) mod 5 = 4
        long long diff =
            c1[i] - internal::safe_mod((long long)(x), (long long)(MOD1));
        if (diff < 0) diff += MOD1;
        static constexpr unsigned long long offset[5] = {
            0, 0, M1M2M3, 2 * M1M2M3, 3 * M1M2M3};
        x -= offset[diff % 5];
        c[i] = x;
    }

    return c;
}
} // namespace atcoder
#line 3 "_tmp/wildcard_matching.hpp"
#line 5 "_tmp/wildcard_matching.hpp"
using namespace std;
template<unsigned int MOD>
vector<bool> wildcard_matching(vector<int> &S, vector<int> &T){
int n = S.size();
int m = T.size();
vector<long long> S3(n);
vector<long long> S2(n);
vector<long long> S1(n);
for(int i = 0; i < n; i++){
long long x = S[i];
long long y = int(S[i] > 0);
S3[i] = y * x * x % MOD;
S2[i] = y * x;
S1[i] = y;
vector<long long> T3(n);
vector<long long> T2(n);
vector<long long> T1(n);
for(int i = 0; i < m; i++){
long long x = T[i];
long long y = int(T[i] > 0);
T3[m - 1 - i] = y * x * x % MOD;
T2[m - 1 - i] = y * x;
T1[m - 1 - i] = y;
auto res1 = atcoder::convolution<MOD>(S3, T1);
auto res2 = atcoder::convolution<MOD>(S2, T2);
auto res3 = atcoder::convolution<MOD>(S1, T3);
vector<bool> res(n - m + 1);
for(int i = 0; i < n - m + 1; i++){
long long x = res1[i + m - 1] - 2 * res2[i + m - 1] + res3[i + m - 1];
res[i] = bool(x == 0);
return res;
vector<int> wildcard_matching(vector<int> &S, vector<int> &T){
auto res1 = wildcard_matching<998244353>(S, T);
auto res2 = wildcard_matching<924844033>(S, T);
auto res3 = wildcard_matching<1012924417>(S, T);
vector<int> res;
for(int i = 0; i < res1.size(); i++){
if(res1[i] && res2[i] && res3[i]) res.push_back(i);
return res;
template<unsigned int MOD>
vector<bool> wildcard_matching(vector<long long> &S, vector<long long> &T){
int n = S.size();
int m = T.size();
vector<long long> S3(n);
vector<long long> S2(n);
vector<long long> S1(n);
for(int i = 0; i < n; i++){
long long x = S[i];
long long y = int(S[i] > 0);
S3[i] = y * x * x % MOD;
S2[i] = y * x;
S1[i] = y;
vector<long long> T3(n);
vector<long long> T2(n);
vector<long long> T1(n);
for(int i = 0; i < m; i++){
long long x = T[i];
long long y = int(T[i] > 0);
T3[m - 1 - i] = y * x * x % MOD;
T2[m - 1 - i] = y * x;
T1[m - 1 - i] = y;
auto res1 = atcoder::convolution<MOD>(S3, T1);
auto res2 = atcoder::convolution<MOD>(S2, T2);
auto res3 = atcoder::convolution<MOD>(S1, T3);
vector<bool> res(n - m + 1);
for(int i = 0; i < n - m + 1; i++){
long long x = res1[i + m - 1] - 2 * res2[i + m - 1] + res3[i + m - 1];
res[i] = bool(x == 0);
return res;
vector<int> wildcard_matching(vector<long long> &S, vector<long long> &T){
auto res1 = wildcard_matching<998244353>(S, T);
auto res2 = wildcard_matching<924844033>(S, T);
auto res3 = wildcard_matching<1012924417>(S, T);
vector<int> res;
for(int i = 0; i < res1.size(); i++){
if(res1[i] && res2[i] && res3[i]) res.push_back(i);
return res;
vector<int> wildcard_matching(vector<char> &S, vector<char> &T, char wild='?'){
char mi = S[0];
int n = S.size();
int m = T.size();
for(int i = 0; i < n; i++){
if(S[i] != wild && S[i] < mi) mi = S[i];
for(int i = 0; i < m; i++){
if(T[i] != wild && T[i] < mi) mi = T[i];
vector<int> SS(n);
vector<int> TT(m);
for(int i = 0; i < n; i++){
if(S[i] == wild) SS[i] = 0;
else SS[i] = S[i] - mi + 1;
for(int i = 0; i < m; i++){
if(T[i] == wild) TT[i] = 0;
else TT[i] = T[i] - mi + 1;
auto res1 = wildcard_matching<998244353>(SS, TT);
auto res2 = wildcard_matching<924844033>(SS, TT);
auto res3 = wildcard_matching<1012924417>(SS, TT);
vector<int> res;
for(int i = 0; i < res1.size(); i++){
if(res1[i] && res2[i] && res3[i]) res.push_back(i);
return res;
vector<int> wildcard_matching(string &S, string &T, char wild='?'){
char mi = S[0];
int n = S.size();
int m = T.size();
for(int i = 0; i < n; i++){
if(S[i] != wild && S[i] < mi) mi = S[i];
for(int i = 0; i < m; i++){
if(T[i] != wild && T[i] < mi) mi = T[i];
vector<int> SS(n);
vector<int> TT(m);
for(int i = 0; i < n; i++){
if(S[i] == wild) SS[i] = 0;
else SS[i] = S[i] - mi + 1;
for(int i = 0; i < m; i++){
if(T[i] == wild) TT[i] = 0;
else TT[i] = T[i] - mi + 1;
auto res1 = wildcard_matching<998244353>(SS, TT);
auto res2 = wildcard_matching<924844033>(SS, TT);
auto res3 = wildcard_matching<1012924417>(SS, TT);
vector<int> res;
for(int i = 0; i < res1.size(); i++){
if(res1[i] && res2[i] && res3[i]) res.push_back(i);
return res;
#line 2 "Library/C++/string/safetyRollingHash.hpp"
// Polynomial rolling hash modulo the Mersenne prime 2^61 - 1.
//   h[i]  = hash of the first i elements
//   pw[i] = base^i
// so that any substring hash is available in O(1) through get().
template<typename T = char>
struct RollingHash{
    using u64 = std::uint64_t;
    using u128 = __uint128_t;
    int n;
    u64 base;
    static constexpr u64 MOD = (1ull << 61ull) - 1;
    std::vector<u64> pw, h;

    // Builds the tables for a vector of symbols.
    RollingHash(std::vector<T> &S, u64 base) : base(base % MOD){
        build(S);
    }

    // Builds the tables for a string.
    RollingHash(std::string &S, u64 base) : base(base % MOD){
        build(S);
    }

    // Hash of the half-open range [l, r), 0 <= l <= r <= n.
    u64 get(int l, int r) const {
        return Add(h[r], MOD - Mul(h[l], pw[r - l]));
    }

    // (a * b) mod MOD for a, b < MOD. Because 2^61 == 1 (mod MOD), the high
    // and low 61-bit halves of the product can simply be added.
    u64 Mul(u64 a, u64 b) const {
        u128 c = (u128) a * b;
        return Add(static_cast<u64>(c >> 61), static_cast<u64>(c & MOD));
    }

    // (a + b) mod MOD for a, b <= MOD.
    u64 Add(u64 a, u64 b) const {
        a += b;
        if(a >= MOD) a -= MOD;
        return a;
    }

  private:
    template<typename Seq>
    void build(const Seq &S){
        n = static_cast<int>(S.size());
        pw.assign(n + 1, 1ull);
        h.assign(n + 1, 0ull);
        for(int i = 0; i < n; i++){
            pw[i + 1] = Mul(pw[i], base);
            // Reduce the symbol first: a negative value converts to something
            // close to 2^64 and would break Add's precondition.
            h[i + 1] = Add(Mul(h[i], base), static_cast<u64>(S[i]) % MOD);
        }
    }
};
#line 2 "Library/C++/other/RandomNumberGenerator.hpp"
// Small wrapper around std::mt19937, seeded from the steady clock when it is
// constructed.
struct RandomNumberGenerator{
    std::mt19937 mt;

    RandomNumberGenerator()
        : mt(static_cast<std::mt19937::result_type>(
              std::chrono::steady_clock::now().time_since_epoch().count())) {}

    // Uniform integer in the half-open range [a, b). Requires a < b.
    int operator()(int a, int b){
        std::uniform_int_distribution<int> dist(a, b - 1);
        return dist(mt);
    }

    // Uniform integer in [0, b). Requires 0 < b.
    int operator()(int b){
        return (*this)(0, b);
    }
};
#line 190 "A.cpp"
void solve(){
INT(n, m);
auto res = wildcard_matching(S, T);
fori(i, n){
if(S[i] == '?') S[i] = 'a';
RandomNumberGenerator rnd;
int base = rnd(1 << 30);
RollingHash rhs(S, base);
RollingHash rht(T, base);
auto calc=[&](int i, int l){
if(l <= i){
return rhs.get(0, l);
else if(l <= i + m){
return rhs.Add(rhs.Mul(rhs.get(0, i), rhs.pw[l - i]), rht.get(0, l - i));
auto res = rhs.Add(rhs.Mul(rhs.get(0, i), rhs.pw[m]), rht.get(0, m));
return rhs.Add(res, rhs.Mul(res, rhs.pw[l - i - m])), rhs.get(i + m, l);
auto get=[&](int i, int l){
if(l < i){
return S[l];
else if(l < i + m){
return T[l - i];
return S[l];
int ind = res[0];
fori(i, 1, res.size()){
int l = 0;
int r = n;
while(r - l > 1){
int mid = (l + r) / 2;
if(calc(res[i], mid) == calc(ind, mid)) l = mid;
else r = mid;
if(get(res[i], l) < get(ind, l)) ind = res[i];
fori(i, m){
S[i + ind] = T[i];
// Entry point: reads the number of test cases and solves each in turn.
int main(){
    // Unsynchronised, untied streams: the program only uses iostreams, and
    // the input can be large.
    std::ios::sync_with_stdio(false);
    std::cin.tie(nullptr);
    // cout << fixed << setprecision(12);
    int t = 1;
    std::cin >> t;
    while(t--) solve();
    return 0;
}