#define SINGLE_TESTCASE
// #define MULTI_TESTCASE
// #define AOJ_TESTCASE

#define FAST_IO
// #define FAST_CIO
// #define INTERACTIVE

#define INF 4'000'000'000'000'000'037LL
#define EPS 1e-11



/**
 * @brief テンプレート（型）
 * @docs docs/template/template_types.md
 */

#include <bits/stdc++.h>
using namespace std;

#ifndef EPS
#define EPS 1e-11
#endif
// Floating-point type of the EPS literal (double for the default 1e-11).
using ld = decltype(EPS);

// Short names for integer types and for pairs/tuples of ll.
using ll = long long;
using uint = unsigned int;
using ull = unsigned long long;
using pll = pair<ll, ll>;
using tlll = tuple<ll, ll, ll>;
using tllll = tuple<ll, ll, ll, ll>;

// `vc` is a macro rather than an alias template, so it can also be written
// without template arguments (e.g. `vc(n, x)`), leaving deduction to vector.
#define vc vector
// Two- and three-level nested vectors.
template <class T>
using vvc = vc<vc<T>>;
template <class T>
using vvvc = vc<vc<vc<T>>>;

// Frequently used vector instantiations.
using vb = vc<bool>;
using vl = vc<ll>;
using vpll = vc<pll>;
using vtlll = vc<tlll>;
using vtllll = vc<tllll>;
using vstr = vc<string>;
using vvb = vvc<bool>;
using vvl = vvc<ll>;

// pql: priority queue whose top() is the smallest element (uses greater<T>).
template <class T>
using pql = priority_queue<T, vc<T>, greater<T>>;
// pqg: priority queue whose top() is the largest element (default comparator).
template <class T>
using pqg = priority_queue<T>;

#ifdef __SIZEOF_INT128__
using i128 = __int128_t;
using u128 = __uint128_t;
// Parses a base-10 integer, optionally prefixed with '-', into an i128.
//
// An empty string yields 0 (calling front() on an empty string is undefined).
// Digits are accumulated in the direction of the sign so that the most
// negative i128 value parses without overflowing an intermediate result.
// Characters are not validated: every character after the optional sign is
// treated as a decimal digit.
i128 stoi128(const string &s)
{
  if (s.empty())
    return 0;
  const bool negative = s.front() == '-';
  i128 res = 0;
  for (size_t i = negative ? 1 : 0; i < s.size(); i++)
  {
    const int digit = s[i] - '0';
    res = 10 * res + (negative ? -digit : digit);
  }
  return res;
}
// Converts an i128 to its base-10 string representation.
//
// The magnitude is computed in unsigned arithmetic because negating the most
// negative i128 value as a signed number overflows.
string i128tos(i128 x)
{
  if (x == 0) return "0";
  const bool negative = x < 0;
  u128 magnitude = negative ? -static_cast<u128>(x) : static_cast<u128>(x);
  string res;
  // Digits are produced least-significant first and reversed at the end.
  while (magnitude > 0)
  {
    res += static_cast<char>('0' + static_cast<int>(magnitude % 10));
    magnitude /= 10;
  }
  if (negative)
    res += '-';
  reverse(res.begin(), res.end());
  return res;
}
// Reads one whitespace-delimited token from `is` and parses it as an i128.
// If extraction fails (EOF or a stream error) the token is not parsed and `a`
// is set to 0, so an empty string is never handed to the parser.
istream &operator>>(istream &is, i128 &a)
{
  string s;
  if (is >> s)
    a = stoi128(s);
  else
    a = 0;
  return is;
}
// Writes the decimal representation of `a` to `os` and returns the stream.
ostream &operator<<(ostream &os, const i128 &a)
{
  return os << i128tos(a);
}
#endif

#define cauto const auto


/**
 * @brief テンプレート（rep）
 * @docs docs/template/template_rep.md
 */

// https://trap.jp/post/1224/

// Dispatch helper: expands to its 5th argument. `rep`/`repi` place the
// candidate macros after the user's arguments so that the argument count
// (2, 3 or 4) selects which one lands in the 5th position.
#define overload4(_1, _2, _3, _4, name, ...) name
// rep(i, n): ll i from 0 while i < n; n is evaluated once.
#define rep1(i, n) for (ll i = 0, nnnnn = ll(n); i < nnnnn; i++)
// rep(i, l, r): ll i from l while i < r; r is evaluated once.
#define rep2(i, l, r) for (ll i = ll(l), rrrrr = ll(r); i < rrrrr; i++)
// rep(i, l, r, d): step d; runs while i < r if d > 0, otherwise while i > r.
// The direction test uses the cached step, but `i += d` re-evaluates d.
#define rep3(i, l, r, d) for (ll i = ll(l), rrrrr = ll(r), ddddd = ll(d); ddddd > 0 ? i < rrrrr : i > rrrrr; i += d)
#define rep(...) overload4(__VA_ARGS__, rep3, rep2, rep1)(__VA_ARGS__)
// repi(...): same three forms as rep, with an int loop variable.
#define repi1(i, n) for (int i = 0, nnnnn = int(n); i < nnnnn; i++)
#define repi2(i, l, r) for (int i = int(l), rrrrr = int(r); i < rrrrr; i++)
#define repi3(i, l, r, d) for (int i = int(l), rrrrr = int(r), ddddd = int(d); ddddd > 0 ? i < rrrrr : i > rrrrr; i += d)
#define repi(...) overload4(__VA_ARGS__, repi3, repi2, repi1)(__VA_ARGS__)

// Range-for shorthands: fe copies each element, fec binds a const reference,
// fem binds a mutable reference. Usage: fe(x : v) { ... }
#define fe(...) for (auto __VA_ARGS__)
#define fec(...) for (cauto &__VA_ARGS__)
#define fem(...) for (auto &__VA_ARGS__)

#ifndef INF
#define INF 4'000'000'000'000'000'037LL
#endif
#ifndef EPS
#define EPS 1e-11
#endif


/**
 * @brief テンプレート（演算）
 * @docs docs/template/template_math.md
 */

// Lowers `a` to `b` when `b` is strictly smaller.
// Returns true if `a` was updated, false otherwise.
template <class T, class U>
inline bool chmin(T &a, U b)
{
  if (a > b)
  {
    a = b;
    return true;
  }
  return false;
}
// Raises `a` to `b` when `b` is strictly larger.
// Returns true if `a` was updated, false otherwise.
template <class T, class U>
inline bool chmax(T &a, U b)
{
  if (a < b)
  {
    a = b;
    return true;
  }
  return false;
}

// floor(a / b), computed in T. Truncating division is corrected downward when
// the division is inexact and the operands have opposite signs.
template <class T = ll, class U, class V>
inline constexpr T divfloor(U a, V b)
{
  const T num = T(a), den = T(b);
  T quot = num / den;
  if (num % den && (num ^ den) < 0)
    quot -= 1;
  return quot;
}
// ceil(a / b), computed in T. Truncating division is corrected upward when
// the division is inexact and the operands have the same sign.
template <class T = ll, class U, class V>
inline constexpr T divceil(U a, V b)
{
  const T num = T(a), den = T(b);
  T quot = num / den;
  if (num % den && (num ^ den) >= 0)
    quot += 1;
  return quot;
}
// a / b rounded via floor((2a + b) / (2b)), computed in T.
template <class T = ll, class U, class V>
inline constexpr T divround(U a, V b)
{
  const T num = T(a), den = T(b);
  return divfloor<T>(2 * num + den, 2 * den);
}
// a - b * floor(a / b), computed in T.
template <class T = ll, class U, class V>
inline constexpr T safemod(U a, V b)
{
  const T floored = divfloor<T>(a, b);
  return T(a) - T(b) * floored;
}

// Integer power a^b by repeated squaring, accumulated in T.
// Requires b >= 0 (asserted). Bases 0, 1 and -1 are answered directly.
// No overflow checking is performed.
template <class T = ll, class U, class V>
constexpr T ipow(U a, V b)
{
  assert(b >= 0);
  if (b == 0)
    return 1;
  if (a == 0 || a == 1)
    return a;
  if (a < 0 && a == -1)
    return b & 1 ? -1 : 1;

  T acc = 1;
  // `square` holds a^(2^j); it is squared only while exponent bits remain.
  for (T square = a;; square *= square)
  {
    if (b & 1)
      acc *= square;
    b >>= 1;
    if (b == 0)
      break;
  }
  return acc;
}
// Returns min(a * b, m) computed in T without overflowing.
// Requires a, b, m >= 0 (asserted). When m is omitted it defaults to INF;
// M is given a default type so that the default argument is usable (a
// template parameter cannot be deduced from a default argument).
template <class T = ll, class A, class B, class M = ll>
T mul_limited(A a, B b, M m = INF)
{
  assert(a >= 0 && b >= 0 && m >= 0);
  if (b == 0)
    return 0;
  // a * b > m  <=>  a > floor(m / b) for non-negative integers.
  return T(a) > T(m) / T(b) ? T(m) : T(a) * T(b);
}
// Returns min(a^b, m) computed in T without overflowing.
// Requires a, b, m >= 0 (asserted). When m is omitted it defaults to INF;
// M is given a default type so that the default argument is usable (a
// template parameter cannot be deduced from a default argument).
template <class T = ll, class A, class B, class M = ll>
T pow_limited(A a, B b, M m = INF)
{
  assert(a >= 0 && b >= 0 && m >= 0);
  if (a <= 1 || b == 0)
    return min(ipow<T>(a, b), T(m));

  const T limit = T(m);
  T res = 1, tmp = a;
  while (true)
  {
    if (b & 1)
    {
      // res * tmp would exceed the limit.
      if (res > limit / tmp)
        return limit;
      res *= tmp;
    }
    b >>= 1;
    if (b == 0)
      break;
    // A further square is needed, and it would exceed the limit.
    if (tmp > limit / tmp)
      return limit;
    tmp *= tmp;
  }
  return res;
}

// Integer k-th root: starts from a floating-point estimate and adjusts it
// until x satisfies x^k <= a, then (if no downward step was needed) raises x
// while (x + 1)^k <= a still holds.
// Requires a >= 0 and k >= 1 (asserted).
template <class T = ll, class A, class K>
constexpr T iroot(A a, K k)
{
  assert(a >= 0 && k >= 1);
  if (a <= 1 || k == 1)
    return a;
  if (k == 2)
  {
    // Square-root shortcut via sqrtl. For T wider than ull it is taken only
    // when a < 2^120; larger values fall through to the generic search.
    if constexpr (sizeof(T) > sizeof(ull))
    {
      if ((u128)a < ((u128)1 << 120))
        return sqrtl(a);
    }
    else
      return sqrtl(a);
  }

  // isok(x): true iff x^k <= a, evaluated by repeated squaring with a
  // division-based check before every multiplication to avoid overflow.
  auto isok = [&](T x) -> bool
  {
    if (x == 0)
      return true;
    T res = 1, k2 = k;
    while (true)
    {
      if (k2 & 1)
      {
        if (res > T(a) / x)
          return false;
        res *= x;
      }
      k2 >>= 1;
      if (k2 == 0)
        break;
      if (x > T(a) / x)
        return false;
      x *= x;
    }
    return res <= T(a);
  };

  // Floating-point estimate, then exact correction in both directions.
  T x = pow(a, 1.0 / k);
  bool up = true;
  while (!isok(x))
    up = false, x--;
  if (up)
  {
    while (x < numeric_limits<T>::max() && isok(x + 1))
      x++;
  }
  return x;
}

// https://misawa.github.io/others/avoid_errors/techniques_to_avoid_errors.html
// Sign of `a` with tolerance: (a > eps) minus (a < -eps), i.e. +1, -1, or 0
// when `a` lies within [-eps, eps].
template <class D = decltype(EPS), class A>
int SGN(A a, const D &eps = EPS)
{
  const int above = int(a > eps);
  const int below = int(a < -eps);
  return above - below;
}

// Digits of `val` in the given base, in positional-notation order (least
// significant digit last).
// Returns {0} for val == 0. Requires val >= 0 and base >= 2 (asserted).
template <class T = ll, class U, class V>
vc<T> base_repr(U val, V base)
{
  assert(val >= 0);
  assert(base >= 2);
  if (val == 0)
    return {0};
  // Collect digits least-significant first, then hand them back reversed.
  vc<T> low_first;
  for (; val > 0; val /= base)
    low_first.emplace_back(val % base);
  return vc<T>(low_first.rbegin(), low_first.rend());
}
// Exactly n digits of `val` in the given base, in positional-notation order
// (least significant digit last). Higher digits that do not fit are dropped;
// missing ones are zero. Requires val >= 0, base >= 2, n >= 0 (asserted).
template <class T = ll, class U, class V>
vc<T> base_repr(U val, V base, int n)
{
  assert(val >= 0);
  assert(base >= 2);
  assert(n >= 0);
  vc<T> digits(n);
  // Fill from the last slot backwards so no final reversal is needed.
  for (int pos = n - 1; pos >= 0; pos--)
  {
    digits[pos] = val % base;
    val /= base;
  }
  return digits;
}
// String form of `val` in the given base (2..36). Digits 10 and above are
// written as letters, upper case when use_upper is true, else lower case.
template <const bool use_upper = true, class U>
string base_repr_str(U val, int base)
{
  assert(val >= 0);
  assert(2 <= base && base <= 36);
  const char letter_zero = use_upper ? 'A' : 'a';
  string text;
  for (cauto &digit : base_repr(val, base))
  {
    if (digit < 10)
      text += '0' + digit;
    else
      text += letter_zero + (digit - 10);
  }
  return text;
}
// Same as above, but always produces exactly n digits (see the three-argument
// base_repr). Requires n >= 0 (asserted).
template <const bool use_upper = true, class U>
string base_repr_str(U val, int base, int n)
{
  assert(val >= 0);
  assert(2 <= base && base <= 36);
  assert(n >= 0);
  const char letter_zero = use_upper ? 'A' : 'a';
  string text;
  for (cauto &digit : base_repr(val, base, n))
  {
    if (digit < 10)
      text += '0' + digit;
    else
      text += letter_zero + (digit - 10);
  }
  return text;
}


/**
 * @brief テンプレート（vector）
 * @docs docs/template/template_vector.md
 */

// ALL(a): expands to the begin/end iterator pair of `a` (two arguments).
#define ALL(a) (a).begin(), (a).end()
// SZ<T>(x): x.size() converted to T (ll by default), which gives a signed
// size when T is signed.
template <class T = ll, class V>
inline T SZ(const V &x) { return x.size(); }
// eb: shorthand for emplace_back.
#define eb emplace_back

template <class F>
auto gen_vec(const int &n, const F &f)
{
  vc<decltype(f(0))> res(n);
  repi(i, n) res[i] = f(i);
  return res;
}

// https://qiita.com/Chippppp/items/13150f5e0ea99f444d97#%E5%A4%9A%E6%AC%A1%E5%85%83vector%E7%94%9F%E6%88%90%E9%96%A2%E6%95%B0
// Builds a d-dimensional nested vector with extents sz[0], ..., sz[d-1], every
// leaf initialised to `init`. `i` is the recursion depth and is not meant to
// be supplied by callers.
template <class T, size_t d, size_t i = 0, class V>
auto dvec(const V (&sz)[d], const T &init)
{
  if constexpr (i < d)
    // sz[i] copies of the (d - i - 1)-dimensional sub-vector.
    return vc(sz[i], dvec<T, d, i + 1>(sz, init));
  else
    // Past the last dimension: the leaf value itself.
    return init;
}

template <class T = ll>
T ctol(const char &c, const string &s)
{
  repi(i, SZ<int>(s)) if (s[i] == c) return i;
  return -1;
}
// Maps every character of `s` to its offset from `first`
// (e.g. first = 'a' turns "abc" into {0, 1, 2}).
template <class T = ll>
vc<T> stov(const string &s, const char &first)
{
  vc<T> codes(s.size());
  transform(s.begin(), s.end(), codes.begin(),
            [&](char ch) -> T { return ch - first; });
  return codes;
}
// Maps every character of `s` to its index in the alphabet string `t`
// (-1 for characters that do not occur in `t`).
template <class T = ll>
vc<T> stov(const string &s, const string &t)
{
  vc<T> codes(s.size());
  transform(s.begin(), s.end(), codes.begin(),
            [&](char ch) -> T { return ctol(ch, t); });
  return codes;
}

// Flattens a vector of vectors into a single vector, preserving order.
template <class T>
vc<T> concat(const vvc<T> &vs)
{
  vc<T> joined;
  for (size_t k = 0; k < vs.size(); k++)
    joined.insert(joined.end(), vs[k].begin(), vs[k].end());
  return joined;
}
// Single-vector case: returns a copy.
template <class T>
vc<T> concat(const vc<T> &v)
{
  vc<T> copy = v;
  return copy;
}
// Appends each of `vs`, left to right, to a copy of `v`.
template <class T, class... Ts>
vc<T> concat(vc<T> v, const vc<Ts> &...vs)
{
  auto append = [&v](const auto &tail)
  { v.insert(v.end(), tail.begin(), tail.end()); };
  (append(vs), ...);
  return v;
}

// Bounds-tolerant element access: returns v[i] when i is a valid index,
// dflt_negative when i < 0, and dflt_positive when i >= size.
template <class T, class I>
T vecget(const vc<T> &v, I i, const T &dflt_negative = -INF, const T &dflt_positive = INF)
{
  const bool below_range = i < 0;
  if (below_range)
    return dflt_negative;
  const bool above_range = i >= SZ<int>(v);
  return above_range ? dflt_positive : v[i];
}

#ifndef INF
#define INF 4'000'000'000'000'000'037LL
#endif


/**
 * @brief テンプレート（アルゴリズム）
 * @docs docs/template/template_algo.md
 */

// Sum of all elements, accumulated in the container's value_type starting
// from a value-initialised accumulator.
template <class V>
auto SUM(const V &v)
{
  using Elem = typename V::value_type;
  Elem total{};
  for (const auto &item : v)
    total += item;
  return total;
}
// Sum of all elements, accumulated in the explicitly requested type T
// (e.g. SUM<ll>(vector<int>) to avoid int overflow).
template <class T, class V>
T SUM(const V &v)
{
  T total{};
  for (const auto &item : v)
    total += item;
  return total;
}
// Largest element of v. Dereferences max_element's result, so v must be
// non-empty.
template <class V>
auto MAX(const V &v) { return *max_element(ALL(v)); }
// Smallest element of v. Dereferences min_element's result, so v must be
// non-empty.
template <class V>
auto MIN(const V &v) { return *min_element(ALL(v)); }
// Index of the element max_element selects, converted to I.
template <class I = ll, class V>
I ARGMAX(const V &v) { return max_element(ALL(v)) - v.begin(); }
// Index of the element min_element selects, converted to I.
template <class I = ll, class V>
I ARGMIN(const V &v) { return min_element(ALL(v)) - v.begin(); }

template<class T = ll, class V>
T mex(const V &a)
{
  int n = a.size();
  vector<bool> exists(n, false);
  repi(i, n) if (0 <= a[i] && a[i] < n) exists[a[i]] = true;
  repi(x, n) if (!exists[x]) return x;
  return n;
}

// Identity permutation of length n: {base_index, base_index + 1, ...}.
template <class T = ll>
vc<T> permid(const int &n, const int &base_index = 0)
{
  vc<T> ident(n);
  for (int pos = 0; pos < n; pos++)
    ident[pos] = pos + base_index;
  return ident;
}
// Inverse mapping of p: q[p[i]] = i for every i with p[i] >= 0.
// The result has length max(p) + 1; slots never written stay value-initialised.
// Returns an empty vector for empty input.
template <class T>
vc<T> perminv(const vc<T> &p)
{
  if (p.empty())
    return {};
  vc<T> inverse(MAX(p) + 1);
  int idx = 0;
  for (const auto &image : p)
  {
    if (image >= 0)
      inverse[image] = idx;
    idx++;
  }
  return inverse;
}
// Returns {a[p[0]], a[p[1]], ...}. Every p[i] must be a valid index into a
// (asserted).
template <class T, class U>
vc<T> permuted(const vc<T> &a, const vc<U> &p)
{
  const int count = p.size();
  vc<T> picked(count);
  for (int pos = 0; pos < count; pos++)
  {
    const U &src = p[pos];
    assert(0 <= src && src < U(a.size()));
    picked[pos] = a[src];
  }
  return picked;
}

// Returns a copy of v with its elements in reverse order.
template <class V>
V reversed(const V &v)
{
  V backwards(v.rbegin(), v.rend());
  return backwards;
}

// sorted(v, args...): returns a sorted copy of v. Extra arguments are
// forwarded to the underlying sort (a comparator before C++20; a comparator
// and/or projection with ranges::sort from C++20 on).
#if __cplusplus < 202002L
template <class V, class... Args>
V sorted(V v, Args&&... args)
{
  sort(ALL(v), forward<Args>(args)...);
  return v;
}
#else
template <class V, class... Args>
V sorted(V v, Args&&... args)
{
  ranges::sort(v, forward<Args>(args)...);
  return v;
}
#endif

// Removes consecutive duplicate elements from v in place (std::unique plus
// erase of the leftover tail).
template <class V>
void unique(V &v) { v.erase(std::unique(ALL(v)), v.end()); }
// Copying variant of unique.
template <class V>
V uniqued(V v) { unique(v); return v; }

// Sorts v and then removes duplicates, in place.
template <class V>
void sortunique(V &v)
{
  sort(ALL(v));
  unique(v);
}
// Copying variant of sortunique.
template <class V>
V sortuniqued(V v) { sortunique(v); return v; }

// Rotates v left by k positions in place: 01234 -> 12340 for k = 1.
// k may be negative or exceed the size; it is reduced modulo the size.
// An empty container is left untouched (reducing k modulo 0 would divide by
// zero).
template <class V, class U>
void rotate(V &v, U k)
{
  const U n = v.size();
  if (n == 0)
    return;
  k = (k % n + n) % n;
  std::rotate(v.begin(), v.begin() + k, v.end());
}
// Copying variant of rotate: 01234 -> 12340 for k = 1.
template <class V, class U>
V rotated(V v, U k) { rotate(v, k); return v; }

// Transpose of a rectangular 2D vector: result[j][i] = a[i][j].
// Every row must have the same length as the first one (asserted).
// Returns an empty vector for empty input.
template <class T>
vvc<T> top(const vvc<T> &a)
{
  if (a.empty())
    return {};
  const int rows = a.size(), cols = a[0].size();
  vvc<T> flipped(cols, vc<T>(rows));
  for (int r = 0; r < rows; r++)
  {
    assert(SZ<int>(a[r]) == cols);
    for (int c = 0; c < cols; c++)
      flipped[c][r] = a[r][c];
  }
  return flipped;
}
// Transpose of a rectangular grid of strings: converts the rows to character
// vectors, transposes them, and converts the result back to strings.
vstr top(const vstr &a)
{
  vvc<char> grid;
  for (const auto &row : a)
    grid.emplace_back(row.begin(), row.end());
  const vvc<char> flipped = top(grid);
  vstr out;
  for (const auto &column : flipped)
    out.emplace_back(column.begin(), column.end());
  return out;
}

// 12
// 34 -> 246
// 56    135
// (反時計回り)
template <class VV, class U = ll>
VV rot90(const VV &a, U k = 1)
{
  if (a.empty())
    return {};
  const int n = a.size(), m = a[0].size();
  k = (k % 4 + 4) % 4;
  if (k == 0)
    return a;
  else if (k == 1)
  {
    VV b(m);
    repi(j, m) b[j].resize(n);
    repi(i, n)
    {
      assert(SZ<int>(a[i]) == m);
      repi(j, m) b[m - 1 - j][i] = a[i][j];
    }
    return b;
  }
  else if (k == 2)
  {
    VV b(n);
    repi(i, n) b[i].resize(m);
    repi(i, n)
    {
      assert(SZ<int>(a[i]) == m);
      repi(j, m) b[n - 1 - i][m - 1 - j] = a[i][j];
    }
    return b;
  }
  else
  {
    VV b(m);
    repi(j, m) b[j].resize(n);
    repi(i, n)
    {
      assert(SZ<int>(a[i]) == m);
      repi(j, m) b[j][n - 1 - i] = a[i][j];
    }
    return b;
  }
}

// Monoid descriptors: S is the value type, op the binary operation and e()
// the identity element.

// Addition, with identity 0.
template <class T>
struct MonoidAdd
{
  using S = T;
  static constexpr S op(S a, S b) { return a + b; }
  static constexpr S e() { return 0; }
};
// Minimum, with identity `infty` (INF by default).
template <class T, const T infty = INF>
struct MonoidMin
{
  using S = T;
  static constexpr S op(S a, S b) { return min(a, b); }
  static constexpr S e() { return infty; }
};
// Maximum, with identity `-infty` (-INF by default).
template <class T, const T infty = INF>
struct MonoidMax
{
  using S = T;
  static constexpr S op(S a, S b) { return max(a, b); }
  static constexpr S e() { return -infty; }
};

// Left-to-right prefix accumulation under monoid M.
// With left_index == 0 the result has length n+1 and starts with M::e();
// with left_index == 1 the leading identity is dropped (length n).
template <class M>
vc<typename M::S> cuml(const vc<typename M::S> &v, int left_index = 0)
{
  const int n = v.size();
  vc<typename M::S> prefix(n + 1);
  prefix.front() = M::e();
  for (int len = 1; len <= n; len++)
    prefix[len] = M::op(prefix[len - 1], v[len - 1]);
  prefix.erase(prefix.begin(), prefix.begin() + left_index);
  return prefix;
}
// Right-to-left (suffix) accumulation under monoid M, implemented as cuml on
// the reversed input.
// With right_index == 0 the result has length n+1 and ends with M::e();
// with right_index == 1 the trailing identity is dropped (length n).
template <class M>
vc<typename M::S> cumr(const vc<typename M::S> &v, int right_index = 0)
{ return reversed(cuml<M>(reversed(v), right_index)); }
// Prefix sums.
template <class T>
vc<T> cumlsum(const vc<T> &v, int left_index = 0)
{ return cuml<MonoidAdd<T>>(v, left_index); }
// Suffix sums.
template <class T>
vc<T> cumrsum(const vc<T> &v, int right_index = 0)
{ return cumr<MonoidAdd<T>>(v, right_index); }
// Prefix minima.
template <class T>
vc<T> cumlmin(const vc<T> &v, int left_index = 0)
{ return cuml<MonoidMin<T>>(v, left_index); }
// Suffix minima.
template <class T>
vc<T> cumrmin(const vc<T> &v, int right_index = 0)
{ return cumr<MonoidMin<T>>(v, right_index); }
// Prefix maxima.
template <class T>
vc<T> cumlmax(const vc<T> &v, int left_index = 0)
{ return cuml<MonoidMax<T>>(v, left_index); }
// Suffix maxima.
template <class T>
vc<T> cumrmax(const vc<T> &v, int right_index = 0)
{ return cumr<MonoidMax<T>>(v, right_index); }

// Adjacent differences of v, treating the values outside v as zero:
//   res[0] = v[0], res[i] = v[i] - v[i-1], res[n] = -v[n-1].
// The result has length n+1 by default; setting left_index / right_index to 1
// removes the first / last element respectively.
// For empty input the unpadded result is a single value-initialised element;
// the trimming counts are clamped so it is never erased past its end.
template <class T>
std::vector<T> adjd(const std::vector<T> &v, int left_index = 0, int right_index = 0)
{
  const int n = v.size();
  std::vector<T> res(n + 1);
  if (n > 0)
  {
    res[0] = v[0];
    for (int i = 1; i < n; i++)
      res[i] = v[i] - v[i - 1];
    res[n] = -v[n - 1];
  }
  const int drop_right = std::min<int>(right_index, res.size());
  res.erase(res.end() - drop_right, res.end());
  const int drop_left = std::min<int>(left_index, res.size());
  res.erase(res.begin(), res.begin() + drop_left);
  return res;
}

// Four-neighbour offsets in Down, Right, Up, Left order (per the names).
// NOTE(review): presumably DRULgrid holds (row, col) deltas with rows growing
// downward, and DRULplane holds (x, y) deltas with y growing upward — confirm
// against callers.
const vpll DRULgrid = {{1, 0}, {0, 1}, {-1, 0}, {0, -1}};
const vpll DRULplane = {{0, -1}, {1, 0}, {0, 1}, {-1, 0}};


/**
 * @brief テンプレート（二分探索）
 * @docs docs/template/template_binsearch.md
 */

// Trait: true iff T's iterator_category is exactly random_access_iterator_tag.
// Used below to choose between the index-returning overloads (vector-like
// containers) and the iterator-returning overloads (set-like containers).
template <class T>
struct is_random_access_iterator
{
  static constexpr bool value = is_same_v<
    typename iterator_traits<T>::iterator_category,
    random_access_iterator_tag
  >;
};
// Variable-template shorthand for is_random_access_iterator<T>::value.
template <class T>
constexpr bool is_random_access_iterator_v = is_random_access_iterator<T>::value;

// --- LB, UB ---

#if __cplusplus < 202002L
// Pre-C++20 stand-in for std::identity: returns its argument unchanged.
struct identity
{
  template <class T>
  constexpr T &&operator()(T &&t) const noexcept
  { return forward<T>(t); }
};
namespace internal
{
  // Binary search over indices [0, v.size()): returns the smallest index for
  // which judge is false (v.size() if there is none). judge must be true on
  // a prefix of the indices and false afterwards.
  template <class T = ll, class V, class Judge>
  inline T bound_helper(const V &v, Judge judge)
  {
    int l = -1, r = v.size();
    while (r - l > 1)
    {
      int m = (l + r) / 2;
      if (judge(m))
        l = m;
      else
        r = m;
    }
    return r;
  }
};
// Smallest i with val <= v[i] (equivalently, the number of elements < val).
template <class T = ll, class V, class Value, class Comp = less<>, class Proj = identity>
inline T LB(const V &v, const Value &val, Comp comp = {}, Proj proj = {})
{
  return internal::bound_helper(v, [&](int i) -> bool
                                { return comp(proj(*(v.begin() + i)), val); });
}
// Smallest i with val < v[i] (equivalently, the number of elements <= val).
template <class T = ll, class V, class Value, class Comp = less<>, class Proj = identity>
inline T UB(const V &v, const Value &val, Comp comp = {}, Proj proj = {})
{
  return internal::bound_helper(v, [&](int i) -> bool
                                { return !comp(val, proj(*(v.begin() + i))); });
}
#define DEFAULT_COMP less<>
#else
// Smallest i with val <= v[i] (equivalently, the number of elements < val).
template <class T = ll, class V, class Value, class Comp = ranges::less, class Proj = identity>
inline T LB(const V &v, const Value &val, Comp comp = {}, Proj proj = {})
{ return ranges::lower_bound(v, val, comp, proj) - v.begin(); }
// Smallest i with val < v[i] (equivalently, the number of elements <= val).
template <class T = ll, class V, class Value, class Comp = ranges::less, class Proj = identity>
inline T UB(const V &v, const Value &val, Comp comp = {}, Proj proj = {})
{ return ranges::upper_bound(v, val, comp, proj) - v.begin(); }
#define DEFAULT_COMP ranges::less
#endif

// --- vector 等の lt, leq, gt, geq ---

// Index-returning overloads, enabled only for containers whose iterator is
// random access. All of them assume v is sorted consistently with comp/proj.

// Largest i with v[i] < val (-1 if none).
template <class T = ll, class V, class Value, class Comp = DEFAULT_COMP, class Proj = identity>
inline auto lt_max(const V &v, const Value &val, Comp comp = {}, Proj proj = {})
-> enable_if_t<is_random_access_iterator_v<typename V::iterator>, T>
{ return LB<T>(v, val, comp, proj) - 1; }
// Largest i with v[i] <= val (-1 if none).
template <class T = ll, class V, class Value, class Comp = DEFAULT_COMP, class Proj = identity>
inline auto leq_max(const V &v, const Value &val, Comp comp = {}, Proj proj = {})
-> enable_if_t<is_random_access_iterator_v<typename V::iterator>, T>
{ return UB<T>(v, val, comp, proj) - 1; }
// Smallest i with val < v[i] (n if none).
template <class T = ll, class V, class Value, class Comp = DEFAULT_COMP, class Proj = identity>
inline auto gt_min(const V &v, const Value &val, Comp comp = {}, Proj proj = {})
-> enable_if_t<is_random_access_iterator_v<typename V::iterator>, T>
{ return UB<T>(v, val, comp, proj); }
// Smallest i with val <= v[i] (n if none).
template <class T = ll, class V, class Value, class Comp = DEFAULT_COMP, class Proj = identity>
inline auto geq_min(const V &v, const Value &val, Comp comp = {}, Proj proj = {})
-> enable_if_t<is_random_access_iterator_v<typename V::iterator>, T>
{ return LB<T>(v, val, comp, proj); }
// Number of i with v[i] < val.
template <class T = ll, class V, class Value, class Comp = DEFAULT_COMP, class Proj = identity>
inline auto lt_cnt(const V &v, const Value &val, Comp comp = {}, Proj proj = {})
-> enable_if_t<is_random_access_iterator_v<typename V::iterator>, T>
{ return LB<T>(v, val, comp, proj); }
// Number of i with v[i] <= val.
template <class T = ll, class V, class Value, class Comp = DEFAULT_COMP, class Proj = identity>
inline auto leq_cnt(const V &v, const Value &val, Comp comp = {}, Proj proj = {})
-> enable_if_t<is_random_access_iterator_v<typename V::iterator>, T>
{ return UB<T>(v, val, comp, proj); }
// Number of i with val < v[i].
template <class T = ll, class V, class Value, class Comp = DEFAULT_COMP, class Proj = identity>
inline auto gt_cnt(const V &v, const Value &val, Comp comp = {}, Proj proj = {})
-> enable_if_t<is_random_access_iterator_v<typename V::iterator>, T>
{ return SZ<T>(v) - UB<T>(v, val, comp, proj); }
// Number of i with val <= v[i].
template <class T = ll, class V, class Value, class Comp = DEFAULT_COMP, class Proj = identity>
inline auto geq_cnt(const V &v, const Value &val, Comp comp = {}, Proj proj = {})
-> enable_if_t<is_random_access_iterator_v<typename V::iterator>, T>
{ return SZ<T>(v) - LB<T>(v, val, comp, proj); }
// Number of i with l <= v[i] < r (0 when l > r).
template <class T = ll, class V, class L, class R, class Comp = DEFAULT_COMP, class Proj = identity>
inline auto in_cnt(const V &v, L l, R r, Comp comp = {}, Proj proj = {})
-> enable_if_t<is_random_access_iterator_v<typename V::iterator>, T>
{
  if (l > r)
    return 0;
  return lt_cnt<T>(v, r, comp, proj) - lt_cnt<T>(v, l, comp, proj);
}

// --- set 等の lt, leq, gt, geq ---

// Iterator-returning overloads, enabled only for containers whose iterator is
// NOT random access; they rely on the container's own lower_bound/upper_bound
// member functions.

// Last iterator with *it < val (end() if none).
template <class V, class Value>
inline auto lt_max(const V &v, const Value &val)
-> enable_if_t<!is_random_access_iterator_v<typename V::iterator>, typename V::const_iterator>
{
  auto it = v.lower_bound(val);
  return it == v.begin() ? v.end() : prev(it);
}
// Last iterator with *it <= val (end() if none).
template <class V, class Value>
inline auto leq_max(const V &v, const Value &val)
-> enable_if_t<!is_random_access_iterator_v<typename V::iterator>, typename V::const_iterator>
{
  auto it = v.upper_bound(val);
  return it == v.begin() ? v.end() : prev(it);
}
// First iterator with val < *it (end() if none).
template <class V, class Value>
inline auto gt_min(const V &v, const Value &val)
-> enable_if_t<!is_random_access_iterator_v<typename V::iterator>, typename V::const_iterator>
{ return v.upper_bound(val); }
// First iterator with val <= *it (end() if none).
template <class V, class Value>
inline auto geq_min(const V &v, const Value &val)
-> enable_if_t<!is_random_access_iterator_v<typename V::iterator>, typename V::const_iterator>
{ return v.lower_bound(val); }

// --- 自作二分探索 ---

// Integer bisection on a monotone predicate. Starts from a value where judge
// is true (init_ok) and one where it is false (init_ng), both asserted, and
// narrows the gap until the two are adjacent.
// Returns the final pair (ok, ng).
template <class T = ll, class Judge, class InitOk, class InitNg>
pair<T, T> binsearch(const Judge &judge, const InitOk &init_ok, const InitNg &init_ng)
{
  T ok(init_ok), ng(init_ng);
  assert(judge(ok));
  assert(!judge(ng));
  for (;;)
  {
    if (ok - ng == 1 || ng - ok == 1)
      break;
    // Overflow-free floor of the average of ok and ng.
    const T mid = (ok & ng) + ((ok ^ ng) >> 1);
    if (judge(mid))
      ok = mid;
    else
      ng = mid;
  }
  return {ok, ng};
}
// Real-valued bisection on a monotone predicate: performs a fixed number of
// halvings (iteration_count, default 100) between a value where judge is true
// and one where it is false (both asserted), and returns the final `ok`.
template <class T = ld, class Judge, class InitOk, class InitNg>
T binsearch_real(const Judge &judge, const InitOk &init_ok, const InitNg &init_ng, int iteration_count = 100)
{
  T ok(init_ok), ng(init_ng);
  assert(judge(ok));
  assert(!judge(ng));
  for (int step = 0; step < iteration_count; step++)
  {
    const T mid = (ok + ng) / 2;
    if (judge(mid))
      ok = mid;
    else
      ng = mid;
  }
  return ok;
}
// Exponential (galloping) search followed by bisection. Starting at init_val,
// probes at distances 1, 2, 4, ... in the positive (or negative, when
// `positive` is false) direction until judge changes value, then bisects that
// bracket. Returns the final adjacent pair (ok, ng).
// No overflow or termination guard: judge must change value somewhere in the
// chosen direction.
template <class T = ll, class Judge, class InitVal>
pair<T, T> expsearch(const Judge &judge, const InitVal &init_val, bool positive = true)
{
  T ok, ng;
  if (judge(init_val))
  {
    // judge holds at the start: gallop until it first fails.
    ok = init_val, ng = init_val + (positive ? 1 : -1);
    for (int i = 1; judge(ng); i++)
      ok = ng, ng = init_val + (positive ? 1 : -1) * (T(1) << i);
  }
  else
  {
    // judge fails at the start: gallop until it first holds.
    ng = init_val, ok = init_val + (positive ? 1 : -1);
    for (int i = 1; !judge(ok); i++)
      ng = ok, ok = init_val + (positive ? 1 : -1) * (T(1) << i);
  }
  // Bisect the bracket; mid is the overflow-free floor of the average.
  while (ok - ng != 1 && ng - ok != 1)
  {
    T mid = (ok & ng) + ((ok ^ ng) >> 1);
    (judge(mid) ? ok : ng) = mid;
  }
  return {ok, ng};
}


/**
 * @brief テンプレート（ビット演算）
 * @docs docs/template/template_bit.md
 */

// 2^k as an unsigned 64-bit value. Requires 0 <= k < 64.
template <class T>
inline constexpr ull pow2(T k) { return 1ULL << k; }
// Bit mask with the lowest k bits set. For k >= 64 every bit is set; shifting
// a 64-bit value by 64 or more is undefined, so that case is answered
// directly instead of being computed as (1 << k) - 1.
template <class T>
inline constexpr ull MASK(T k) { return k >= 64 ? ~0ULL : (1ULL << k) - 1ULL; }

// Global-namespace wrappers for the <bit> operations. Before C++20 they are
// implemented with GCC builtins on ull; from C++20 on they forward to the std
// versions and take/return ll.
#if __cplusplus < 202002L
// 0 if x == 0, otherwise 1 + floor(log2(x)).
// 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, ... 
inline constexpr ull bit_width(ull x) { return x == 0 ? 0 : 64 - __builtin_clzll(x); }
// Largest power of two <= x (0 for x == 0).
// 0, 1, 2, 2, 4, 4, 4, 4, 8, 8, ...
inline constexpr ull bit_floor(ull x) { return x == 0 ? 0ULL : 1ULL << (bit_width(x) - 1); }
// Smallest power of two >= x (1 for x == 0).
// 1, 1, 2, 4, 4, 8, 8, 8, 8, 16, ...
inline constexpr ull bit_ceil(ull x) { return x == 0 ? 1ULL : 1ULL << bit_width(x - 1); }
// Number of trailing zero bits; x must be non-zero (asserted).
inline constexpr ull countr_zero(ull x) { assert(x != 0); return __builtin_ctzll(x); }
// Number of set bits.
inline constexpr ull popcount(ull x) { return __builtin_popcountll(x); }
// True iff exactly one bit is set.
inline constexpr bool has_single_bit(ull x) { return popcount(x) == 1; }
#else
// 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, ... 
inline constexpr ll bit_width(ll x) { return std::bit_width((ull)x); }
// 0, 1, 2, 2, 4, 4, 4, 4, 8, 8, ...
inline constexpr ll bit_floor(ll x) { return std::bit_floor((ull)x); }
// 1, 1, 2, 4, 4, 8, 8, 8, 8, 16, ...
inline constexpr ll bit_ceil(ll x) { return std::bit_ceil((ull)x); }
// Number of trailing zero bits; x must be non-zero (asserted).
inline constexpr ll countr_zero(ll x) { assert(x != 0); return std::countr_zero((ull)x); }
// Number of set bits.
inline constexpr ll popcount(ll x) { return std::popcount((ull)x); }
// True iff exactly one bit is set.
inline constexpr bool has_single_bit(ll x) { return std::has_single_bit((ull)x); }
#endif

// Position / single-bit mask of the lowest and highest set bit.
// x must be non-zero (asserted) in all four.
inline constexpr ull lsb_pos(ull x) { assert(x != 0); return countr_zero(x); }
inline constexpr ull msb_pos(ull x) { assert(x != 0); return bit_width(x) - 1; }
inline constexpr ull lsb_mask(ull x) { assert(x != 0); return x & -x; }
inline constexpr ull msb_mask(ull x) { assert(x != 0); return bit_floor(x); }

// btest: true iff bit k of x is set.
inline constexpr bool btest(ull x, uint k) { return (x >> k) & 1; }
// bset: sets bit k of x to b (1 by default).
template <class T>
inline void bset(T &x, uint k, bool b = 1) { b ? x |= (1ULL << k) : x &= ~(1ULL << k); }
// bflip: toggles bit k of x.
template <class T>
inline void bflip(T &x, uint k) { x ^= (1ULL << k); }
// bsubset: true iff every bit set in x is also set in y.
inline constexpr bool bsubset(ull x, ull y) { return (x & y) == x; }
// bsupset: true iff every bit set in y is also set in x.
inline constexpr bool bsupset(ull x, ull y) { return (x & y) == y; }
// bsetminus: the bits of x that are not set in y.
inline constexpr ull bsetminus(ull x, ull y) { return x & ~y; }



/**
 * @brief テンプレート（dump）
 * @docs docs/template/template_dump.md
 */

// Debug-dump glue. With LOCAL defined, dump(...) forwards to cpp-dump and
// local(...) keeps its argument; otherwise both macros expand to nothing.
#ifdef LOCAL
#include <cpp-dump.hpp> // https://github.com/philip82148/cpp-dump
namespace cpp_dump::_detail
{
  // Teaches cpp-dump to print i128 by exporting its decimal string.
  inline string export_var(
      const i128 &x, const string &indent, size_t last_line_length,
      size_t current_depth, bool fail_on_newline, const export_command &command
  ) {
    return export_var(i128tos(x), indent, last_line_length, current_depth, fail_on_newline, command);
  }
} // namespace cpp_dump::_detail
#define dump(...) cpp_dump(__VA_ARGS__)
namespace cp = cpp_dump;
// Global cpp-dump options: label each log with the line, and raise the
// container iteration limit.
CPP_DUMP_SET_OPTION_GLOBAL(log_label_func, cp::log_label::line());
CPP_DUMP_SET_OPTION_GLOBAL(max_iteration_count, 10000);
#define local(...) __VA_ARGS__
#else
#define dump(...)
#define local(...)
#endif

/**
 * @brief テンプレート（入出力）
 * @docs docs/template/template_inout.md
 */

// https://judge.yosupo.jp/submission/170706 (maspy さん)
// https://judge.yosupo.jp/submission/21623  (Nyaan さん)
#if defined FAST_IO and not defined LOCAL
namespace fastio {
static constexpr uint32_t SIZ = 1 << 17;
char ibuf[SIZ];
char obuf[SIZ];
char out[100];
// pointer of ibuf, obuf
uint32_t pil = 0, pir = 0, por = 0;

struct Pre {
  char num[10000][4];
  constexpr Pre() : num() {
    for (int i = 0; i < 10000; i++) {
      int n = i;
      for (int j = 3; j >= 0; j--) {
        num[i][j] = n % 10 | '0';
        n /= 10;
      }
    }
  }
} constexpr pre;

inline void load() {
  memcpy(ibuf, ibuf + pil, pir - pil);
  pir = pir - pil + fread(ibuf + pir - pil, 1, SIZ - pir + pil, stdin);
  pil = 0;
  if (pir < SIZ) ibuf[pir++] = '\n';
}

inline void flush() {
  fwrite(obuf, 1, por, stdout);
  por = 0;
}

void rd1(char &c) {
  do {
    if (pil + 1 > pir) load();
    c = ibuf[pil++];
  } while (isspace(c));
}

void rd1(string &x) {
  x.clear();
  char c;
  do {
    if (pil + 1 > pir) load();
    c = ibuf[pil++];
  } while (isspace(c));
  do {
    x += c;
    if (pil == pir) load();
    c = ibuf[pil++];
  } while (!isspace(c));
}

template <typename T>
void rd1_real(T &x) {
  string s;
  rd1(s);
  x = stod(s);
}

template <typename T>
void rd1_integer(T &x) {
  if (pil + 100 > pir) load();
  char c;
  do
    c = ibuf[pil++];
  while (c < '-');
  bool minus = 0;
  if constexpr (is_signed<T>::value || is_same_v<T, i128>) {
    if (c == '-') { minus = 1, c = ibuf[pil++]; }
  }
  x = 0;
  while ('0' <= c) { x = x * 10 + (c & 15), c = ibuf[pil++]; }
  if constexpr (is_signed<T>::value || is_same_v<T, i128>) {
    if (minus) x = -x;
  }
}

void rd1(int &x) { rd1_integer(x); }
void rd1(ll &x) { rd1_integer(x); }
void rd1(i128 &x) { rd1_integer(x); }
void rd1(uint &x) { rd1_integer(x); }
void rd1(ull &x) { rd1_integer(x); }
void rd1(u128 &x) { rd1_integer(x); }
void rd1(double &x) { rd1_real(x); }
void rd1(long double &x) { rd1_real(x); }
// void rd1(f128 &x) { rd1_real(x); }

template <class T, class U>
void rd1(pair<T, U> &p) {
  return rd1(p.first), rd1(p.second);
}
template <size_t N = 0, typename T>
void rd1_tuple(T &t) {
  if constexpr (N < std::tuple_size<T>::value) {
    auto &x = std::get<N>(t);
    rd1(x);
    rd1_tuple<N + 1>(t);
  }
}
template <class... T>
void rd1(tuple<T...> &tpl) {
  rd1_tuple(tpl);
}

template <size_t N = 0, typename T>
void rd1(array<T, N> &x) {
  for (auto &d: x) rd1(d);
}
template <class T>
void rd1(vc<T> &x) {
  for (auto &d: x) rd1(d);
}

void read() {}
template <class H, class... T>
void read(H &h, T &... t) {
  rd1(h), read(t...);
}

void wt1(const char c) {
  if (por == SIZ) flush();
  obuf[por++] = c;
}
void wt1(const string s) {
  for (char c: s) wt1(c);
}
void wt1(const char *s) {
  size_t len = strlen(s);
  for (size_t i = 0; i < len; i++) wt1(s[i]);
}

template <typename T>
void wt1_integer(T x) {
  if (por > SIZ - 100) flush();
  if (x < 0) { obuf[por++] = '-', x = -x; }
  int outi;
  for (outi = 96; x >= 10000; outi -= 4) {
    memcpy(out + outi, pre.num[x % 10000], 4);
    x /= 10000;
  }
  if (x >= 1000) {
    memcpy(obuf + por, pre.num[x], 4);
    por += 4;
  } else if (x >= 100) {
    memcpy(obuf + por, pre.num[x] + 1, 3);
    por += 3;
  } else if (x >= 10) {
    int q = (x * 103) >> 10;
    obuf[por] = q | '0';
    obuf[por + 1] = (x - q * 10) | '0';
    por += 2;
  } else
    obuf[por++] = x | '0';
  memcpy(obuf + por, out + outi + 4, 96 - outi);
  por += 96 - outi;
}

template <typename T>
void wt1_real(T x) {
  ostringstream oss;
  oss << fixed << setprecision(15) << double(x);
  string s = oss.str();
  wt1(s);
}

template <class T, enable_if_t<is_integral_v<T>, int> = 0>
void wt1(T x) { wt1_integer(x); }
void wt1(i128 x) { wt1_integer(x); }
void wt1(u128 x) { wt1_integer(x); }
void wt1(double x) { wt1_real(x); }
void wt1(long double x) { wt1_real(x); }
// void wt1(f128 x) { wt1_real(x); }

template <class T, class U>
void wt1(const pair<T, U> &val) {
  wt1(val.first);
  wt1(' ');
  wt1(val.second);
}
template <size_t N = 0, typename T>
void wt1_tuple(const T &t) {
  if constexpr (N < std::tuple_size<T>::value) {
    if constexpr (N > 0) { wt1(' '); }
    const auto x = std::get<N>(t);
    wt1(x);
    wt1_tuple<N + 1>(t);
  }
}
template <class... T>
void wt1(const tuple<T...> &tpl) {
  wt1_tuple(tpl);
}
template <class T, size_t S>
void wt1(const array<T, S> &val) {
  auto n = val.size();
  for (size_t i = 0; i < n; i++) {
    if (i) wt1(' ');
    wt1(val[i]);
  }
}
template <class T>
void wt1(const vector<T> &val) {
  auto n = val.size();
  for (size_t i = 0; i < n; i++) {
    if (i) wt1(' ');
    wt1(val[i]);
  }
}

void write() {}
template <class Head, class... Tail>
void write(Head &&head, Tail &&... tail) {
  wt1(head);
  write(forward<Tail>(tail)...);
}

void print() { wt1('\n'); }
template <class Head, class... Tail>
void print(Head &&head, Tail &&... tail) {
  wt1(head);
  if (sizeof...(Tail)) wt1(' ');
  print(forward<Tail>(tail)...);
}

} // namespace fastio

#endif

#if defined FAST_IO and not defined LOCAL
// The constructor of the global `dummy` object registers fastio::flush with
// atexit, so buffered output is written when the program exits normally.
struct Dummy {
  Dummy() { atexit(fastio::flush); }
} dummy;
#endif

// https://trap.jp/post/1224/

// ---- 入力 ----
#if defined LOCAL or not defined FAST_IO
// Reads both members of a pair, first then second.
template <class T, class U>
istream &operator>>(istream &is, pair<T, U> &p)
{
  return is >> p.first >> p.second;
}
// Reads every element of a tuple, in order.
template <class... Ts>
istream &operator>>(istream &is, tuple<Ts...> &t)
{
  apply([&is](auto &...elems)
        { ((is >> elems), ...); }, t);
  return is;
}
// Reads every element of a fixed-size array, in order.
template <class T, size_t n>
istream &operator>>(istream &is, array<T, n> &a)
{
  for (auto &elem : a)
    is >> elem;
  return is;
}
// Reads a.size() elements into an already-sized vector.
template <class T>
istream &operator>>(istream &is, vc<T> &a)
{
  for (auto &elem : a)
    is >> elem;
  return is;
}
#endif

// Input helpers behind the READ / VEC / VEC2 / JAG macros. The "nodump"
// suffix marks the raw readers; the macros add the dump(...) call.
namespace internal
{

#if defined LOCAL or not defined FAST_IO
// Reads every argument from cin, in order.
template <class... Ts>
void CIN(Ts &...a) { (cin >> ... >> a); }
#endif

// READnodump: reads every argument, through fastio when FAST_IO is enabled
// and LOCAL is not defined, otherwise through cin.
#if defined FAST_IO and not defined LOCAL
template <class... Ts>
void READnodump(Ts &...a) { fastio::read(a...); }
#else
template <class... Ts>
void READnodump(Ts &...a) { CIN(a...); }
#endif

// Resizes v to n and reads n elements into it.
template <class T>
void READVECnodump(int n, vc<T> &v)
{
  v.resize(n);
  READnodump(v);
}
// Several vectors of length n, read one whole vector after another.
template <class T, class... Ts>
void READVECnodump(int n, vc<T> &v, vc<Ts> &...vs)
{ READVECnodump(n, v), READVECnodump(n, vs...); }

// Makes v an n-by-m grid and reads it row by row.
template <class T>
void READVEC2nodump(int n, int m, vvc<T> &v)
{
  v.assign(n, vc<T>(m));
  READnodump(v);
}
// Several n-by-m grids, read one whole grid after another.
template <class T, class... Ts>
void READVEC2nodump(int n, int m, vvc<T> &v, vvc<Ts> &...vs)
{ READVEC2nodump(n, m, v), READVEC2nodump(n, m, vs...); }

// Reads a jagged array of n rows; each row is given as its length k followed
// by k elements.
template <class T>
void READJAGnodump(int n, vvc<T> &v)
{
  v.resize(n);
  repi(i, n)
  {
    int k;
    READnodump(k);
    READVECnodump(k, v[i]);
  }
}
// Several jagged arrays of n rows, read one whole array after another.
template <class T, class... Ts>
void READJAGnodump(int n, vvc<T> &v, vvc<Ts> &...vs)
{ READJAGnodump(n, v), READJAGnodump(n, vs...); }

}; // namespace internal

// READ(a, b, ...): read into existing variables, then pass the same arguments
// to dump (defined elsewhere in this file).
// NOTE: this and the macros below expand to more than one statement, so they
// must not be used as the unbraced body of an if/for/while.
#define READ(...) internal::READnodump(__VA_ARGS__); dump(__VA_ARGS__)

// IN(T, a, b, ...): declare variables a, b, ... of type T and READ them.
#define IN(T, ...) T __VA_ARGS__; READ(__VA_ARGS__)

// Shorthands of IN for common types; ARR declares array<T, n> variables.
#define CHAR(...) IN(char, __VA_ARGS__)
#define INT(...) IN(int, __VA_ARGS__)
#define LL(...) IN(ll, __VA_ARGS__)
#define STR(...) IN(string, __VA_ARGS__)
#define ARR(T, n, ...) array<T, n> __VA_ARGS__; READ(__VA_ARGS__)

// READVEC(n, v...): resize each existing vector to n and read it.
// READVEC2(n, m, v...): same for n x m grids.
#define READVEC(...) internal::READVECnodump(__VA_ARGS__); dump(__VA_ARGS__)
#define READVEC2(...) internal::READVEC2nodump(__VA_ARGS__); dump(__VA_ARGS__)

// VEC / VEC2: declare the vectors (or grids) of element type T, then read them.
#define VEC(T, n, ...) vc<T> __VA_ARGS__; READVEC(n, __VA_ARGS__)
#define VEC2(T, n, m, ...) vvc<T> __VA_ARGS__; READVEC2(n, m, __VA_ARGS__)

// READJAG(n, v...): read jagged arrays with n rows, where each row's length
// precedes its elements in the input.
#define READJAG(...) internal::READJAGnodump(__VA_ARGS__); dump(__VA_ARGS__)

// JAG: declare the jagged arrays of element type T, then read them.
#define JAG(T, n, ...) vvc<T> __VA_ARGS__; READJAG(n, __VA_ARGS__)

// ----------

// ----- Output -----
// Line terminator used by the cout-based printers: endl (which also flushes)
// when INTERACTIVE is defined, a plain '\n' otherwise.
#ifdef INTERACTIVE
#define ENDL endl
#else
#define ENDL '\n'
#endif

#if defined LOCAL or not defined FAST_IO
// Writes a pair as "first second" (single space, no trailing newline).
template <class T, class U>
ostream &operator<<(ostream &os, const pair<T, U> &p)
{
  const auto &[head, tail] = p;
  os << head;
  os << ' ';
  os << tail;
  return os;
}

namespace internal
{

// Streams the elements of the tuple-like object t, starting at index N, to os,
// separated by single spaces (no leading or trailing separator).
// The recursion stops at compile time once N reaches tuple_size<T>.
template <size_t N = 0, typename T>
void cout_tuple(ostream &os, const T &t) {
  if constexpr (N < std::tuple_size<T>::value) {
    if constexpr (N > 0) { os << ' '; }
    // Stream the element in place: taking a local copy first would duplicate
    // strings/vectors on every print and reject non-copyable element types.
    os << std::get<N>(t);
    cout_tuple<N + 1>(os, t);
  }
}

}; // namespace internal

// Writes a tuple as its elements separated by single spaces; the work is done
// by internal::cout_tuple.
template <class... Ts>
ostream &operator<<(ostream &os, const tuple<Ts...> &t)
{
  internal::cout_tuple(os, t);
  return os;
}
// Writes the elements of a fixed-size array separated by single spaces.
template <class T, size_t n>
ostream &operator<<(ostream &os, const array<T, n> &a)
{
  bool first = true;
  for (const T &elem : a)
  {
    if (!first)
      os << ' ';
    first = false;
    os << elem;
  }
  return os;
}
// Writes the elements of a vector separated by single spaces (nothing for an
// empty vector, no trailing newline).
template <class T>
ostream &operator<<(ostream &os, const vc<T> &v)
{
  bool first = true;
  for (auto &&elem : v)
  {
    if (!first)
      os << ' ';
    first = false;
    os << elem;
  }
  return os;
}

namespace internal
{

// cout backend of WRITE: streams the arguments back to back, with no
// separators and no newline. The zero-argument form writes nothing.
inline void COUTW() {}
template <class... Ts>
void COUTW(const Ts &...a) { (cout << ... << a); }

// cout backend of PRINT: streams the arguments separated by single spaces and
// terminates the line with ENDL.
//
// The zero-argument overload is deliberately NOT a template: a template
// parameter that appears in no function parameter cannot be deduced, so a
// templated version could never be selected by a plain `PRINT()` call, while
// fastio::print() accepts it.
inline void COUTP() { cout << ENDL; }
template <class T>
void COUTP(const T &a) { cout << a << ENDL; }
template <class T, class... Ts>
void COUTP(const T &a, const Ts &...b)
{
  cout << a;
  // One "separator, value" statement per remaining argument, in order.
  ((cout << ' ' << b), ...);
  cout << ENDL;
}

}; // namespace internal
#endif

// WRITE(args...): output the arguments with no separators and no newline.
// PRINT(args...): output the arguments separated by spaces, then end the line.
// Both map to the fastio backend when FAST_IO is enabled outside LOCAL builds,
// and to the cout-based implementations otherwise.
#if defined FAST_IO and not defined LOCAL
#define WRITE fastio::write
#define PRINT fastio::print
#else
#define WRITE internal::COUTW
#define PRINT internal::COUTP
#endif
// PRINT, then terminate the program (exit(0)) or leave the current function.
// PRINTRETURN uses a bare `return;`, so it only fits functions returning void.
#define PRINTEXIT(...) do { PRINT(__VA_ARGS__); exit(0); } while (false)
#define PRINTRETURN(...) do { PRINT(__VA_ARGS__); return; } while (false)

template <class T>
void PRINTV(const vc<T> &v) { for (auto &vi : v) PRINT(vi); }
#define PRINTVEXIT(...) do { PRINTV(__VA_ARGS__); exit(0); } while (false)
#define PRINTVRETURN(...) do { PRINTV(__VA_ARGS__); return; } while (false)
// ----------

// ----- 基準ずらし -----
// Component-wise in-place addition: a.first += b.first, a.second += b.second.
// b may be any type exposing .first and .second.
// Returns a reference to a (the conventional result of a compound assignment)
// instead of a copy.
template <class T, class U, class P>
pair<T, U> &operator+=(pair<T, U> &a, const P &b)
{
  a.first += b.first;
  a.second += b.second;
  return a;
}
// Component-wise sum. The left operand is taken by const reference and left
// untouched; the result is a fresh pair of the left operand's type.
template <class T, class U, class P>
pair<T, U> operator+(const pair<T, U> &a, const P &b)
{
  pair<T, U> sum = a;
  sum += b;
  return sum;
}

// Element-wise in-place addition: a[i] += b[i] for i in [0, n).
// b may be any type indexable with operator[] that has at least n elements.
// Returns a reference to a instead of a copy.
template <class T, size_t n, class A>
array<T, n> &operator+=(array<T, n> &a, const A &b)
{
  for (size_t i = 0; i < n; i++)
    a[i] += b[i];
  return a;
}
// Element-wise sum. The left operand is taken by const reference and left
// untouched; the result is a fresh array of the left operand's type.
template <class T, size_t n, class A>
array<T, n> operator+(const array<T, n> &a, const A &b)
{
  array<T, n> sum = a;
  sum += b;
  return sum;
}

namespace internal
{

// Adds get<I>(b) onto get<I>(a) for every index I in the sequence and returns
// a itself by reference (no copy of the tuple is made).
template <size_t... I, class A, class B>
A &tuple_add_impl(A &a, const B &b, const index_sequence<I...>)
{
  ((get<I>(a) += get<I>(b)), ...);
  return a;
}

}; // namespace internal

// Component-wise in-place addition for tuples. b may be any type that
// supports get<I> for every index of a. Returns a reference to a.
template <class... Ts, class Tp>
tuple<Ts...> &operator+=(tuple<Ts...> &a, const Tp &b)
{
  internal::tuple_add_impl(a, b, index_sequence_for<Ts...>{});
  return a;
}
// Component-wise sum. The left operand is taken by const reference and left
// untouched; the result is a fresh tuple of the left operand's type.
template <class... Ts, class Tp>
tuple<Ts...> operator+(const tuple<Ts...> &a, const Tp &b)
{
  tuple<Ts...> sum = a;
  sum += b;
  return sum;
}

// Adds `add` to every element of a vector, in place.
template <class T, class Add>
void offset(vc<T> &v, const Add &add)
{
  for (auto &elem : v)
    elem += add;
}
// Same for a vector of vectors: `add` is applied to every inner element.
template <class T, class Add>
void offset(vvc<T> &v, const Add &add)
{
  for (auto &row : v)
  {
    for (auto &elem : row)
      elem += add;
  }
}
// ----------

// ----- 転置 -----
// Transpose: a vector of n fixed-size records becomes m column vectors of
// length n, with result[j][i] == vt[i][j].
template <class T, const size_t m>
array<vc<T>, m> top(const vc<array<T, m>> &vt)
{
  const size_t rows = vt.size();
  array<vc<T>, m> cols;
  for (auto &col : cols)
    col.resize(rows);
  for (size_t r = 0; r < rows; r++)
  {
    const auto &rec = vt[r];
    for (size_t c = 0; c < m; c++)
      cols[c][r] = rec[c];
  }
  return cols;
}
// Inverse transpose: m column vectors of equal length n become a vector of n
// records, with result[i][j] == tv[j][i]. All columns must have the same
// length (checked with assert). A zero-width array yields an empty vector.
template <class T, const size_t m>
vc<array<T, m>> top(const array<vc<T>, m> &tv)
{
  vc<array<T, m>> rows;
  if (tv.empty())
    return rows;
  const size_t len = tv[0].size();
  rows.resize(len);
  size_t c = 0;
  for (const auto &col : tv)
  {
    assert(col.size() == len);
    for (size_t r = 0; r < len; r++)
      rows[r][c] = col[r];
    c++;
  }
  return rows;
}

// Transpose: a vector of pairs becomes a pair of vectors (all firsts, all
// seconds), each of the input's length and in the input's order.
template <class T, class U>
pair<vc<T>, vc<U>> top(const vc<pair<T, U>> &vt)
{
  const size_t len = vt.size();
  pair<vc<T>, vc<U>> cols;
  auto &[firsts, seconds] = cols;
  firsts.resize(len);
  seconds.resize(len);
  for (size_t i = 0; i < len; i++)
  {
    firsts[i] = vt[i].first;
    seconds[i] = vt[i].second;
  }
  return cols;
}
// Inverse transpose: a pair of equally long vectors becomes a vector of pairs,
// with result[i] == (tv.first[i], tv.second[i]). Equal lengths are asserted.
template <class T, class U>
vc<pair<T, U>> top(const pair<vc<T>, vc<U>> &tv)
{
  const auto &[firsts, seconds] = tv;
  const size_t len = firsts.size();
  assert(len == seconds.size());
  vc<pair<T, U>> rows(len);
  for (size_t i = 0; i < len; i++)
  {
    rows[i].first = firsts[i];
    rows[i].second = seconds[i];
  }
  return rows;
}

namespace internal
{

// For every index I in the sequence, stores component I of t into slot
// `index` of the I-th container of tv.
template <size_t... I, class V, class Tp>
auto vt_to_tv_impl(V &tv, const Tp &t, index_sequence<I...>, size_t index)
{
  ((get<I>(tv)[index] = get<I>(t)), ...);
}

// Builds one tuple from slot `index` of each container of tv selected by the
// index sequence.
template <size_t... I, class Tp>
auto tv_to_vt_impl(const Tp &tv, index_sequence<I...>, size_t index)
{
  return make_tuple(get<I>(tv)[index]...);
}

};

template <class... Ts>
auto top(const vc<tuple<Ts...>> &vt)
{
  const size_t n = vt.size();
  tuple<vc<Ts>...> tv;
  apply([&](auto &...v)
        { ((v.resize(n)), ...); }, tv);
  for (size_t i = 0; i < n; i++)
    internal::vt_to_tv_impl(tv, vt[i], make_index_sequence<tuple_size_v<decltype(tv)>>{}, i);
  return tv;
}

// Inverse transpose: a tuple of equally long vectors becomes a vector of
// tuples. The length is taken from the first vector and the others are
// asserted to match it.
template <class... Ts>
auto top(const tuple<vc<Ts>...> &tv)
{
  size_t len = get<0>(tv).size();
  apply([&](auto &...col)
        { ((assert(col.size() == len)), ...); }, tv);
  vc<tuple<Ts...>> rows(len);
  size_t idx = 0;
  for (auto &row : rows)
  {
    row = internal::tv_to_vt_impl(tv, index_sequence_for<Ts...>{}, idx);
    idx++;
  }
  return rows;
}
// ----------

#include <atcoder/convolution>
using mint = atcoder::modint998244353;

// https://codeforces.com/blog/entry/61306
// Berlekamp-Massey: returns coefficients c (0-indexed here) of a linear
// recurrence  A[i] = sum_j c[j] * A[i - 1 - j]  that reproduces every term of
// A it is checked against. T must support +, -, *, / and comparison with 0
// (a field-like type such as a modint). An all-zero or empty A yields {}.
template<class T>
vector<T> BerlekampMassey(const vector<T> &A)
{
  const int len = A.size();
  vector<T> prev, cur; // recurrence before the last growth / current recurrence
  int fail_at = -1;    // index at which `prev` last failed
  T fail_delta = 0;    // discrepancy observed at that failure
  for (int i = 0; i < len; i++)
  {
    const int deg = cur.size();

    // Discrepancy between A[i] and what the current recurrence predicts.
    T delta = A[i];
    for (int j = 0; j < deg; j++)
      delta -= cur[j] * A[i - 1 - j];
    if (delta == 0)
      continue;

    // First non-zero term: start from an all-zero recurrence of order i + 1.
    if (cur.empty())
    {
      cur.assign(i + 1, 0);
      fail_at = i;
      fail_delta = delta;
      continue;
    }

    // Cancel the discrepancy with a shifted, scaled copy of `prev`.
    const T scale = delta / fail_delta;
    const int shift = i - fail_at;
    const int need = shift + (int)prev.size();
    const bool grows = need >= deg;
    vector<T> saved;
    if (grows)
    {
      saved = cur;
      cur.resize(need);
    }
    cur[shift - 1] += scale;
    for (int j = 0; j < (int)prev.size(); j++)
      cur[shift + j] -= scale * prev[j];
    if (grows)
    {
      // The pre-update recurrence becomes the new reference failure.
      fail_at = i;
      fail_delta = delta;
      prev.swap(saved);
    }
  }
  return cur;
}

// Convolution for an arbitrary modulus. T is expected to be a modint type
// (constructible from integers, with val(), + and *).
//
// Returns C with C[k] = sum_{i + j = k} A[i] * B[j], of length N + M - 1, or
// an empty vector when either input is empty.
//
// Small inputs (shorter side <= 250) are multiplied naively. Otherwise the
// product of the val() representatives is computed modulo three NTT primes and
// recombined in T as c1 + t1 * MOD1 + t2 * MOD1 * MOD2.
template<class T>
vector<T> convolution_anymod(const vector<T> &A, const vector<T> &B)
{
  int N = A.size(), M = B.size();
  // Without this guard N + M - 1 would be -1 (converted to a huge size) for
  // two empty inputs, and a non-empty all-zero vector for one empty input.
  if (N == 0 || M == 0)
    return {};
  if (min(N, M) <= 250)
  {
    vector<T> C(N + M - 1, 0);
    for (int i = 0; i < N; i++)
      for (int j = 0; j < M; j++)
        C.at(i + j) += A.at(i) * B.at(j);
    return C;
  }

  constexpr ll MOD1 = 167772161, MOD2 = 469762049, MOD3 = 1224736769;
  using mint2 = atcoder::static_modint<MOD2>;
  using mint3 = atcoder::static_modint<MOD3>;
  // Inverses used by the reconstruction: MOD1^{-1} mod MOD2 and
  // (MOD1 * MOD2)^{-1} mod MOD3.
  constexpr int i1_2 = atcoder::internal::inv_gcd(MOD1, MOD2).second;
  constexpr int i12_3 = atcoder::internal::inv_gcd(MOD1 * MOD2, MOD3).second;
  T m12 = T(MOD1) * T(MOD2);

  vector<int> A_(N), B_(M);
  for (int i = 0; i < N; i++)
    A_[i] = A[i].val();
  for (int i = 0; i < M; i++)
    B_[i] = B[i].val();
  auto C1 = atcoder::convolution<MOD1>(A_, B_);
  auto C2 = atcoder::convolution<MOD2>(A_, B_);
  auto C3 = atcoder::convolution<MOD3>(A_, B_);

  vector<T> C(N + M - 1);
  for (ll i = 0; i < N + M - 1; i++)
  {
    int c1 = C1[i], c2 = C2[i], c3 = C3[i];
    // Mixed-radix digits: value = c1 + t1 * MOD1 + t2 * MOD1 * MOD2.
    int t1 = (mint2(c2 - c1) * mint2::raw(i1_2)).val();
    int t2 = ((mint3(c3 - c1) - mint3::raw(t1) * mint3::raw(MOD1)) * mint3::raw(i12_3)).val();
    C[i] = T(c1) + T(t1) * T(MOD1) + T(t2) * m12;
  }
  return C;
}

// https://opt-cp.com/fps-implementation/
// https://qiita.com/hotman78/items/f0e6d2265badd84d429a
// https://opt-cp.com/fps-fast-algorithms/
// https://maspypy.com/%E5%A4%9A%E9%A0%85%E5%BC%8F%E3%83%BB%E5%BD%A2%E5%BC%8F%E7%9A%84%E3%81%B9%E3%81%8D%E7%B4%9A%E6%95%B0-%E9%AB%98%E9%80%9F%E3%81%AB%E8%A8%88%E7%AE%97%E3%81%A7%E3%81%8D%E3%82%8B%E3%82%82%E3%81%AE
// Truncated formal power series stored as a coefficient vector (index i holds
// the coefficient of x^i).
//   T               - coefficient type; the multiplication helpers below are
//                     written for atcoder static_modint / dynamic_modint.
//   is_ntt_friendly - when true, products go through atcoder::convolution;
//                     otherwise through convolution_anymod.
template<class T, bool is_ntt_friendly>
struct FormalPowerSeries : vector<T>
{
  using vector<T>::vector;
  using vector<T>::operator=;
  using F = FormalPowerSeries;

  // Coefficient-wise negation.
  F operator-() const
  {
    F res(*this);
    for (auto &a : res)
      a = -a;
    return res;
  }

  // Scalar multiplication / division. The compound forms work in place and
  // return *this by reference, like += and -= below, so an in-place update
  // does not copy the whole series.
  F &operator*=(const T &k)
  {
    for (auto &a : *this)
      a *= k;
    return *this;
  }
  F operator*(const T &k) const
  {
    F res(*this);
    res *= k;
    return res;
  }
  friend F operator*(const T k, const F &f) { return f * k; }
  F &operator/=(const T &k)
  {
    *this *= k.inv();
    return *this;
  }
  F operator/(const T &k) const
  {
    F res(*this);
    res /= k;
    return res;
  }

  // Coefficient-wise addition / subtraction; the result has the length of the
  // longer operand.
  F &operator+=(const F &g)
  {
    int n = (*this).size(), m = g.size();
    (*this).resize(max(n, m), T(0));
    for (int i = 0; i < m; i++)
      (*this)[i] += g[i];
    return *this;
  }
  F operator+(const F &g) const
  {
    F res(*this);
    res += g;
    return res;
  }
  F &operator-=(const F &g)
  {
    int n = (*this).size(), m = g.size();
    (*this).resize(max(n, m), T(0));
    for (int i = 0; i < m; i++)
      (*this)[i] -= g[i];
    return *this;
  }
  F operator-(const F &g) const
  {
    F res(*this);
    res -= g;
    return res;
  }

  // Multiply by x^d: shifts coefficients up by d, keeping the length (the top
  // coefficients are dropped).
  F &operator<<=(const ll d)
  {
    int n = (*this).size();
    (*this).insert((*this).begin(), min(ll(n), d), T(0));
    (*this).resize(n);
    return *this;
  }
  F operator<<(const ll d) const
  {
    F res(*this);
    res <<= d;
    return res;
  }
  // Divide by x^d: drops the lowest d coefficients and pads with zeros at the
  // top, keeping the length.
  F &operator>>=(const ll d)
  {
    int n = (*this).size();
    (*this).erase((*this).begin(), (*this).begin() + min(ll(n), d));
    (*this).resize(n, T(0));
    return *this;
  }
  F operator>>(const ll d) const
  {
    F res(*this);
    res >>= d;
    return res;
  }

  // Product of A and B, truncated to at most d coefficients when d != -1.
  // Static modint: the backend is chosen by is_ntt_friendly.
  template<const int MOD>
  F convolution2(const vector<atcoder::static_modint<MOD>> &A, const vector<atcoder::static_modint<MOD>> &B, const int d = -1)
  {
    F res;
    if (is_ntt_friendly)
      res = atcoder::convolution(A, B);
    else
      res = convolution_anymod(A, B);
    if (d != -1 && (int)res.size() > d)
      res.resize(d);
    return res;
  }
  // Dynamic modint: always uses convolution_anymod.
  template<const int id>
  F convolution2(const vector<atcoder::dynamic_modint<id>> &A, const vector<atcoder::dynamic_modint<id>> &B, const int d = -1)
  {
    F res;
    res = convolution_anymod(A, B);
    if (d != -1 && (int)res.size() > d)
      res.resize(d);
    return res;
  }

  // Series product truncated to the current length n of *this.
  F &operator*=(const F &g)
  {
    int n = (*this).size();
    if (n == 0)
      return *this;
    *this = convolution2(*this, g, n);
    // The raw product is shorter than n only when g is empty; pad with zeros
    // so that the length of *this is preserved in that case as well.
    (*this).resize(n);
    return *this;
  }
  F operator*(const F &g) const
  {
    F res(*this);
    res *= g;
    return res;
  }

  // Forward / inverse butterfly wrappers around the atcoder::internal routines.
  // Only the NTT-friendly static-modint overloads do real work. The others
  // exist so that code which mentions them in a branch that is not taken at
  // run time still compiles; calling them trips assert(false).
  template <const int MOD>
  void butterfly2(FormalPowerSeries<atcoder::static_modint<MOD>, true> &A) const { atcoder::internal::butterfly(A); }
  template <const int MOD>
  void butterfly2(FormalPowerSeries<atcoder::static_modint<MOD>, false>) const { assert(false); }
  template <const int id>
  void butterfly2(FormalPowerSeries<atcoder::dynamic_modint<id>, false>) const { assert(false); }
  template <const int MOD>
  void butterfly_inv2(FormalPowerSeries<atcoder::static_modint<MOD>, true> &A) const { atcoder::internal::butterfly_inv(A); }
  template <const int MOD>
  void butterfly_inv2(FormalPowerSeries<atcoder::static_modint<MOD>, false>) const { assert(false); }
  template <const int id>
  void butterfly_inv2(FormalPowerSeries<atcoder::dynamic_modint<id>, false>) const { assert(false); }
};

// Computes [x^N] P(x) / Q(x), where the degree of P is smaller than the degree
// of Q (asserted as P.size() <= Q.size() - 1).
// Each round replaces (U, V) by the selected half of U(x) V(-x) and the even
// half of V(x) V(-x), and halves N; the answer is U[0] / V[0] once N is 0.
template<class T, bool is_ntt_friendly>
T bostan_mori(const FormalPowerSeries<T, is_ntt_friendly> &P, const FormalPowerSeries<T, is_ntt_friendly> &Q, ll N)
{
  using F = FormalPowerSeries<T, is_ntt_friendly>;

  int d = (int)Q.size() - 1;
  assert((int)P.size() <= d);
  // A constant denominator forces P to be empty, so every coefficient is 0.
  // Returning here also keeps the NTT branch from running with z == 1, where
  // the pairwise loop below would read V[1] out of bounds.
  if (d == 0)
    return T(0);
  if (is_ntt_friendly)
  {
    // Transform length: a power of two that holds all 2d + 1 coefficients of
    // V(x) V(-x).
    int z = 1;
    while (z < 2 * d + 1)
      z <<= 1;
    T iz = T(z).inv();
    F U = F(P), V = F(Q);
    U.resize(z), V.resize(z);
    while (N > 0)
    {
      U.butterfly2(U), V.butterfly2(V);
      // Each transformed value is multiplied by its neighbour in the pair
      // (i, i + 1). NOTE(review): presumably the neighbour holds the value of
      // V at the negated point in the butterfly's output order - confirm.
      for (int i = 0; i < z; i += 2)
      {
        T x = V[i + 1], y = V[i];
        U[i] *= x, V[i] *= x;
        U[i + 1] *= y, V[i + 1] *= y;
      }
      U.butterfly_inv2(U), V.butterfly_inv2(V);
      // Keep the coefficients whose parity matches N (numerator) and the even
      // ones (denominator); iz undoes the scaling of the inverse transform.
      for (int i = 0; i < (z >> 1); i++)
      {
        U[i] = U[2 * i + (N & 1)] * iz;
        V[i] = V[2 * i] * iz;
      }
      for (int i = (z >> 1); i < z; i++)
        U[i] = 0, V[i] = 0;
      N >>= 1;
    }
    return U.front() / V.front();
  }
  else
  {
    F U = F(P), V = F(Q);
    U.resize(d + 1), V.resize(d + 1);
    while (N > 0)
    {
      // V3 = V(-x): negate the odd-index coefficients.
      F U2 = F(U), V2 = F(V), V3 = F(V);
      for (int i = 1; i <= d; i += 2)
        V3[i] = -V3[i];
      U2 = convolution_anymod(U2, V3), V2 = convolution_anymod(V2, V3);
      for (int i = 0; i <= d; i++)
      {
        if (2 * i + (N & 1) < (int)U2.size())
          U[i] = U2[2 * i + (N & 1)];
        if (2 * i < (int)V2.size())
          V[i] = V2[2 * i];
      }
      N >>= 1;
    }
    return U.front() / V.front();
  }
}
// a_n = sum[i = 1..d] c_i a_{n-i}（n ≥ d）を満たすとき、a_N を求める（A は 0-indexed で C は 1-indexed）
template<class T, bool is_ntt_friendly>
T linear_recurrence(const vector<T> &A, const vector<T> &C, ll N)
{
  using F = FormalPowerSeries<T, is_ntt_friendly>;

  int d = C.size();
  assert((int)A.size() >= d);

  F Ga(d), Q(d + 1);
  Q[0] = 1;
  for (int i = 0; i < d; i++)
    Ga[i] = A[i], Q[i + 1] = -C[i];
  F P = Ga * Q;
  return bostan_mori(P, Q, N);
}

// One-time setup hook, called once from main before any test case is run.
// Nothing needs to be prepared for this problem.
void init() {}

// Solves one test case: reads N_ and K, builds the first 2K + 2 terms of the
// answer sequence A with prefix-sum DP, and evaluates term N_ through a
// hard-coded linear recurrence of order 2K + 1 (found experimentally with the
// commented-out Berlekamp-Massey run below). The result is printed modulo
// 998244353.
void main2()
{
  // ---- Experiment used to discover the recurrence (kept for reference) ----
  // LL(K);

  // const ll N = 1000;

  // // dp(i, j) := the last used element is i, and its connected component has
  // //             size 1 if j == 0, or size 2 or more if j == 1
  // auto dp = dvec({N, 2LL}, mint(0));
  // rep(i, N) dp.at(i).at(0) += 1;
  // rep(i, N) rep(j, 2)
  // {
  //   rep(ni, i + 1, N)
  //   {
  //     ll di = ni - i;
  //     if (di <= K)
  //       dp.at(ni).at(1) += dp.at(i).at(j);
  //     else if (j == 1)
  //       dp.at(ni).at(0) += dp.at(i).at(j);
  //   }
  // }
  // vc<mint> A(N + 1, 1);
  // rep(i, N) A.at(i + 1) = A.at(i) + dp.at(i).at(1);

  // auto C = BerlekampMassey<mint>(A);
  // dump(C);

  // Experimentally the coefficients are: 2, 0,0,0 (K-1 of them), -1, 1,1,1 (K of them)

  LL(N_, K);

  const ll N = 2 * K + 1;

  // s and t are prefix sums of f and g (s[i] = f[0] + ... + f[i-1], same for
  // t), which replace the inner summation loops shown in the comments below.
  vc<mint> f(N), g(N), s(N + 1), t(N + 1);
  rep(i, N)
  {
    // f.at(i) = 1;
    // rep(j, max(0LL, i - K)) f.at(i) += g.at(j);
    // rep(j, max(0LL, i - K), i) g.at(i) += f.at(j) + g.at(j);
    f.at(i) = 1 + t.at(max(0LL, i - K));
    g.at(i) = s.at(i) + t.at(i) - s.at(max(0LL, i - K)) - t.at(max(0LL, i - K));
    s.at(i + 1) = s.at(i) + f.at(i);
    t.at(i + 1) = t.at(i) + g.at(i);
  }
  // A[0] = 1 and A[i + 1] = A[i] + g[i].
  vc<mint> A(N + 1, 1);
  rep(i, N) A.at(i + 1) = A.at(i) + g.at(i);

  // Recurrence coefficients c_1..c_{2K+1}: 2, then K-1 zeros, then -1, then K ones.
  vc<mint> C;
  C.eb(2);
  rep(i, K - 1) C.eb(0);
  C.eb(-1);
  rep(i, K) C.eb(1);

  mint ans = linear_recurrence<mint, true>(A, C, N_);
  PRINT(ans.val());
}

// Randomized stress-test hook, called from main before init().
// The body is currently disabled: everything between "/*" and "//*/" is one
// block comment. The sketch inside generates small random cases, compares a
// reference `naive` against `solve`, and dumps the first mismatch.
void test()
{
  /*
  local(
    rep(testcase, 100000)
    {
      cout << endl;
      dump(testcase);


      // ----- generate cases -----
      ll N = 1 + rand() % 5;
      vl A(N);
      rep(i, N) A.at(i) = 1 + rand() % 10;
      // --------------------------

      // ------ check output ------
      #define INPUT A
      auto god = naive(INPUT);
      auto ans = solve(INPUT);
      if (god != ans)
      {
        dump(INPUT);
        dump(god, ans);
        exit(0);
      }
      // --------------------------
    }
    dump("ok");
  );
  //*/
}

// Entry point: configures I/O, runs test() and init(), then calls main2()
// according to the *_TESTCASE macro selected at the top of the file.
int main()
{
  // Writes a colored banner to cerr; only LOCAL builds actually emit it.
  cauto CERR = [](string val, string color)
  {
    string s = "\033[" + color + "m" + val + "\033[m";
    #ifdef LOCAL
    cerr << s;
    #endif
    /* Uncomment this when checking on the judge's custom-test page
    cerr << val;
    //*/
  };

  #if defined FAST_IO and not defined LOCAL
  CERR("\n[FAST_IO]\n\n", "32");
  #endif
  #if defined FAST_CIO and not defined LOCAL
  CERR("\n[FAST_CIO]\n\n", "32");
  cin.tie(0);
  ios::sync_with_stdio(false);
  #endif
  cout << fixed << setprecision(20);

  test();
  init();

  // AOJ mode (also used for LOCAL single-testcase runs): call main2() in an
  // endless loop. Nothing in this function ends the loop.
  #if defined AOJ_TESTCASE or (defined LOCAL and defined SINGLE_TESTCASE)
  CERR("\n[AOJ_TESTCASE]\n\n", "35");
  while (true)
  {
    dump("new testcase");
    main2();
  }
  #elif defined SINGLE_TESTCASE
  CERR("\n[SINGLE_TESTCASE]\n\n", "36");
  main2();
  #elif defined MULTI_TESTCASE
  // Multi-testcase mode: the input begins with the number of cases T.
  CERR("\n[MULTI_TESTCASE]\n\n", "33");
  dump("T");
  IN(uint, T);
  while (T--)
  {
    dump("new testcase");
    main2();
  }
  #endif
}