#1163987 (C++23) No.3538 Not First Place

提出ソース
結果

問題	No.3538 Not First Place
コンテスト
ユーザー	👑 hamamu
提出日時	2026-05-08 23:19:45
言語	C++23 (gcc 15.2.0 + boost 1.89.0) コンパイル: `g++-15 -O2 -lm -std=c++23 -Wuninitialized -DONLINE_JUDGE -o a.out _filename_` 実行: `./a.out`
結果	RE
実行時間	-
コード長	60,348 bytes
記録	記録タグの例: 初AC / ショートコード / 純ショートコード / 純主流ショートコード / 最速実行時間
コンパイル時間	5,301 ms
コンパイル使用メモリ	380,428 KB
実行使用メモリ	183,612 KB
最終ジャッジ日時	2026-05-08 23:20:41
合計ジャッジ時間	17,900 ms
ジャッジサーバーID （参考情報）	judge2_0 / judge1_1
このコードへのチャレンジ
（要ログイン）
ファイルパターン	結果
sample	AC * 1 RE * 1
other	AC * 4 TLE * 1 -- * 21
権限があれば一括ダウンロードができます
ソースコード

raw source code
#ifndef MYLOCAL
//# pragma GCC target("avx2")//yukiではNG
# pragma GCC optimize("O3")
# pragma GCC optimize("unroll-loops")
#endif
#if defined(NDEBUG)
#undef NDEBUG
#endif
#include "bits/stdc++.h"
using namespace std;
using ll=long long;
using dd=long double;
using pll=pair<ll,ll>;
using tll=tuple<ll,ll,ll>;
using qll=tuple<ll,ll,ll,ll>;
using namespace chrono;
// Large sentinel used as "infinity" for ll values (about 1.2e18).
// Twice this value (about 2.4e18) still fits in a signed 64-bit integer.
constexpr ll INF = 1201001001001001001;
// One-time iostream setup, performed when the global `fast` object is constructed.
struct Fast{
    Fast(){
        // Untie cin from cout and switch off synchronisation with C stdio.
        cin.tie(nullptr);
        ios::sync_with_stdio(false);
        // Floating-point output: fixed notation, max_digits10 (of double) digits.
        cout.setf(ios::fixed, ios::floatfield);
        cout.precision(numeric_limits<double>::max_digits10);
    }
} fast;
// EXPAND forces one more macro re-scan of its argument.
#define EXPAND( x ) x//workaround ("magic charm") for VS
// overloadN picks the argument that follows N leading arguments; used below to
// dispatch a variadic macro on the number of arguments it received.
#define overload3(_1,_2,_3,name,...) name
#define overload4(_1,_2,_3,_4,name,...) name
#define overload5(_1,_2,_3,_4,_5,name,...) name
// Ascending loops:
//   rep(N)        : N iterations with the hidden counter dmyi
//   rep(i,N)      : i = 0 .. N-1
//   rep(i,S,E)    : i = S .. E (E inclusive)
//   rep(i,S,E,t)  : i = S .. E (E inclusive) in steps of t
#define rep1(N)          for (ll dmyi = 0; dmyi < (N); dmyi++)
#define rep2(i, N)       for (ll i = 0; i < (N); i++)
#define rep3(i, S, E)    for (ll i = (S); i <= (E); i++)
#define rep4(i, S, E, t) for (ll i = (S); i <= (E); i+=(t))
#define rep(...) EXPAND(overload4(__VA_ARGS__,rep4,rep3,rep2,rep1)(__VA_ARGS__))
// Descending loops:
//   dep(i,E,S)    : i = E down to S (S inclusive)
//   dep(i,E,S,t)  : i = E down to S (S inclusive) in steps of t
// Only the 3- and 4-argument forms map to a real macro; fewer arguments select `_`.
#define dep3(i, E, S)    for (ll i = (E); i >= (S); i--)
#define dep4(i, E, S, t) for (ll i = (E); i >= (S); i-=(t))
#define dep(...) EXPAND(overload4(__VA_ARGS__, dep4, dep3,_,_)(__VA_ARGS__))
// Iterator-pair helpers (expand to "first, last"):
//   all(v)      : the whole container
//   all(v,E)    : indices 0 .. E (E inclusive)
//   all(v,S,E)  : indices S .. E (E inclusive)
#define ALL1(v)     (v).begin(),     (v).end()
#define ALL2(v,E)   (v).begin(),     (v).begin()+((E)+1)
#define ALL3(v,S,E) (v).begin()+(S), (v).begin()+((E)+1)
#define all(...) EXPAND(overload3(__VA_ARGS__, ALL3, ALL2, ALL1)(__VA_ARGS__))
// Raise `a` to `b` when `b` is strictly greater; returns true iff `a` was changed.
template<class T> inline bool chmax(T &a, T b) {
    if (!(a < b)) return false;
    a = b;
    return true;
}
// Lower `a` to `b` when `b` is strictly smaller; returns true iff `a` was changed.
template<class T> inline bool chmin(T &a, T b) {
    if (!(a > b)) return false;
    a = b;
    return true;
}
// `a` capped from above by `b`.
template<class T> [[nodiscard]] inline T limithi(T a,T b){
    return min(a,b);
}
// `a` capped from below by `b`.
template<class T> [[nodiscard]] inline T limitlo(T a,T b){
    return max(a,b);
}
// In-place variant of limithi; returns true iff `a` was changed.
template<class T> inline bool chlimithi(T &a,T b){
    return chmin(a,b);
}
// In-place variant of limitlo; returns true iff `a` was changed.
template<class T> inline bool chlimitlo(T &a,T b){
    return chmax(a,b);
}
// Largest element among indices S..E (E inclusive).
template<class T> inline auto maxe(T &&v,ll S,ll E){
    auto first = v.begin()+S;
    auto last  = v.begin()+(E+1);
    return *max_element(first,last);
}
// Largest element of the whole container.
template<class T> inline auto maxe(T &&v){
    return *max_element(v.begin(),v.end());
}
// Smallest element among indices S..E (E inclusive).
template<class T> inline auto mine(T &&v,ll S,ll E){
    auto first = v.begin()+S;
    auto last  = v.begin()+(E+1);
    return *min_element(first,last);
}
// Smallest element of the whole container.
template<class T> inline auto mine(T &&v){
    return *min_element(v.begin(),v.end());
}
// Sum of the elements at indices S..E (E inclusive), accumulated in the
// container's value_type starting from a value-initialised U.
template<class T,class U=typename remove_reference<T>::type::value_type>
inline U sum(T &&v,ll S,ll E) {
    auto first = v.begin()+S;
    auto last  = v.begin()+(E+1);
    return accumulate(first,last,U());
}
// Sum of the whole container (delegates to the ranged overload).
template<class T> inline auto sum(T &&v) {
    const auto len = v.end()-v.begin();
    return sum(v,0,len-1);
}
// Container size converted to a signed ll.
template<class T> inline ll sz(T &&v){
    return static_cast<ll>(v.size());
}

//cin
// Recursive reader for standard input: scalars use operator>>, while pairs,
// tuples and std::array are read element by element through cin1core, so nested
// combinations (e.g. array<pair<...>,N>, tuple<pair<...>,...>) work as well.
struct cinutil{
    // Fallback: anything that has its own operator>>.
    template<class T> static void cin1core(T &a){ cin>>a; }
    // pair: first, then second.
    template<class T,class S> static void cin1core(pair<T,S> &a){
        cin1core(a.first),cin1core(a.second);
    }
    // tuple: elements in index order (a comma fold evaluates left to right).
    // An empty tuple reads nothing.
    template<class... Args> static void cin1core(tuple<Args...> &a){
        std::apply([](auto &...elems){ (cin1core(elems), ...); }, a);
    }
    // array: every element goes through cin1core so composite element types are supported.
    template<class T,size_t N>
    static void cin1core(array<T,N> &a){ for (auto &elem: a) cin1core(elem); }
};
// Read a single value of type T from standard input and return it.
template<class T> T cin1(){
    T value;
    cinutil::cin1core(value);
    return value;
}
// Read several values in one call; they come back packed as tuple<Args...>.
template<class... Args> tuple<Args...> cins(){
    using Row = tuple<Args...>;
    return cin1<Row>();
}

//cout
// pair: "first second"
template<class T,class S> inline ostream &operator<<(ostream &os,const pair<T,S> &a){
    const auto &[x,y] = a;
    return os << x << ' ' << y;
}
// 3-tuple: elements separated by single spaces
template<class T,class S,class R> inline ostream &operator<<(ostream &os,const tuple<T,S,R> &a){
    const auto &[x,y,z] = a;
    return os << x << ' ' << y << ' ' << z;
}
// 4-tuple: elements separated by single spaces
template<class T,class S,class R,class Q> inline ostream &operator<<(ostream &os,const tuple<T,S,R,Q> &a){
    const auto &[x,y,z,w] = a;
    return os << x << ' ' << y << ' ' << z << ' ' << w;
}
// vector: elements separated by single spaces. An empty-string separator is
// still written before the first element, exactly as the one-line form did.
template<class T> inline ostream &operator<<(ostream &os,const vector<T> &a){
    const char *sep = "";
    for (const auto &elem : a){
        os << sep << elem;
        sep = " ";
    }
    return os;
}

// Stopwatch object: `st` is captured when the object is initialised, and
// calling timeget() yields the whole milliseconds elapsed since then.
inline struct{
  system_clock::time_point st = system_clock::now();
  ll operator()()const{
    const auto elapsed = system_clock::now()-st;
    const auto us = duration_cast<microseconds>(elapsed);
    return us.count()/1000;
  }
} timeget;


// Integer modulo the compile-time constant MOD.
// The residue is stored in val_; the normalising operations keep it in [0, MOD).
// The value -1 is reserved as a "no value" marker (see none() / isnone()).
template<long long MOD> struct mll_{
    using Int = long long;
    using ll = long long;
    ll val_=0;
    /*---- utility ----*/
    mll_ &norm(){ return normR().normS(); }// full normalisation (remainder, then sign)
    mll_ &normR(){ val_%=MOD; return *this; }// remainder step only
    mll_ &normS(){ if (val_<0) val_+=MOD; return *this; }// sign step only
    mll_ &normP(){ if (val_>=MOD) val_-=MOD; return *this; }// fix-up after an addition
    mll_ &invsg(){ val_=-val_; return normS(); }// negate
    ll modinv(int a){// a^-1 mod MOD via the extended Euclidean algorithm
        int ypre=0,y=1,apre=MOD;
        while (a>1){
            int t=apre/a;
            apre-=a*t,swap(a,apre);
            ypre-=y*t,swap(y,ypre);
        }
        return y<0 ? y+MOD : y;
    }
    /*---- I/F ----*/
    mll_(){}
    mll_(ll v): val_(v){ norm(); }
    mll_(ll v,bool b): val_(v){} // constructor that skips normalisation (b is ignored)
    Int val()const{ return (Int)val_; }
    bool isnone() const { return val_==-1; } // true: holds no value
    mll_ &none() { val_=-1; return *this; } // mark as holding no value
    mll_ &inv(){ val_=modinv((int)val_); return *this; } // replace *this by its inverse
    mll_ &operator+=(mll_ b){ val_+=b.val_; return normP(); }
    mll_ &operator-=(mll_ b){ val_-=b.val_; return normS(); }
    mll_ &operator*=(mll_ b){ val_*=b.val_; return normR(); }
    mll_ &operator/=(mll_ b){ return *this*=b.inv(); }
    mll_ &operator+=(ll b){ return *this+=mll_(b); }
    mll_ &operator-=(ll b){ return *this-=mll_(b); }
    mll_ &operator*=(ll b){ return *this*=mll_(b); }
    mll_ &operator/=(ll b){ return *this/=mll_(b); }
    mll_ operator-()const{ return mll_(*this).invsg(); }
    mll_ operator+(mll_ b)const{ return mll_(*this)+=b; }
    mll_ operator-(mll_ b)const{ return mll_(*this)-=b; }
    mll_ operator*(mll_ b)const{ return mll_(*this)*=b; }
    mll_ operator/(mll_ b)const{ return mll_(*this)/=b; }
    mll_ operator+(ll b)const{ return mll_(*this)+=b; }
    mll_ operator-(ll b)const{ return mll_(*this)-=b; }
    mll_ operator*(ll b)const{ return mll_(*this)*=b; }
    mll_ operator/(ll b)const{ return mll_(*this)/=b; }
    friend mll_ operator+(ll a,mll_ b){ return b+a; }
    friend mll_ operator-(ll a,mll_ b){ return -b+a; }
    friend mll_ operator*(ll a,mll_ b){ return b*a; }
    friend mll_ operator/(ll a,mll_ b){ return mll_(a)/b; }
    bool operator==(mll_ b)const{ return val_==b.val_; }
    bool operator!=(mll_ b)const{ return val_!=b.val_; }
    bool operator==(ll b)const{ return *this==mll_(b); }
    bool operator!=(ll b)const{ return *this!=mll_(b); }
    friend bool operator==(ll a,mll_ b){ return mll_(a)==b; }
    friend bool operator!=(ll a,mll_ b){ return mll_(a)!=b; }
    friend ostream &operator<<(ostream &os,mll_  a){ return os << a.val_; }
    // Input is normalised into [0, MOD): normP()/normS() only correct by a single
    // MOD, so a raw negative or >= MOD value would otherwise leak into results.
    friend istream &operator>>(istream &is,mll_ &a){ is >> a.val_; a.norm(); return is; }
    // this^k by binary exponentiation; k <= 0 yields 1.
    mll_ pow(ll k)const{
        mll_ ret(1,false),a(*this);
        for (; k>0; k>>=1,a*=a) if (k&1)ret*=a;
        return ret;
    }
    static constexpr int mod() { return MOD; }
    //enum{ modll=MOD };
};


// std::vector<T> with a signed size(), element-wise arithmetic operators and a
// set of chainable convenience methods. Index ranges taken as (l,r) are
// inclusive on both ends throughout.
template<class T> struct Vector: vector<T>{
  using Int = long long;
  using vT=vector<T>;
  using cvT=const vector<T>;
  using cT=const T;
  using vT::vT; // inherit the base-class constructors
  using vT::begin,vT::end,vT::insert,vT::erase;
  // Iterator to index i.
  auto it(Int i){ return begin()+i; }
  auto it(Int i)const{ return begin()+i; }
  /*---- construction ----*/
  Vector(cvT& b):vT(b){}
  Vector(vT&& b):vT(move(b)){}
  Vector(int n,cT& x):vT(n,x){}// wrappers so (count,value) construction works with int and long long counts
  Vector(long long n,cT& x):vT(n,x){}
  template<class S> Vector(const Vector<S>& b):vT(b.begin(),b.end()){}
  template<class S> Vector(const vector<S>& b):vT(b.begin(),b.end()){}
  Vector(Int n,T s,T d){ iota(n,s,d); }// n terms: s, s+d, s+2d, ...
  Vector(Int n,function<T(Int)> g):vT(n){ for(Int i=0;i<n;++i) (*this)[i]=g(i); }// element i is g(i)
  /*---- element-wise arithmetic with another vector of the same size ----*/
  Vector &operator+=(cvT &b){ assert(size()==b.size()); for(Int i=0;i<size();++i) (*this)[i]+=b[i]; return *this; }
  Vector &operator-=(cvT &b){ assert(size()==b.size()); for(Int i=0;i<size();++i) (*this)[i]-=b[i]; return *this; }
  Vector &operator*=(cvT &b){ assert(size()==b.size()); for(Int i=0;i<size();++i) (*this)[i]*=b[i]; return *this; }
  Vector &operator/=(cvT &b){ assert(size()==b.size()); for(Int i=0;i<size();++i) (*this)[i]/=b[i]; return *this; }
  Vector &operator%=(cvT &b){ assert(size()==b.size()); for(Int i=0;i<size();++i) (*this)[i]%=b[i]; return *this; }
  Vector &operator+=(const Vector<T> &b){ return *this+=(cvT&)b; }
  Vector &operator-=(const Vector<T> &b){ return *this-=(cvT&)b; }
  Vector &operator*=(const Vector<T> &b){ return *this*=(cvT&)b; }
  Vector &operator/=(const Vector<T> &b){ return *this/=(cvT&)b; }
  Vector &operator%=(const Vector<T> &b){ return *this%=(cvT&)b; }
  Vector operator+(cvT &b){ return Vector(*this)+=b; }
  Vector operator-(cvT &b){ return Vector(*this)-=b; }
  Vector operator*(cvT &b){ return Vector(*this)*=b; }
  Vector operator/(cvT &b){ return Vector(*this)/=b; }
  Vector operator%(cvT &b){ return Vector(*this)%=b; }
  Vector operator+(const Vector<T> &b){ return Vector(*this)+=b; }
  Vector operator-(const Vector<T> &b){ return Vector(*this)-=b; }
  Vector operator*(const Vector<T> &b){ return Vector(*this)*=b; }
  Vector operator/(const Vector<T> &b){ return Vector(*this)/=b; }
  Vector operator%(const Vector<T> &b){ return Vector(*this)%=b; }
  /*---- arithmetic with a scalar applied to every element ----*/
  template<class S> Vector &operator+=(S x){ for(T &e: *this) e+=x;  return *this; }
  template<class S> Vector &operator-=(S x){ for(T &e: *this) e-=x;  return *this; }
  template<class S> Vector &operator*=(S x){ for(T &e: *this) e*=x;  return *this; }
  template<class S> Vector &operator/=(S x){ for(T &e: *this) e/=x;  return *this; }
  template<class S> Vector &operator%=(S x){ for(T &e: *this) e%=x;  return *this; }
  template<class S> Vector operator+(S x)const{ return Vector(*this)+=x; }
  template<class S> Vector operator-(S x)const{ return Vector(*this)-=x; }
  template<class S> Vector operator*(S x)const{ return Vector(*this)*=x; }
  template<class S> Vector operator/(S x)const{ return Vector(*this)/=x; }
  template<class S> Vector operator%(S x)const{ return Vector(*this)%=x; }
  // Note: the postfix forms modify in place and return *this (not the old value).
  Vector &operator--(int){ return *this-=1; }
  Vector &operator++(int){ return *this+=1; }
  Vector operator-()const{ return Vector(*this)*=-1; }
  template<class S> friend Vector operator-(S x,const Vector &a){ return -a+=x; }
  /*---- access ----*/
  // Auto-growing access: resizes to i+1 when i is past the end.
  T& at(Int i){ assert(i>=0); if(n()<=i)vT::resize(i+1); return vT::operator[](i); }
  // Elements l, l+d, l+2d, ... up to r (inclusive); d may be negative. d==0 yields an empty result.
  Vector slice(Int l,Int r,Int d=1)const{
    Vector ret;
    for(Int i=l;(d>0&&i<=r)||(d<0&&r<=i);i+=d) ret.push_back((*this)[i]);
    return ret;
  }
  Int size()const{ return (Int)vT::size(); }// signed size
  Int n()const{ return size(); }
  /*---- push / pop / insert / erase ----*/
  Vector &push_back(cT& x,Int n=1){ for(Int i=0;i<n;++i){ vT::push_back(x); } return *this; }
  Vector &pop_back(Int n=1){ for(Int i=0;i<n;++i){ vT::pop_back(); } return *this; }
  Vector &push_front(cT& x,Int n=1){ this->insert(0,x,n); return *this; }
  Vector &pop_front(Int n=1){ erase(0,n-1); return *this; }
  T pull_back(){ T x=move(vT::back()); vT::pop_back(); return x; }// remove and return the last element
  T pull_front(){ T x=move(vT::front()); erase(0); return x; }// remove and return the first element
  Vector &insert(Int i,cT& x,Int n=1){ insert(it(i),n,x); return *this; }
  Vector &insert(Int i,cvT& b){ insert(it(i),b.begin(),b.end()); return *this; }
  Vector &erase(Int i){ erase(it(i)); return *this; }
  Vector &erase(Int l,Int r){ erase(it(l),it(r+1)); return *this; }
  // Remove the elements at the given indices in one pass.
  // The shifting scheme below relies on idxs being ascending and distinct.
  Vector &erase(const Vector<Int> &idxs){
      for (Int I=0; I<idxs.n(); ++I){
          Int l=idxs[I]+1, r = (I<idxs.n()-1) ? idxs[I+1] : this->n();
          // Must be qualified: unqualified `copy` would resolve to the member
          // Vector::copy(Int,cvT&,Int) and fail to compile.
          std::copy(it(l),it(r),it(l-I-1));// shift [l,r) towards the front by I+1 positions
      }
      vT::resize(this->n()-idxs.n());
      return *this;
  }
  Vector &eraseall(cT& x){ return eraseall(0,size()-1,x); }
  Vector &eraseall(Int l,Int r,cT& x){ erase(remove(it(l),it(r+1),x),it(r+1)); return *this; }
  template<class Pr> Vector &eraseif(Pr pr){ return eraseif(0,size()-1,pr); }
  template<class Pr> Vector &eraseif(Int l,Int r,Pr pr){ erase(remove_if(it(l),it(r+1),pr),it(r+1)); return *this; }
  // Append b n times; a snapshot is used when b is this very vector.
  Vector &concat(cvT &b,Int n=1){
    cvT B = (&b==this) ? *this : vT{};
    for(Int i=0;i<n;++i) this->insert(size(),(&b==this)?B:b);
    return *this;
  }
  Vector repeat(Int n){ return Vector{}.concat(*this,n); }
  /*---- reordering (a negative r counts from the end: -1 is the last index) ----*/
  Vector &reverse(Int l=0,Int r=-1){ r+=r<0?size():0; std::reverse(it(l),it(r+1)); return *this; }
  Vector &rotate(Int m){ return rotate(0,size()-1,m); }
  Vector &rotate(Int l,Int r,Int m){ std::rotate(it(l),it(m),it(r+1)); return *this; }
  Vector &sort(Int l=0,Int r=-1){ r+=r<0?size():0; std::sort(it(l),it(r+1)); return *this; }
  Vector &rsort(Int l=0,Int r=-1){ return sort(l,r).reverse(l,r); }
  template<class Pr> Vector &sort(Pr pr){ return sort(0,size()-1,pr); }
  template<class Pr> Vector &sort(Int l,Int r,Pr pr){ std::sort(it(l),it(r+1),pr); return *this; }
  // Sort by the key-th component (via get<key>) of each element.
  template<int key> Vector &sortbykey(Int l=0,Int r=-1){
    r+=r<0?size():0;
    sort(l,r,[](cT &x,cT &y){return get<key>(x)<get<key>(y);});
    return *this;
  }
  Vector &uniq(){ erase(unique(begin(),end()),end()); return *this; }
  Vector &sortq(){ return sort().uniq(); }// sort, then drop duplicates
  /*---- filling ----*/
  Vector &fill(cT& x){ return fill(0,size()-1,x); }
  Vector &fill(Int l,Int r,cT& x){ std::fill(it(l),it(r+1),x); return *this; }
  Vector &copy(Int i,cvT &b,Int n=1){// write b, repeated n times, starting at index i; clipped to the existing size
      for (Int t=0; t<n; ++t) for (Int j=0; j<(Int)b.size(); ++j){
          if (i>=size()) return *this;
          if (i>=0) (*this)[i]=b[j];
          i++;
      }
      return *this;
  }
  // Resize to n and fill with s, s+d, s+2d, ...
  template<class S=Int> Vector &iota(Int n,T s=0,S d=1){
    vT::resize(n);
    if(n==0) return *this;
    (*this)[0]=s;
    for(Int i=1;i<n;++i) (*this)[i]=(*this)[i-1]+d;
    return *this;
  }
  /*---- counting and linear search ----*/
  Int count(cT& x)const{ return count(0,size()-1,x); }
  Int count(Int l,Int r,cT& x)const{ return Int(std::count(it(l),it(r+1),x)); }
  template<class Pr> Int countif(Pr pr)const{ return countif(0,size()-1,pr); }
  template<class Pr> Int countif(Int l,Int r,Pr pr)const{ return Int(count_if(it(l),it(r+1),pr)); }
  // find*: index of the first match, or r+1 when there is none.
  Int find(cT& x)const{ return find(0,size()-1,x); }
  Int find(Int l,Int r,cT& x)const{ return Int(std::find(it(l),it(r+1),x)-begin()); }
  // rfind: index of the last match, or l-1 when there is none.
  Int rfind(cT& x)const{ return rfind(0,size()-1,x); }
  Int rfind(Int l,Int r,cT& x)const{
      for (Int i=r;i>=l;--i) if ((*this)[i]==x) return i;
      return l-1;
  }
  template<class Pr> Int findif(Pr pr)const{ return findif(0,size()-1,pr); }
  template<class Pr> Int findif(Int l,Int r,Pr pr)const{ return Int(find_if(it(l),it(r+1),pr)-begin()); }
  Vector<Int> findall(cT& x)const{ return findall(0,size()-1,x); }
  Vector<Int> findall(Int l,Int r,cT& x)const{ return findallif(l,r,[&](cT& y){return y==x;}); }
  template<class Pr> Vector<Int> findallif(Pr pr)const{ return findallif(0,size()-1,pr); }
  template<class Pr> Vector<Int> findallif(Int l,Int r,Pr pr)const{
    Vector<Int> ret;
    for(Int i=l;i<=r;++i) if(pr((*this)[i])) ret.push_back(i);
    return ret;
  }
  /*---- binary search (upper_bound / lower_bound over the whole vector) ----*/
  Int  flooridx(cT& x)const{ return Int(upper_bound(begin(),end(),x)-begin()-1); }// upper_bound index minus 1 (-1 possible)
  Int   ceilidx(cT& x)const{ return Int(lower_bound(begin(),end(),x)-begin()); }// lower_bound index (size() possible)
  Int  leftnmof(cT& x)const{ return flooridx(x)+1; }
  Int rightnmof(cT& x)const{ return size()-ceilidx(x); }
  bool contains(cT& x)const{ Int i=flooridx(x); return i>=0 && (*this)[i]==x; }
  template<class Pr> Int  flooridx(cT& x,Pr pr)const{ return Int(upper_bound(begin(),end(),x,pr)-begin()-1); }
  template<class Pr> Int   ceilidx(cT& x,Pr pr)const{ return Int(lower_bound(begin(),end(),x,pr)-begin()); }
  template<class Pr> Int  leftnmof(cT& x,Pr pr)const{ return flooridx(x,pr)+1; }
  template<class Pr> Int rightnmof(cT& x,Pr pr)const{ return size()-ceilidx(x,pr); }
  template<class Pr> bool contains(cT& x,Pr pr)const{ Int i=flooridx(x,pr); return i>=0 && (*this)[i]==x; }

  /*---- conversion of (nested) Vector to (nested) std::vector ----*/
  template<class S> using VV    = Vector<Vector<S>>; template<class S> using sVV    = vector<vector<S>>;
  template<class S> using VVV   = Vector<VV<S>>;     template<class S> using sVVV   = vector<sVV<S>>;
  template<class S> using VVVV  = Vector<VVV<S>>;    template<class S> using sVVVV  = vector<sVVV<S>>;
  template<class S> using VVVVV = Vector<VVVV<S>>;   template<class S> using sVVVVV = vector<sVVVV<S>>;
  auto tostd()const{ return tov(*this); }
  template <class S> static vector<S> tov(const Vector<S>&v){ return v; }
  template <class S> static sVV<S>    tov(const VV<S>    &v){ sVV<S>    ret; for(auto&& e:v) ret.push_back(e);         return ret; }
  template <class S> static sVVV<S>   tov(const VVV<S>   &v){ sVVV<S>   ret; for(auto&& e:v) ret.push_back(e.tostd()); return ret; }
  template <class S> static sVVVV<S>  tov(const VVVV<S>  &v){ sVVVV<S>  ret; for(auto&& e:v) ret.push_back(e.tostd()); return ret; }
  template <class S> static sVVVVV<S> tov(const VVVVV<S> &v){ sVVVVV<S> ret; for(auto&& e:v) ret.push_back(e.tostd()); return ret; }
};


// Modulus selection: change `#if 0` to `#if 1` to use 1000000007 instead of 998244353.
#if 0
#define MODLL (1000000007LL)
#else
#define MODLL (998244353LL)
#endif
// Modular-integer alias bound to the selected modulus.
using mll = mll_<MODLL>;
//using mll = fraction;



namespace atcoder {

//======== from internal_type_traits.hpp
namespace internal {
#ifndef _MSC_VER
// true_type exactly for the 128-bit signed integer type.
template <class T>
using is_signed_int128 =
    std::bool_constant<std::is_same<T,__int128_t>::value ||
                       std::is_same<T,__int128>::value>;

// true_type exactly for the 128-bit unsigned integer type.
template <class T>
using is_unsigned_int128 =
    std::bool_constant<std::is_same<T,__uint128_t>::value ||
                       std::is_same<T,unsigned __int128>::value>;

// std::is_integral widened so the 128-bit types always count as integral.
template <class T>
using is_integral =
    std::bool_constant<std::is_integral<T>::value ||
                       is_signed_int128<T>::value ||
                       is_unsigned_int128<T>::value>;

// Signed integral types, including the 128-bit signed type.
template <class T>
using is_signed_int =
    std::bool_constant<(is_integral<T>::value && std::is_signed<T>::value) ||
                       is_signed_int128<T>::value>;

// Unsigned integral types, including the 128-bit unsigned type.
template <class T>
using is_unsigned_int =
    std::bool_constant<(is_integral<T>::value && std::is_unsigned<T>::value) ||
                       is_unsigned_int128<T>::value>;

#else

// MSVC branch: no 128-bit types, the standard traits are used directly.
template <class T> using is_integral = std::is_integral<T>;

template <class T>
using is_signed_int =
    std::bool_constant<is_integral<T>::value && std::is_signed<T>::value>;

template <class T>
using is_unsigned_int =
    std::bool_constant<is_integral<T>::value && std::is_unsigned<T>::value>;

#endif

// SFINAE helper: names `void` only when T is a signed integer type.
template <class T>
using is_signed_int_t = std::enable_if_t<is_signed_int<T>::value>;

// SFINAE helper: names `void` only when T is an unsigned integer type.
template <class T>
using is_unsigned_int_t = std::enable_if_t<is_unsigned_int<T>::value>;

}  // namespace internal

//======== from internal_bit.hpp
namespace internal {

using std::bit_ceil;

// Number of trailing zero bits of n, via the compiler intrinsic
// (_BitScanForward on MSVC, __builtin_ctz elsewhere).
inline int countr_zero(unsigned int n) {
#ifdef _MSC_VER
    unsigned long pos;
    _BitScanForward(&pos,n);
    return pos;
#else
    const int trailing = __builtin_ctz(n);
    return trailing;
#endif
}

// constexpr counterpart of countr_zero: scans upward from bit 0 until a set bit is found.
constexpr int countr_zero_constexpr(unsigned int n) {
    int shift = 0;
    for (; (n & (1 << shift)) == 0; ++shift) {
    }
    return shift;
}

}  // namespace internal

//======== from internal_math.hpp
namespace internal {

// x mod m with the result shifted into [0, m) when the remainder is negative.
constexpr long long safe_mod(long long x,long long m) {
    const long long rem = x % m;
    return rem < 0 ? rem + m : rem;
}

// x^n mod m by binary exponentiation; m == 1 short-circuits to 0.
// The loop consumes n bit by bit from the low end.
constexpr long long pow_mod_constexpr(long long x,long long n,int m) {
    if (m == 1) return 0;
    const unsigned int um = (unsigned int)(m);
    unsigned long long acc = 1;
    for (unsigned long long base = safe_mod(x,m); n; n >>= 1) {
        if (n & 1) acc = (acc * base) % um;
        base = (base * base) % um;
    }
    return acc;
}

// Primality test for an int: Miller-Rabin style rounds with the fixed bases 2, 7 and 61.
constexpr bool is_prime_constexpr(int n) {
    if (n <= 1) return false;
    if (n == 2 || n == 7 || n == 61) return true;
    if (n % 2 == 0) return false;
    // Strip all factors of two from n - 1.
    long long odd_part = n - 1;
    while (odd_part % 2 == 0) odd_part /= 2;
    constexpr long long witnesses[3] = {2, 7, 61};
    for (long long w : witnesses) {
        long long e = odd_part;
        long long cur = pow_mod_constexpr(w,e,n);
        // Keep squaring until the exponent reaches n - 1 or cur hits 1 / n - 1.
        for (; e != n - 1 && cur != 1 && cur != n - 1; e <<= 1) {
            cur = cur * cur % n;
        }
        const bool composite = (cur != n - 1) && (e % 2 == 0);
        if (composite) return false;
    }
    return true;
}
// Compile-time primality flag for a constant n.
template <int n> constexpr bool is_prime = is_prime_constexpr(n);

// Extended Euclid on (a mod b, b). Returns {g, x}: g is the last non-zero
// remainder, x the coefficient tracked alongside it (shifted to be non-negative).
// static_modint::inv() asserts g == 1 and uses x as the inverse.
// When a is a multiple of b the result is {b, 0}.
constexpr std::pair<long long,long long> inv_gcd(long long a,long long b) {
    a = safe_mod(a,b);
    if (a == 0) return {b, 0};

    long long r0 = b,r1 = a;  // consecutive remainders
    long long c0 = 0,c1 = 1;  // coefficient carried with each remainder

    while (r1 != 0) {
        const long long q = r0 / r1;
        const long long r2 = r0 - r1 * q;
        const long long c2 = c0 - c1 * q;
        r0 = r1;
        r1 = r2;
        c0 = c1;
        c1 = c2;
    }
    if (c0 < 0) c0 += b / r0;
    return {r0, c0};
}

// Smallest g >= 2 with g^((m-1)/p) != 1 (mod m) for every prime factor p of m - 1.
// A handful of frequently used moduli are answered from a fixed table.
constexpr int primitive_root_constexpr(int m) {
    switch (m) {
        case 2: return 1;
        case 167772161: return 3;
        case 469762049: return 3;
        case 754974721: return 11;
        case 998244353: return 3;
        default: break;
    }
    // Collect the distinct prime factors of m - 1 (2 is entered unconditionally).
    int divs[20] = {};
    divs[0] = 2;
    int cnt = 1;
    int x = (m - 1) / 2;
    while (x % 2 == 0) x /= 2;
    for (int i = 3; (long long)(i)*i <= x; i += 2) {
        if (x % i != 0) continue;
        divs[cnt++] = i;
        while (x % i == 0) x /= i;
    }
    if (x > 1) divs[cnt++] = x;
    // Try candidates in increasing order until one passes every factor check.
    for (int g = 2;; g++) {
        bool ok = true;
        for (int i = 0; i < cnt && ok; i++) {
            ok = pow_mod_constexpr(g,(m - 1) / divs[i],m) != 1;
        }
        if (ok) return g;
    }
}
// Compile-time primitive root for a constant modulus m.
template <int m> constexpr int primitive_root = primitive_root_constexpr(m);

}  // namespace internal

//======== from modint.hpp
namespace internal {
// Empty tag type at the root of the modint hierarchy.
struct modint_base {};
// Empty tag base of static_modint; is_static_modint detects it through std::is_base_of.
struct static_modint_base: modint_base {};
}  // namespace internal

// Integer modulo the compile-time constant m (m >= 1 is enforced by the enable_if).
// The residue is stored in the unsigned member _v; every operation below leaves
// it in [0, m).
template <int m,std::enable_if_t<(1 <= m)>* = nullptr>
struct static_modint: internal::static_modint_base {
    using mint = static_modint;

public:
    // The modulus.
    static constexpr int mod() { return m; }
    // Builds a value from v without any reduction; v is stored as-is.
    static mint raw(int v) {
        mint x;
        x._v = v;
        return x;
    }

    static_modint(): _v(0) {}
    // Signed integers: reduce, then lift a negative remainder into [0, m).
    template <class T,internal::is_signed_int_t<T>* = nullptr>
    static_modint(T v) {
        long long x = (long long)(v % (long long)(umod()));
        if (x < 0) x += umod();
        _v = (unsigned int)(x);
    }
    // Unsigned integers: a plain remainder suffices.
    template <class T,internal::is_unsigned_int_t<T>* = nullptr>
    static_modint(T v) {
        _v = (unsigned int)(v % umod());
    }

    // The stored residue.
    unsigned int val() const { return _v; }

    // Increment / decrement with wrap-around at the modulus.
    mint& operator++() {
        _v++;
        if (_v == umod()) _v = 0;
        return *this;
    }
    mint& operator--() {
        if (_v == 0) _v = umod();
        _v--;
        return *this;
    }
    mint operator++(int) {
        mint result = *this;
        ++*this;
        return result;
    }
    mint operator--(int) {
        mint result = *this;
        --*this;
        return result;
    }

    mint& operator+=(const mint& rhs) {
        _v += rhs._v;
        if (_v >= umod()) _v -= umod();
        return *this;
    }
    mint& operator-=(const mint& rhs) {
        _v -= rhs._v;
        // An unsigned wrap-around (borrow) shows up as _v >= umod(); adding umod() undoes it.
        if (_v >= umod()) _v += umod();
        return *this;
    }
    mint& operator*=(const mint& rhs) {
        // Multiply in 64 bits so the product cannot overflow before the reduction.
        unsigned long long z = _v;
        z *= rhs._v;
        _v = (unsigned int)(z % umod());
        return *this;
    }
    mint& operator/=(const mint& rhs) { return *this = *this * rhs.inv(); }

    mint operator+() const { return *this; }
    mint operator-() const { return mint() - *this; }

    // this^n by binary exponentiation; n must be non-negative (asserted).
    mint pow(long long n) const {
        assert(0 <= n);
        mint x = *this,r = 1;
        while (n) {
            if (n & 1) r *= x;
            x *= x;
            n >>= 1;
        }
        return r;
    }
    // Multiplicative inverse. For a prime modulus it is computed as this^(m-2)
    // (the value must be non-zero); otherwise through inv_gcd, which requires
    // the value to be coprime to m. Both preconditions are asserted.
    mint inv() const {
        if (prime) {
            assert(_v);
            return pow(umod() - 2);
        }
        else {
            auto eg = internal::inv_gcd(_v,m);
            assert(eg.first == 1);
            return eg.second;
        }
    }

    friend mint operator+(const mint& lhs,const mint& rhs) {
        return mint(lhs) += rhs;
    }
    friend mint operator-(const mint& lhs,const mint& rhs) {
        return mint(lhs) -= rhs;
    }
    friend mint operator*(const mint& lhs,const mint& rhs) {
        return mint(lhs) *= rhs;
    }
    friend mint operator/(const mint& lhs,const mint& rhs) {
        return mint(lhs) /= rhs;
    }
    friend bool operator==(const mint& lhs,const mint& rhs) {
        return lhs._v == rhs._v;
    }
    friend bool operator!=(const mint& lhs,const mint& rhs) {
        return lhs._v != rhs._v;
    }

private:
    unsigned int _v;
    // The modulus as an unsigned value, for arithmetic on _v.
    static constexpr unsigned int umod() { return m; }
    // Whether m is prime; selects the algorithm used by inv().
    static constexpr bool prime = internal::is_prime<m>;
};

namespace internal {

// True when T derives from static_modint_base.
template <class T>
using is_static_modint = std::is_base_of<internal::static_modint_base,T>;

// SFINAE helper: names `void` only when T derives from static_modint_base.
template <class T>
using is_static_modint_t = std::enable_if_t<is_static_modint<T>::value>;

}  // namespace internal

//======== from convoution.hpp
namespace internal {

// Tables of roots of unity used by butterfly() / butterfly_inv() for a given
// static modint type. g defaults to the primitive root of the modulus.
template <class mint,
    int g = internal::primitive_root<mint::mod()>,
    internal::is_static_modint_t<mint>* = nullptr>
struct fft_info {
    // Number of trailing zero bits of mod - 1, i.e. the largest k with 2^k | mod - 1.
    static constexpr int rank2 = countr_zero_constexpr(mint::mod() - 1);
    std::array<mint,rank2 + 1> root;   // root[i]^(2^i) == 1
    std::array<mint,rank2 + 1> iroot;  // root[i] * iroot[i] == 1

    // Incremental rotation factors consumed by the radix-2 steps.
    std::array<mint,std::max(0,rank2 - 2 + 1)> rate2;
    std::array<mint,std::max(0,rank2 - 2 + 1)> irate2;

    // Incremental rotation factors consumed by the radix-4 steps.
    std::array<mint,std::max(0,rank2 - 3 + 1)> rate3;
    std::array<mint,std::max(0,rank2 - 3 + 1)> irate3;

    fft_info() {
        // Start from g^((mod-1) >> rank2) and square repeatedly to fill the lower entries.
        root[rank2] = mint(g).pow((mint::mod() - 1) >> rank2);
        iroot[rank2] = root[rank2].inv();
        for (int i = rank2 - 1; i >= 0; i--) {
            root[i] = root[i + 1] * root[i + 1];
            iroot[i] = iroot[i + 1] * iroot[i + 1];
        }

        {
            // rate2[i] = root[i+2] times the product of all earlier iroot[j+2] (j < i);
            // irate2 is built the same way with root and iroot swapped.
            mint prod = 1,iprod = 1;
            for (int i = 0; i <= rank2 - 2; i++) {
                rate2[i] = root[i + 2] * prod;
                irate2[i] = iroot[i + 2] * iprod;
                prod *= iroot[i + 2];
                iprod *= root[i + 2];
            }
        }
        {
            // Same construction shifted by one level (index i + 3).
            mint prod = 1,iprod = 1;
            for (int i = 0; i <= rank2 - 3; i++) {
                rate3[i] = root[i + 3] * prod;
                irate3[i] = iroot[i + 3] * iprod;
                prod *= iroot[i + 3];
                iprod *= root[i + 3];
            }
        }
    }
};

// In-place forward transform of a, used by convolution_fft().
// h is taken as countr_zero(size), so the size is presumably required to be a
// power of two (convolution_fft resizes to bit_ceil before calling) -- confirm for other callers.
// Levels are processed two at a time (radix-4) with a single radix-2 step when
// only one level remains.
template <class mint,internal::is_static_modint_t<mint>* = nullptr>
void butterfly(std::vector<mint>& a) {
    int n = int(a.size());
    int h = internal::countr_zero((unsigned int)n);

    // Root tables are built once on first use.
    static const fft_info<mint> info;

    int len = 0;  // a[i, i+(n>>len), i+2*(n>>len), ..] is transformed
    while (len < h) {
        if (h - len == 1) {
            // Radix-2 step for the last remaining level.
            int p = 1 << (h - len - 1);
            mint rot = 1;
            for (int s = 0; s < (1 << len); s++) {
                int offset = s << (h - len);
                for (int i = 0; i < p; i++) {
                    auto l = a[i + offset];
                    auto r = a[i + offset + p] * rot;
                    a[i + offset] = l + r;
                    a[i + offset + p] = l - r;
                }
                // Advance the rotation for the next block; the table index is the
                // number of trailing one bits of s.
                if (s + 1 != (1 << len))
                    rot *= info.rate2[countr_zero(~(unsigned int)(s))];
            }
            len++;
        }
        else {
            // 4-base
            int p = 1 << (h - len - 2);
            mint rot = 1,imag = info.root[2];
            for (int s = 0; s < (1 << len); s++) {
                mint rot2 = rot * rot;
                mint rot3 = rot2 * rot;
                int offset = s << (h - len);
                for (int i = 0; i < p; i++) {
                    // Products are kept unreduced in 64 bits; multiples of mod^2 are
                    // added so that the subtractions below cannot go negative.
                    auto mod2 = 1ULL * mint::mod() * mint::mod();
                    auto a0 = 1ULL * a[i + offset].val();
                    auto a1 = 1ULL * a[i + offset + p].val() * rot.val();
                    auto a2 = 1ULL * a[i + offset + 2 * p].val() * rot2.val();
                    auto a3 = 1ULL * a[i + offset + 3 * p].val() * rot3.val();
                    auto a1na3imag =
                        1ULL * mint(a1 + mod2 - a3).val() * imag.val();
                    auto na2 = mod2 - a2;
                    a[i + offset] = a0 + a2 + a1 + a3;
                    a[i + offset + 1 * p] = a0 + a2 + (2 * mod2 - (a1 + a3));
                    a[i + offset + 2 * p] = a0 + na2 + a1na3imag;
                    a[i + offset + 3 * p] = a0 + na2 + (mod2 - a1na3imag);
                }
                if (s + 1 != (1 << len))
                    rot *= info.rate3[countr_zero(~(unsigned int)(s))];
            }
            len += 2;
        }
    }
}

// In-place inverse counterpart of butterfly(), walking the levels from h down to 0
// with the inverse root tables. No scaling is applied here; convolution_fft()
// multiplies by the inverse of the transform size afterwards.
// As in butterfly(), h is taken as countr_zero(size).
template <class mint,internal::is_static_modint_t<mint>* = nullptr>
void butterfly_inv(std::vector<mint>& a) {
    int n = int(a.size());
    int h = internal::countr_zero((unsigned int)n);

    // Root tables are built once on first use.
    static const fft_info<mint> info;

    int len = h;  // a[i, i+(n>>len), i+2*(n>>len), ..] is transformed
    while (len) {
        if (len == 1) {
            // Radix-2 step for the last remaining level.
            int p = 1 << (h - len);
            mint irot = 1;
            for (int s = 0; s < (1 << (len - 1)); s++) {
                int offset = s << (h - len + 1);
                for (int i = 0; i < p; i++) {
                    auto l = a[i + offset];
                    auto r = a[i + offset + p];
                    a[i + offset] = l + r;
                    // mod is added before subtracting so the difference stays non-negative.
                    a[i + offset + p] =
                        (unsigned long long)(mint::mod() + l.val() - r.val()) *
                        irot.val();
                    ;
                }
                if (s + 1 != (1 << (len - 1)))
                    irot *= info.irate2[countr_zero(~(unsigned int)(s))];
            }
            len--;
        }
        else {
            // 4-base
            int p = 1 << (h - len);
            mint irot = 1,iimag = info.iroot[2];
            for (int s = 0; s < (1 << (len - 2)); s++) {
                mint irot2 = irot * irot;
                mint irot3 = irot2 * irot;
                int offset = s << (h - len + 2);
                for (int i = 0; i < p; i++) {
                    auto a0 = 1ULL * a[i + offset + 0 * p].val();
                    auto a1 = 1ULL * a[i + offset + 1 * p].val();
                    auto a2 = 1ULL * a[i + offset + 2 * p].val();
                    auto a3 = 1ULL * a[i + offset + 3 * p].val();

                    auto a2na3iimag =
                        1ULL *
                        mint((mint::mod() + a2 - a3) * iimag.val()).val();

                    // Each negated term is written as (mod - x) to keep the 64-bit sums non-negative.
                    a[i + offset] = a0 + a1 + a2 + a3;
                    a[i + offset + 1 * p] =
                        (a0 + (mint::mod() - a1) + a2na3iimag) * irot.val();
                    a[i + offset + 2 * p] =
                        (a0 + a1 + (mint::mod() - a2) + (mint::mod() - a3)) *
                        irot2.val();
                    a[i + offset + 3 * p] =
                        (a0 + (mint::mod() - a1) + (mint::mod() - a2na3iimag)) *
                        irot3.val();
                }
                if (s + 1 != (1 << (len - 2)))
                    irot *= info.irate3[countr_zero(~(unsigned int)(s))];
            }
            len -= 2;
        }
    }
}

// Schoolbook O(n*m) convolution of two modint sequences.
// Both inputs must be non-empty (the result has n + m - 1 entries).
// The longer sequence is walked by the outer loop.
template <class mint,internal::is_static_modint_t<mint>* = nullptr>
std::vector<mint> convolution_naive(const std::vector<mint>& a,
    const std::vector<mint>& b) {
    const int na = int(a.size());
    const int nb = int(b.size());
    std::vector<mint> prod(na + nb - 1);
    if (na >= nb) {
        for (int ia = 0; ia != na; ++ia) {
            for (int ib = 0; ib != nb; ++ib) prod[ia + ib] += a[ia] * b[ib];
        }
        return prod;
    }
    for (int ib = 0; ib != nb; ++ib) {
        for (int ia = 0; ia != na; ++ia) prod[ia + ib] += a[ia] * b[ib];
    }
    return prod;
}

// NTT-based convolution: transform both inputs, multiply pointwise,
// transform back and scale by 1/z, where z is the padded power-of-two length.
// Both inputs must be non-empty.
template <class mint,internal::is_static_modint_t<mint>* = nullptr>
std::vector<mint> convolution_fft(std::vector<mint> a,std::vector<mint> b) {
    const int out_len = int(a.size()) + int(b.size()) - 1;
    const int z = (int)internal::bit_ceil((unsigned int)(out_len));
    a.resize(z);
    internal::butterfly(a);
    b.resize(z);
    internal::butterfly(b);
    for (int i = 0; i < z; i++) a[i] *= b[i];
    internal::butterfly_inv(a);
    a.resize(out_len);
    const mint iz = mint(z).inv();
    for (auto& x : a) x *= iz;
    return a;
}

}  // namespace internal

// Convolution of two static-modint sequences.
// Returns an empty vector when either input is empty. Asserts that the padded
// transform length divides mod - 1. Inputs whose shorter side has at most 60
// entries use the schoolbook routine; everything else goes through the NTT.
template <class mint,internal::is_static_modint_t<mint>* = nullptr>
std::vector<mint> convolution(std::vector<mint>&& a,std::vector<mint>&& b) {
    const int n = int(a.size());
    const int m = int(b.size());
    if (n == 0 || m == 0) return {};

    const int z = (int)internal::bit_ceil((unsigned int)(n + m - 1));
    assert((mint::mod() - 1) % z == 0);

    const bool use_naive = std::min(n,m) <= 60;
    if (!use_naive) return internal::convolution_fft(a,b);
    return convolution_naive(a,b);
}

template <unsigned int mod = 998244353,
    class T,
    std::enable_if_t<internal::is_integral<T>::value>* = nullptr>
std::vector<T> convolution(const std::vector<T>& a,const std::vector<T>& b) {
    int n = int(a.size()),m = int(b.size());
    if (!n || !m) return {};

    using mint = static_modint<mod>;

    int z = (int)internal::bit_ceil((unsigned int)(n + m - 1));
    assert((mint::mod() - 1) % z == 0);

    std::vector<mint> a2(n),b2(m);
    for (int i = 0; i < n; i++) {
        a2[i] = mint(a[i]);
    }
    for (int i = 0; i < m; i++) {
        b2[i] = mint(b[i]);
    }
    auto c2 = convolution(std::move(a2),std::move(b2));
    std::vector<T> c(n + m - 1);
    for (int i = 0; i < n + m - 1; i++) {
        c[i] = c2[i].val();
    }
    return c;
}

}  // namespace atcoder
using atcoder::convolution;



namespace fpsspace{
using Int = long long;
using ll = long long;
constexpr int inf=int(1e9);

/********* utility functions *********/
// Returns a^n by binary exponentiation. A negative n is allowed: the base is
// replaced by T(1)/a first, so T must support division in that case.
// The base is squared only while exponent bits remain, so no extra
// multiplication (and, for integer T, no needless overflow) happens after the
// last bit has been consumed.
template<class T> T POW(T a,ll n){
    if (n<0) a=T(1)/a,n=-n;
    T r=1;
    while (n>0){
        if (n&1) r*=a;
        n>>=1;
        if (n>0) a*=a;
    }
    return r;
}
// Saturating product min(a*b,l) for a,b >= 0; l defaults to 9e18.
ll LimitMul(ll a,ll b,ll l=ll(9e18)){
    return (b==0 || a<=l/b) ? a*b : l;
}
/*---- 1/i列挙 i=1~d ----*/
template<int Kind> struct Wrap{};// turns Kind into a type so overload resolution can dispatch on it
// Table of reciprocals 1/i for modint-like T (Kind 1 or 2), grown on demand up
// to index d and cached in a function-local static. Entries 0 and 1 are both
// T(1); entry i is built from entry MOD%i via inv(i) = -inv(MOD%i)*(MOD/i).
template<class T,int Kind,class=enable_if_t<Kind==1 || Kind==2>>
std::vector<T> &Invs(int d,Wrap<Kind>){
    static std::vector<T> table(2,T(1));
    const int p = T::mod();
    while ((int)table.size()<=d){
        const int i=(int)table.size();
        const T next=-table[p%i]*T(p/i);
        table.push_back(next);
    }
    return table;
}
// Table of reciprocals for any other coefficient type: entry 0 is a
// value-initialised placeholder and entry i (i >= 1) is T(1)/i.
template<class T> std::vector<T> &Invs(int d,Wrap<0>){
    static std::vector<T> table(1);
    while ((int)table.size()<=d){
        const int i=(int)table.size();
        table.push_back(T(1)/i);
    }
    return table;
}

// Table of factorials i! for i = 0..d, grown on demand and cached in a
// function-local static (one table per T). Returns a reference to the table,
// which may be longer than d+1 if an earlier call asked for more.
template<class T> std::vector<T> &Fact(int d){
    static std::vector<T> table(1,T(1));
    while ((int)table.size()<=d){
        const int next=(int)table.size();
        table.push_back(table.back()*T(next));
    }
    return table;
}
template<class T,int Kind> std::vector<T> &FInv(int d){// 1/i!列挙 i=0~d
    static std::vector<T> finv(1,T(1));
    const std::vector<T> &invs=Invs<T>(d,Wrap<Kind>{});
    for (int i=(int)finv.size(); i<=d; ++i) finv.push_back(finv.back()*invs[i]);
    return finv;
}

// Berlekamp-Massey: from the terms of A(x) up to degree 2L-1, recover the
// degree-L denominator Q of A = P/Q. Intended for Kind = 1, 2 only.
// T needs exact ==, division and the usual ring operations.
template <class T> std::vector<T> BerlekampMassey(const std::vector<T> &a){
    std::vector<T> cur={1},prev={1};// cur: connection polynomial being built, prev: its state before the last length change
    int gap=1;                      // shift applied to prev when correcting cur
    T prevDisc=T(1);                // discrepancy recorded at the last length change
    // cur -= (disc/prevDisc) * (prev shifted right by gap)
    const auto subtractShifted=[&cur,&prev,&gap,&prevDisc](const T &disc){
        const T ratio=disc/prevDisc;
        const int len=(int)prev.size();
        if ((int)cur.size()<len+gap) cur.resize(len+gap);
        for (int i=0; i<len; ++i) cur[i+gap]-=ratio*prev[i];
    };
    const int total=(int)a.size();
    for (int n=0; n<total; ++n,++gap){
        // Discrepancy of the current recurrence at position n.
        T disc=T(0);
        const int terms=(int)cur.size();
        for (int k=0; k<terms; ++k) disc+=cur[k]*a[n-k];
        if (disc==T(0)) continue;// the recurrence already reproduces a[n]
        if (2*((int)cur.size()-1)>n){
            subtractShifted(disc);
            continue;
        }
        // Length change: remember the pre-update polynomial as the new prev.
        std::vector<T> saved=cur;
        subtractShifted(disc);
        prev.swap(saved);
        prevDisc=disc;
        gap=0;
    }
    return cur;
}

template<class FPS,class SPFPS,class T=typename FPS::value_type,class S>
FPS de_sparse( //a*F'=b*Fを満たすF
    const SPFPS &a_,const SPFPS &b_,S f0,Int dmx_,const std::vector<T> &invs_=std::vector<T>())
{
    assert(a_.lowdeg()<=b_.lowdeg());
    int dmx=(int)dmx_;
    const std::vector<T> &invs = invs_.size() ? invs_ : Invs<T>(dmx,Wrap<FPS::kind>{});
    SPFPS a=a_.shift(-a_.lowdeg()),b=b_.shift(-a_.lowdeg());
    T a0inv=T(1)/a.co(0);
    a*=a0inv,b*=a0inv;
    a.erase(a.begin());
    FPS f({T(f0)},dmx);
    for (int d=1; d<=dmx; ++d){
        for (auto [bb,i]:b){
            if (d-1-i>=0) f.at(d)+=bb*f[d-1-i];
        }
        for (auto [aa,i]:a){
            if (d-i>=0) f.at(d)-=aa*f[d-i]*(d-i);
        }
        f.at(d)*=invs[d];
    }
    return f;
}

/********* 疎FPSクラス *********/
// Sparse formal power series: a vector of (coefficient, degree) pairs.
// Several members (deg(), lowdeg(), pow) read the first/last element as the
// lowest/highest degree, so they expect the terms to be in ascending degree
// order; Norm() establishes that order.
template<class T> struct sparseFps: std::vector<pair<T,Int>>{
    using std::vector<pair<T,Int>>::vector; // keep the base-class constructors visible
    sparseFps &Norm(){// sort by ascending degree, merge equal degrees, drop zero coefficients
        sort(this->begin(),this->end(),
            [](const auto &x,const auto &y){return x.second<y.second; });
        int j=-1;// index of the last term kept so far
        for (int i=0; i<this->size(); ++i){
            if (j>=0 && deg(j)==deg(i)){
                co(j)+=co(i);
            }
            else{
                // Advance j unless the term at j has cancelled to zero, in
                // which case that slot is overwritten.
                if (!(j>=0 && co(j)==T(0))) ++j;
                (*this)[j]=(*this)[i];
            }
        }
        if (j>=0 && co(j)==T(0)) --j;
        this->resize(j+1);
        return *this;
    }
    /*---- I/F ----*/
    // Appends the term co*x^deg at the end (no re-sorting is done here).
    template<class S,class R>
    void set(S co,R deg){ this->emplace_back(T(co),Int(deg)); }
    Int deg()const{ return this->empty() ? -1 : this->back().second; }// highest degree (-1 when empty)
    T  co(Int i)const{ return (*this)[i].first; }// coefficient of the i-th stored term
    T &co(Int i)     { return (*this)[i].first; }
    Int  deg(Int i)const{ return (*this)[i].second; }// degree of the i-th stored term
    Int &deg(Int i)     { return (*this)[i].second; }
    // Degree of the first stored term, or inf when empty.
    Int lowdeg()const{ return this->empty() ? inf : this->front().second; }
    sparseFps &operator+=(const sparseFps &sg){
        this->insert(this->end(),sg.begin(),sg.end());
        return Norm();
    }
    sparseFps operator+(const sparseFps &sg)const{ return sparseFps(*this)+=sg; }
    sparseFps &operator*=(T b){ for (auto&&[c,_]:*this) c*=b; return *this; }
    sparseFps operator*(T b)const{ return sparseFps(*this)*=b; }
    sparseFps &operator*=(const sparseFps &sg){ return *this=*this*sg; }
    // Term-by-term product followed by Norm().
    sparseFps operator*(const sparseFps &sg)const{
        sparseFps ret;
        for (auto&&[cf,df]:*this) for (auto&&[cg,dg]:sg) ret.set(cf*cg,df+dg);
        return ret.Norm();
    }
    sparseFps shift(Int k)const{ // *x^k (terms whose degree would become negative are dropped)
        sparseFps ret;
        for (auto&&[co,d]:*this) if (d+k>=0) ret.set(co,d+k);
        return ret;
    }
    sparseFps diff()const{ // derivative
        sparseFps ret;
        for (auto&&[co,d]:*this) if (d>0) ret.set(co*d,d-1);
        return ret;
    }
    // exp of this series up to degree dmx, obtained by solving F' = f'*F with F(0)=1.
    template<class FPS> FPS exp(Int dmx)const{
        assert(lowdeg()!=0); // the constant term must be 0
        return de_sparse<FPS>(sparseFps{{1,0},},diff(),1,dmx);
    }
    // k-th power of this series up to degree dmx, as a dense FPS.
    // k may be negative, in which case a non-zero constant term is required.
    // invs_ may supply a table of 1/i; when empty the shared Invs table is used.
    template<class FPS>
    FPS pow(ll k,Int dmx,const std::vector<T> &invs_=std::vector<T>())const{
        assert(!(k<0 && lowdeg()>0));// negative k requires a constant term
        if (k==0) return FPS({1},dmx);
        //-- highest degree to compute d: dmx when k<0, min(dmx,deg()*k) when k>0
        int d = (k<0 || LimitMul(deg(),k)>(ll)dmx) ? int(dmx) : int(deg()*k);
        //-- invs[i]=1/i for i=1..d (already computed entries are reused, only the missing ones are added)
        const std::vector<T> &invs = invs_.size() ? invs_ : Invs<T>(d,Wrap<FPS::kind>{});
        //-- handling of the lowest degree
        int s=(int)lowdeg();// lowest degree before exponentiation
        if (k>0 && LimitMul(s,k)>(ll)dmx) return FPS(dmx);// the result is all zero within the cap
        //-- compute by recurrence on the series divided by x^s
        T f0inv=T(1)/co(0);
        FPS g({POW(co(0),k)},dmx);
        for (int i=1; i<=d-s*k; ++i){ // fine for negative k because s=0 is guaranteed then
            for (int j=1; j<(int)this->size(); ++j){
                auto [c,dg]=(*this)[j];
                int b=int(dg)-s;
                if (i-b<0)break;
                g.at(i)+=c*g.at(i-b)*(T(k)*b-i+b);
            }
            g.at(i)*=f0inv*invs[i];
        }
        return g.shift(Int(s*k));// restore the factor x^(s*k)
    }
};

/********* FPSクラス *********/
// Dense formal power series stored as a vector of coefficients, together with
// a degree cap dMx. Multiplication of two Fps goes through the free function
// Prod, which is overloaded on Kind.
template<
    class T, // coefficient type
    int Kind // coefficient kind 0:other, 1:NTT-friendly mod, 2:arbitrary mod (3 is also accepted by the static_assert)
>
struct Fps: std::vector<T>{
    static_assert(0<=Kind && Kind<=3);
    static constexpr int kind=Kind;
    int dMx=int(1e6); // degree cap (terms above x^dMx are not kept)
    using vT = std::vector<T>;
    /*---- utility ----*/
    int isize()const{ return (int)std::vector<T>::size(); }
    int NormSize()const{// size with trailing zero coefficients excluded (usable on const objects)
        int sv=isize();
        while (sv>0 && (*this)[sv-1]==T(0)) --sv;
        return sv;
    }
    int Deg()const{ return NormSize()-1; } // highest non-zero degree (usable on const objects)
    Fps &Cut(){ return cut(dMx); }
    // Pads with zeros so that the stored size reaches dMx+1.
    Fps &ZeroExtend(){
        int anm=max(0,dMx-isize()+1);
        vT::insert(vT::end(),anm,T(0));
        return *this;
    }
    int MinD(const Fps &g)const{ return min(dMx,g.dMx); }
    void MergeD(const Fps &g){ dMx=MinD(g); Cut(); }
    // *this += Sign*g, after lowering the cap to the smaller of the two.
    template <int Sign> Fps &Add(const Fps &g){
        MergeD(g);
        for (int i=min(dMx,g.Deg()); i>=0; --i) at(i)+=Sign*g[i];
        return *this;
    }
    Fps ProdSparse(const sparseFps<T> &g,int d)const{// f * sparse g mod x^(d+1)
        Fps ret(d);
        for (auto&&[co,dg]:g) for (int i=0; i<(int)isize(); ++i){
            if (dg+i>d) break;
            ret.at(dg+i)+=co*(*this)[i];
        }
        return ret;
    }
    Fps InvSparse(const sparseFps<T> &g,int d)const{// f / sparse g mod x^(d+1), g0 != 0
        assert(!g.empty() && g.deg(0)==0 && g.co(0)!=0);
        //-- scale so that g's constant term becomes 1
        T c0inv=T(1)/g.co(0);
        Fps ret=((*this)*c0inv).setdmx(d);
        if (g.size()==1u) return ret;
        sparseFps<T> gg=g*c0inv;
        //-- push-style DP: each finished coefficient is propagated forward
        for (int i=0; i+(int)gg.deg(1)<=d; ++i){
            for (int j=1; j<(int)gg.size(); ++j){
                auto [co,dg]=gg[j];
                int ii=i+(int)dg;
                if (d<ii)break;
                ret.at(ii)-=ret.at(i)*co;
            }
        }
        return ret;
    }
    Fps &LogSparse( // f += log(sparse g^k), g = 1+ax^b
        const sparseFps<T> &g,ll k,const std::vector<T> &invs_=std::vector<T>())
    {
        assert(g.size()==2U && g.co(0)==T(1) && g.deg(0)==0);
        const std::vector<T> &invs = invs_.size() ? invs_ : Invs<T>(dMx,Wrap<Kind>{});
        int b=(int)g.deg(1);
        T c=g.co(1)*k;
        for (int i=1; i*b<=dMx; ++i,c*=-g.co(1)) at(i*b)+=c*invs[i];
        return *this;
    }
    /*---- constructors ----*/
    explicit Fps(Int dmx=int(1e6)): dMx(int(dmx)){}
    Fps(initializer_list<T> i,Int dmx=int(1e6)):
        vT(i.begin(),i.end()),dMx(int(dmx)){
        Cut();
    }
    template <class It,class=typename iterator_traits<It>::iterator_category>
    Fps(It l,It r,Int dmx=int(1e6)) : vT(l,r),dMx(int(dmx)){ Cut(); }
    // Takes ownership of v; note that this overload does not call Cut().
    Fps(std::vector<T> &&v,Int dmx=int(1e6)): vT(move(v)),dMx(int(dmx)){}
    Fps(const sparseFps<T> &sf,Int dmx=int(1e6)):dMx(int(dmx)){ // sparse f -> f
        for (auto&&[co,deg]:sf) if (deg<=dmx) at(deg)=co;
    }
    /*---- I/F ----*/
    sparseFps<T> tosparse()const{ // f -> sparse f
        sparseFps<T> ret;
        for (int i=0; i<isize(); ++i){
            if ((*this)[i]!=T(0)) ret.set((*this)[i],i);
        }
        return ret;
    }
    Int size()const{ return (Int)std::vector<T>::size(); }
    // Highest non-zero degree; shrinks the storage via fit() first.
    Int deg(){ fit(); return size()-1; }
    // Lowest non-zero degree, or inf when every coefficient is zero.
    Int lowdeg()const{
        for (int i=0; i<isize(); ++i){
            if ((*this)[i]!=T(0)) return i;
        }
        return inf;
    }
    Fps &setdmx(Int dmx){ dMx=(int)dmx; return Cut(); }
    // Read access: indices at or beyond the stored size read as T(0).
    T at(Int i)const{ return size()<=i ? T(0) : (*this)[i]; }
    // Write access: the storage is grown so that index i exists.
    T &at(Int i){
        if (size()<=i) this->resize(i+1);
        return (*this)[i];
    }
    // Drops trailing zero coefficients.
    Fps &fit(){
        this->resize(NormSize());
        return *this;
    }
    Fps &operator+=(const Fps &g){ return Add<1>(g); }
    Fps &operator-=(const Fps &g){ return Add<-1>(g); }
    Fps &operator*=(const Fps &g){ return *this=*this*g; }
    Fps &operator/=(const Fps &g){ return *this=*this/g; }
    Fps &operator*=(const sparseFps<T> &g){ return *this=*this*g; }
    Fps &operator/=(const sparseFps<T> &g){ return *this=*this/g; }
    Fps &operator+=(T c){ at(0)+=c; return *this; }
    Fps &operator-=(T c){ at(0)-=c; return *this; }
    Fps &operator*=(T c){ for (auto&& e: *this) e*=c; return *this; }
    Fps &operator/=(T c){ return (*this)*=T(1)/c; }
    Fps operator+(const Fps &g)const{ return Fps(*this)+=g; }
    Fps operator-(const Fps &g)const{ return Fps(*this)-=g; }
    Fps operator*(const Fps &g)const{ return Prod(*this,g,MinD(g)); }
    Fps operator/(const Fps &g)const{ return InvSparse(g.tosparse(),MinD(g)); }
    Fps operator*(const sparseFps<T> &g)const{ return ProdSparse(g,dMx); }
    Fps operator/(const sparseFps<T> &g)const{ return InvSparse(g,dMx); }
    Fps operator+(T c)const{ return Fps(*this)+=c; }
    Fps operator-(T c)const{ return Fps(*this)-=c; }
    Fps operator*(T c)const{ return Fps(*this)*=c; }
    Fps operator/(T c)const{ return Fps(*this)/=c; }
    Fps operator-()const{ return Fps(*this)*=T(-1); }
    friend Fps operator+(T c,const Fps &f){ return f+c; }
    friend Fps operator-(T c,const Fps &f){ return -f+c; }
    friend Fps operator*(T c,const Fps &f){ return f*c; }
    T prod1(const Fps &g,Int k_)const{ // [x^k]f*g
        int df=Deg(),dg=g.Deg(),k=(int)k_;
        if (MinD(g)<k) return T(0);// beyond the common degree cap
        T ret=T(0);
        for (int i=max(0,k-dg),j=k-i; i<=df&&j>=0; ++i,--j) ret+=(*this)[i]*g[j];
        return ret;
    }
    T bostanmori(const Fps &g,ll k)const{ // [x^k]f/g
        assert(g.at(0)!=0);
        Fps P=Fps(*this).setdmx(inf),Q=Fps(g).setdmx(inf);
        for (; k>0; k>>=1){
            Fps Q1=Q;
            for (int i=1; i<Q1.isize(); i+=2) Q1[i]*=-1; // Q1 = Q with its odd-degree terms negated
            Fps PQ=P*Q1,QQ=Q*Q1;
            P.clear(),Q.clear();
            for (int i=k&1; i<PQ.isize(); i+=2) P.push_back(PQ[i]);// P = odd or even terms of PQ, by the low bit of k
            for (int i=0; i<QQ.isize(); i+=2) Q.push_back(QQ[i]);// Q = even terms of QQ
        }
        return P.at(0)/Q[0];
    }
    Fps berlekamp_massey(Int d)const{ // obtain Q of f=P/Q, estimated from the coefficients up to x^d (d odd)
        assert(d%2==1);
        std::vector<T> f;
        for (int i=0; i<=d; ++i) f.push_back(at(i));
        std::vector<T> Q=BerlekampMassey(f);
        Int dmx=Int(Q.size()-1);
        return Fps(move(Q),dmx);
    }
    T nthterm(Int d,ll k)const{ // [x^k]f, assuming a linear recurrence estimated from terms up to x^d (d odd)
        Fps Q=berlekamp_massey(d);
        Fps P=Prod(*this,Q,Q.dMx-1).fit();
        return P.bostanmori(Q,k);
    }
    Fps &estimate(Int d,Int dmx=-1){ // extend up to degree dmx, assuming a linear recurrence estimated from terms up to x^d (d odd)
        if (dmx==-1) dmx=dMx;
        Fps Q=berlekamp_massey(d);
        Fps P=Prod(*this,Q,Q.dMx-1).fit().setdmx(dmx);
        return *this=(Q.setdmx(dmx).inv()*P).ZeroExtend();
    }
    Fps &cut(Int d){ // keep terms up to x^d
        if (d+1<size()) vT::resize(size_t(d+1));
        return *this;
    }
    Fps &mod(Int n){ return cut(n-1); } //mod x^n
    [[nodiscard]] Fps shift(Int k_)const{ // *x^k
        Fps ret(dMx);
        const int k=(int)k_,m=min(isize()+k,dMx+1); // length after the shift
        if (m<=0 || dMx<k) return ret; // the result is empty
        for (int i=m-1-k; i>=max(0,-k); --i) ret.at(i+k)=(*this)[i];
        return ret;
    }
    T eval(T x)const{ // f(x), evaluated from the highest stored coefficient down
        T ret=T(0);
        for (int i=isize()-1; i>=0; --i) ret*=x,ret+=(*this)[i];
        return ret;
    }
    Fps diff()const{ // derivative
        Fps ret(dMx-1);
        for (int i=Deg(); i>=1; --i) ret.at(i-1)=(*this)[i]*i;
        return ret;
    }
    Fps integ()const{ // integral (constant term 0)
        Fps ret(dMx+1);
        for (int i=min(Deg(),dMx); i>=0; --i) ret.at(i+1)=(*this)[i]/(i+1);
        return ret;
    }
    T integrange(T l,T r)const{ // definite integral of f from l to r
        Fps itg=integ();
        return itg.eval(r)-itg.eval(l);
    }
    Fps inv()const{
        assert(at(0)!=0);// constant term != 0
        Fps g{T(1)/at(0)};
        for (int i=1; i<dMx+1; i*=2){// i: number of terms already correct
            g.setdmx(min(i*2-1,dMx));
            g = g+g-g*g*(*this);
        }
        return g;
    }
    Fps log()const{ //log f
        assert(at(0)==T(1));// constant term = 1
        return (diff()*inv()).integ();
    }
    Fps exp()const{ //exp f
        assert(at(0)==T(0));// constant term = 0
        Fps g{1};
        for (int i=1; i<dMx+1; i*=2){// i: number of terms already correct
            g.setdmx(min(i*2-1,dMx));
            g = g*(T(1)-g.log()+(*this));
        }
        return g;
    }
    Fps pow(ll k)const{ // f^k; k<0 is not supported
        if (k==0) return Fps({1},dMx);
        if (k==1) return *this;
        int z=(int)lowdeg();
        if (z==inf || z>int(dMx/k)) return Fps(dMx);// f(x)=0, or the result is 0 within the cap
        int m=int(dMx+1-z*k); // the result starts with z*k zeros, so computing dMx+1-z*k terms is enough
        Fps g=shift(-z).setdmx(m-1)/at(z); // normalise so that the constant term is 1
        Fps gk=(g.log()*k).exp(); //g^k
        Fps ret=(gk*POW(at(z),k)).setdmx(dMx).shift(Int(z*k)); // undo the normalisation
        return ret;
    }
    Fps powdbl(ll k)const{ // f^k by repeated squaring
        Fps ret({1},dMx),g=*this;
        for (; k>0; k>>=1,g*=g) if (k&1)ret*=g;
        return ret;
    }
    Fps powsparse(ll k,const std::vector<T> &invs=std::vector<T>())const{ // (sparse f)^k
        return tosparse().template pow<Fps>(k,dMx,invs);
    }
    pair<Fps,Fps> div(const Fps &g)const{ // polynomial quotient f/g and remainder f%g
        const Fps &f=*this;
        int na=f.NormSize(),nb=g.NormSize();
        assert(nb>0);
        int n=na-nb+1;// number of quotient terms
        if (n<=0) return {Fps(dMx),f};
        int nu=f.isize(),nv=g.isize();
        // Reversed leading coefficients of f and g (at most n of each).
        Fps aR(f.rbegin()+nu-na,f.rbegin()+min(nu-na+n,nu),n-1);
        Fps bR(g.rbegin()+nv-nb,g.rbegin()+min(nv-nb+n,nv),n-1);
        Fps qR=bR.inv()*aR;
        qR.resize(n);
        reverse(qR.begin(),qR.end());
        qR.fit().setdmx(dMx);
        Fps r=(f-Prod(qR,g,dMx)).fit();
        return {move(qR),move(r)};
    }
};

/********* 積をNTTmod畳み込み、任意mod畳み込み、畳み込み不使用から選択 *********/
// f*g mod x^(d+1) for Kind 0: no convolution is used; g is converted to its
// sparse form and multiplied term by term.
template<class T>
Fps<T,0> Prod(const Fps<T,0> &f,const Fps<T,0> &g,int d){
    const sparseFps<T> gs=g.tosparse();
    return f.ProdSparse(gs,d);
}
// f*g mod x^(d+1) for Kind 1: the coefficients (trailing zeros and anything
// above degree d dropped) are handed to the integer convolution modulo
// T::mod(), and the result is truncated to d+1 terms.
template<class T>
Fps<T,1> Prod(const Fps<T,1> &f,const Fps<T,1> &g,int d){
    const int nf=min(d+1,f.NormSize());
    const int ng=min(d+1,g.NormSize());
    std::vector<ll> lhs,rhs;
    lhs.reserve(nf);
    rhs.reserve(ng);
    for (int i=0; i<nf; ++i) lhs.push_back(f[i].val());
    for (int i=0; i<ng; ++i) rhs.push_back(g[i].val());
    std::vector<ll> prod=convolution<T::mod()>(lhs,rhs);
    if ((int)prod.size()>d+1) prod.resize(d+1);
    return Fps<T,1>(prod.begin(),prod.end(),d);
}
// f*g mod x^(d+1) for Kind 2 (arbitrary modulus).
// The product is computed modulo three NTT primes m0 < m1 < m2 and each
// coefficient is reconstructed in mixed-radix form s0 + s1*m0 + s2*m0*m1,
// which is then reduced by constructing T from it.
// m0*m1 mod T::mod() is recomputed on every call instead of being cached in a
// function-local static: with a coefficient type whose modulus can change at
// run time, a cached value would go stale after the modulus changes.
template<class T>
Fps<T,2> Prod(const Fps<T,2> &f,const Fps<T,2> &g,int d){
    static constexpr int m0 = 167772161; // m0<m1<m2 is required
    static constexpr int m1 = 469762049;
    static constexpr int m2 = 754974721;
    static constexpr int m01  = 104391568;// 1/m0(mod m1)
    static constexpr int m12  = 399692502;// 1/m1(mod m2)
    static constexpr int m012 = 190329765;// 1/m0m1(mod m2)
    const ll m0m1 = ll(m0)*m1 % T::mod();  // kept reduced so s2*m0m1 fits in 64 bits
    int nf=min(d+1,f.NormSize()),ng=min(d+1,g.NormSize());
    std::vector<ll> ff,gg;
    ff.reserve(nf),gg.reserve(ng);
    for (int i=0; i<nf; ++i) ff.push_back(f[i].val());
    for (int i=0; i<ng; ++i) gg.push_back(g[i].val());
    std::vector<ll> h0=convolution<m0>(ff,gg);
    std::vector<ll> h1=convolution<m1>(ff,gg);
    std::vector<ll> h2=convolution<m2>(ff,gg);
    Fps<T,2> ret(d);
    int nn=min(d+1,(int)h0.size());
    ret.reserve(nn);
    for (int i=0; i<nn; ++i){
        ll r0=h0[i],r1=h1[i],r2=h2[i];
        ll s0=r0;
        ll s1=(r1+m1-s0)*m01%m1; // non-negative because s0<m1
        ll s2=((r2+m2-s0)*m012+(m2-s1)*m12)%m2; // non-negative because s0,s1<m2
        ret.emplace_back(s0+s1*m0+s2*m0m1);
    }
    return ret;
}
#if 0 //f*g mod x^(d+1)  FFT畳み込み  使用時はFFTライブラリを貼った上で1にする
template<class T>
Fps<T,3> Prod(const Fps<T,3> &f,const Fps<T,3> &g,int d){
    std::vector<T> ff(f.begin(),f.end()),gg(g.begin(),g.end());
    std::vector<T> hh = ArbitraryModConvolution::CooleyTukey::multiply(ff,gg);
    if ((int)hh.size()>d+1) hh.resize(d+1);
    return Fps<T,3>(hh.begin(),hh.end(),d);
}
#endif
/********* I/F関数 *********/
// Computes f^k * g^m up to degree dmx for sparse f and g.
// A zero exponent replaces the corresponding series by the constant 1.
// The common factor x^z (z = lowdeg(f)*k + lowdeg(g)*m) is split off, the
// remaining series h is obtained from (f*g)*h' = (k*f'*g + m*f*g')*h via
// de_sparse, and the factor is restored at the end.
template<class FPS,class T=typename FPS::value_type> FPS prodtwopow(
    sparseFps<T> f_,ll k,sparseFps<T> g_,ll m,Int dmx,
    const std::vector<T> &invs=std::vector<T>())
{
    if (k==0){ f_={{T(1),0},}; k=1; }
    if (m==0){ g_={{T(1),0},}; m=1; }
    const Int fz=f_.lowdeg();
    const Int gz=g_.lowdeg();
    const bool fIsZero=(fz==Int(1e9));
    const bool gIsZero=(gz==Int(1e9));
    // A zero series raised to a negative power is not allowed.
    assert(!(fIsZero && k<0) && !(gIsZero && m<0));
    if (fIsZero || gIsZero) return FPS(dmx);// a zero factor makes the result zero
    const ll z=fz*k+gz*m; // overflow for huge k,m is not handled
    assert(z>=0);
    if (ll(dmx)<z) return FPS(dmx);
    const sparseFps<T> f=f_.shift(-fz);
    const sparseFps<T> g=g_.shift(-gz);
    const sparseFps<T> a=f*g;
    const sparseFps<T> b=f.diff()*g*k+f*g.diff()*m;
    const T h0=POW(f.co(0),k)*POW(g.co(0),m);
    FPS h=de_sparse<FPS>(a,b,h0,dmx-z,invs);
    return h.setdmx(dmx).shift(Int(z));
}

}//namespace fpsspace
#if 0
using fpsT = dd;
using fps  = fpsspace::Fps<fpsT,0>; //0:畳み込み不使用
#elif 1
using fpsT = mll;
using fps  = fpsspace::Fps<fpsT,1>; //1:NTTfriendly mod
#elif 0
using fpsT = atcoder::modint;
using fps  = fpsspace::Fps<fpsT,2>; //2:任意mod
#elif 0
using fpsT = dd;
using fps  = fpsspace::Fps<fpsT,3>; //3:FFT
#endif
using spfps = fpsspace::sparseFps<fpsT>;
/*
- 各種演算の結果の次数上限は、一部例外を除きf,gの小さい方となる。
- 疎FPSクラスは次数昇順、係数≠0必須
- -------- コンストラクタ --------
fps f;             //f(x)=0            次数上限1e6
fps f(d);          // 〃                  〃    d
fps f{2,3,4,};     //f(x)=2+3x+4x^2    次数上限1e6
fps f({2,3,4,},d); // 〃                  〃    d
fps f(all(v));     //vll等のvをコピー  次数上限1e6
fps f(all(v),d);   // 〃                  〃    d
- -------- コンストラクタ疎版 --------  vector<pair>と同じ
spfps sf={{4,2},{-1,5}}; //f(x)=4x^2-x^5
sf.set(c,d);             //c*x^dを末尾に追加
- -------- 演算子(fps同士) --------
f+=g f-=g f+g f-g -f 疎f+=疎g 疎f*=疎g 疎f+疎g 疎f*疎g
f*=g f*g              //NTTmod,任意mod,愚直がテンプレートで切り替わる
f*=疎g f*疎g          //愚直
f/=g f/=疎g f/g f/疎g //漸化式で愚直  g定数項≠0
- -------- 演算子(定数) --------
f+=c f-=c f*=c f/=c f+c f-c f*c f/c 疎f*=c 疎f*c
- -------- アクセス・操作 --------
f[i]=val;           //直接操作
f.at(i)=val;        //自動サイズ調整有
ll n=f.size();      //項数(次数+1)  leading zero含む
ll d=f.deg();       //非0の最高次の次数 f(x)=0の時-1
ll d=f.lowdeg();    //非0の最低次の次数 f(x)=0の時1e9
f.setdmx(d);        //次数上限をx^dにセット ＆ mod x^(d+1)  d≧0
f.fit();            //最高次≠0になるよう縮める
fps f(sf);             //疎f→f 変換
fps f(sf,d);           //疎f→f 変換  次数上限d
spfps sf=f.tosparse(); //f→疎f 変換
- -------- 演算 --------
mll c=f.prod1(g,k);     //[x^k]f*g
mll c=f.bostanmori(g,k);//[x^k]f/g  g定数項≠0  k巨大(10^18)でもOK
f.cut(d);               //x^dまでにする
f.mod(n);               //mod x^n
fps g=f.shift(k);       //f*x^k         k負も可
spfps sg=sf.shift(k);   //疎f*x^k       k負も可
mll val=f.eval(c);      //f(c)
fps g=f.diff();         //微分
fps g=f.integ();        //積分
mll val=f.integrange(l,r); //定積分 ∫_l^r f dx
fps g=f.inv();          //1/f     定数項≠0
fps g=f.log();          //log f   定数項=1
fps g=f.exp();          //exp f   定数項=0
fps g=sf.exp<fps>(d);   //exp 疎f 定数項=0
fps g=f.pow(k);         //f^k    k負は未対応
fps g=f.powdbl(k);      //f^k    doubling版
fps g=sf.pow<fps>(k,d); //疎f^k  次数上限d  k負も可(定数項≠0必須)
fps g=f.powsparse(k);   //疎f^k             k負も可(定数項≠0必須)
auto[h,r]=f.div(g);     //多項式の除算・剰余 h=f/g,r=f%g  次数上限はfの方
fps Q=f.berlekamp_massey(d); //f=P/QのQを復元 x^d(d奇数)までから推定
                             //Qの次数≦(d+1)/2 QのdmxはQの次数になる
mll c=f.nthterm(d,k);        //[x^k]f  k~10^18も可  x^d(d奇数)までから推定
f.estimate(d);               //x^d(d奇数)までを使用し次数上限まで推定
f.estimate(d,k);             //x^d(d奇数)までを使用しk次まで推定
fps F=fpsspace::de_sparse<fps>(sf,sg,F0,d); //微分方程式 疎f*F'=疎g*F  次数上限d
fps h=fpsspace::prodtwopow<fps>(sf,k,sg,m,d); //疎f^k*疎g^m 次数上限d  k,m負も可
*/



namespace SolvingSpace{

template<class T> using vector = Vector<T>;
using    vll=vector<   ll>; using    vmll=vector<   mll>; using    vdd=vector<   dd>;
using   vvll=vector<  vll>; using   vvmll=vector<  vmll>; using   vvdd=vector<  vdd>;
using  vvvll=vector< vvll>; using  vvvmll=vector< vvmll>; using  vvvdd=vector< vvdd>;
using vvvvll=vector<vvvll>; using vvvvmll=vector<vvvmll>; using vvvvdd=vector<vvvdd>;
using   vpll=vector<  pll>; using    vtll=vector<   tll>; using   vqll=vector<  qll>;
using  vvpll=vector< vpll>; using   vvtll=vector<  vtll>; using  vvqll=vector< vqll>;
using vss=vector<string>;
// Builds a vector of nm elements, each produced by one call to cin1<T>().
template<class T> vector<T> cinv(ll nm){
    return vector<T>(nm,[](ll /*index*/){ return cin1<T>(); });
}
// Builds H rows, each produced by cinv<T>(W).
template<class T> vector<vector<T>> cinvv(ll H,ll W){
    return vector<vector<T>>(H,[&W](ll /*row*/){ return cinv<T>(W); });
}

/*■■■■■■■■■■■■■■
■■■■■■■■■■■■■■■
■■■■■■■■■■■■■■■
■■■■■■■■■■■■■■■
■■■■■■■■■■■■■■■
■■■■■■■■■■■■■■■
■■■■■■■■■■■■■■■
■■■■■■■■■■■■■■■
■■■■■■■■■■■■■■■
■■■■■■■■■■■■■■■
■■■■■■■■■■■■■■■
■■■■■■■■■■■■■■■*/



void cin2solve()
{
    auto [N,M,K,L]=cins<ll,ll,ll,ll>();

    mll ans=0;
    rep(v,L,M){
        ll ji=K*M-v;
        mll Q,P;
        {
            spfps sf={{1,0},{-1,v+1}};
            fps f=sf.pow<fps>(N-1,ji); //疎f^k  次数上限d  k負も可(定数項≠0必須)
            
            spfps sg={{1,0},{-1,1}};
            fps g=sg.pow<fps>(-N+1,ji); //疎f^k  次数上限d  k負も可(定数項≠0必須)
            fps h=f*g;
            Q=h.at(ji);
        }
        {
            spfps sf={{1,0},{-1,M+1}};
            fps f=sf.pow<fps>(N-1,ji); //疎f^k  次数上限d  k負も可(定数項≠0必須)

            spfps sg={{1,0},{-1,1}};
            fps g=sg.pow<fps>(-N+1,ji); //疎f^k  次数上限d  k負も可(定数項≠0必須)
            fps h=f*g;
            P=h.at(ji);
        }
        ans+=P-Q;
    }
    cout << ans << '\n';

    return;
}


}//SolvingSpace

//////////////////////////////////////////


// Program entry point.
// With RANDOM_TEST defined it runs the solver once and then
// SolvingSpace::generand(). Otherwise the inner `#if 1` selects a single call
// to the solver; the disabled branch reads a count t and calls the solver in a
// loop (rep(i,0,t-1) iterates i = 0..t-1 inclusive).
// The value of timeget() is written to stderr before returning.
int main(){
#if defined(RANDOM_TEST)
    SolvingSpace::cin2solve();
    SolvingSpace::generand();
#else
  #if 1
    //SolvingSpace::labo();'
    SolvingSpace::cin2solve();
  #else
    // multi-test variant: first read the number of cases
    ll t;  cin >> t;
    rep(i,0,t-1){
        SolvingSpace::cin2solve();
    }
  #endif
#endif
    cerr << timeget() <<"ms"<< '\n';
    return 0;
}
yukicoder

結果

ソースコード