結果

問題 No.2936 Sum of Square of Mex
ユーザー 👑 NachiaNachia
提出日時 2024-10-29 22:25:35
言語 C++17
(gcc 12.3.0 + boost 1.83.0)
結果
WA  
実行時間 -
コード長 20,751 bytes
コンパイル時間 1,513 ms
コンパイル使用メモリ 96,944 KB
実行使用メモリ 10,528 KB
最終ジャッジ日時 2024-10-29 22:25:39
合計ジャッジ時間 4,034 ms
ジャッジサーバーID
(参考情報)
judge4 / judge1
このコードへのチャレンジ
(要ログイン)

テストケース

テストケース表示
入力 結果 実行時間
実行使用メモリ
testcase_00 AC 2 ms
6,820 KB
testcase_01 WA -
testcase_02 AC 10 ms
6,816 KB
testcase_03 WA -
testcase_04 AC 2 ms
6,816 KB
testcase_05 AC 2 ms
6,816 KB
testcase_06 AC 2 ms
6,820 KB
testcase_07 WA -
testcase_08 AC 62 ms
9,804 KB
testcase_09 WA -
testcase_10 AC 33 ms
6,828 KB
testcase_11 WA -
testcase_12 AC 35 ms
6,956 KB
testcase_13 WA -
testcase_14 WA -
testcase_15 AC 58 ms
9,644 KB
testcase_16 AC 73 ms
10,396 KB
testcase_17 AC 65 ms
10,392 KB
testcase_18 AC 65 ms
10,524 KB
testcase_19 AC 66 ms
10,396 KB
testcase_20 AC 66 ms
10,396 KB
testcase_21 AC 66 ms
10,392 KB
testcase_22 AC 65 ms
10,396 KB
testcase_23 AC 66 ms
10,524 KB
testcase_24 WA -
testcase_25 WA -
testcase_26 AC 66 ms
10,396 KB
testcase_27 AC 66 ms
10,396 KB
testcase_28 AC 65 ms
10,400 KB
権限があれば一括ダウンロードができます

ソースコード

diff #

#ifdef NACHIA
#define _GLIBCXX_DEBUG
#else
#define NDEBUG
#endif
#include <iostream>
#include <string>
#include <vector>
#include <algorithm>
// Short aliases for long long / unsigned long long.
using i64 = long long;
using u64 = unsigned long long;
// rep(i,n): for-loop whose int counter i runs over 0, 1, ..., int(n)-1.
#define rep(i,n) for(int i=0; i<int(n); i++)
// Large sentinel constant (roughly 1.0e18).
const i64 INF = 1001001001001001001;
// Lowers `l` to `r` when `r` compares less than `l`; otherwise `l` is left untouched.
template<typename A> void chmin(A& l, const A& r){
    if(!(r < l)) return;
    l = r;
}
// Raises `l` to `r` when `l` compares less than `r`; otherwise `l` is left untouched.
template<typename A> void chmax(A& l, const A& r){
    if(!(l < r)) return;
    l = r;
}
#include <atcoder/modint>
using Modint = atcoder::static_modint<998244353>;
using namespace std;

#include <cassert>

namespace nachia{

// Searches for the smallest g in [2, MOD) that passes ExamineVal, i.e. whose
// power (MOD-1)/q differs from 1 for every prime factor q of MOD-1.
template<unsigned int MOD>
struct PrimitiveRoot{
    using u64 = unsigned long long;

    // a^i modulo MOD by square-and-multiply.
    static constexpr u64 powm(u64 a, u64 i) {
        u64 acc = 1;
        u64 base = a;
        while(i != 0){
            if((i & 1) != 0) acc = acc * base % MOD;
            base = base * base % MOD;
            i >>= 1;
        }
        return acc;
    }

    // True when g^((MOD-1)/q) != 1 for every prime q dividing MOD-1.
    static constexpr bool ExamineVal(unsigned int g){
        u64 rest = MOD - 1;
        u64 d = 2;
        // Trial division over 2, 3, 5, 7, ... (2 first, then odd candidates).
        while(d * d <= rest){
            if(rest % d == 0){
                if(powm(g, (MOD - 1) / d) == 1) return false;
                do { rest /= d; } while(rest % d == 0);
            }
            d += (d & 1) ? 2 : 1;
        }
        // Whatever is left above 1 is one remaining prime factor.
        if(rest != 1 && powm(g, (MOD - 1) / rest) == 1) return false;
        return true;
    }

    // First candidate in [2, MOD) accepted by ExamineVal, or 0 if none is.
    static constexpr unsigned int GetVal(){
        u64 cand = 2;
        while(cand < MOD){
            if(ExamineVal(cand)) return cand;
            ++cand;
        }
        return 0;
    }

    static const unsigned int val = GetVal();
};

} // namespace nachia

namespace nachia{

// Growable tables of factorials and inverse factorials over Modint, with the
// usual binomial / permutation helpers built on top of them.
template<class Modint>
class Comb{
private:
    std::vector<Modint> F;   // F[i]  = i!
    std::vector<Modint> iF;  // iF[i] = 1 / i!

    // Arguments for which comb/perm (and their inverses) return zero.
    static bool rejected(int n, int r){ return n < 0 || n < r || r < 0; }
public:
    // Makes sure both tables cover indices 0..newN; never shrinks.
    void extend(int newN){
        const int oldN = (int)F.size() - 1;
        if(newN <= oldN) return;
        F.resize(newN+1);
        iF.resize(newN+1);
        for(int k=oldN+1; k<=newN; k++) F[k] = F[k-1] * Modint::raw(k);
        // One modular inverse at the top, then walk downwards.
        iF[newN] = F[newN].inv();
        for(int k=newN; k>oldN; k--) iF[k-1] = iF[k] * Modint::raw(k);
    }

    // Tables always start with the entries for 0 and 1.
    Comb(int n = 1) : F(2, Modint(1)), iF(2, Modint(1)) {
        extend(n);
    }

    // n!  (n must already be covered by the tables)
    Modint factorial(int n) const { return F[n]; }
    // 1 / n!
    Modint invFactorial(int n) const { return iF[n]; }
    // 1 / n, computed as (n-1)! / n!  (requires n >= 1)
    Modint invOf(int n) const { return iF[n] * F[n-1]; }

    // Binomial coefficient C(n, r); zero for out-of-range arguments.
    Modint comb(int n, int r) const {
        return rejected(n, r) ? Modint(0) : F[n] * iF[r] * iF[n-r];
    }
    // 1 / C(n, r); zero for out-of-range arguments.
    Modint invComb(int n, int r) const {
        return rejected(n, r) ? Modint(0) : iF[n] * F[r] * F[n-r];
    }
    // Falling factorial n! / (n-r)!; zero for out-of-range arguments.
    Modint perm(int n, int r) const {
        return rejected(n, r) ? Modint(0) : F[n] * iF[n-r];
    }
    // (n-r)! / n!; zero for out-of-range arguments.
    Modint invPerm(int n, int r) const {
        return rejected(n, r) ? Modint(0) : iF[n] * F[n-r];
    }
    // Shorthand for comb(n, r).
    Modint operator()(int n, int r) const { return comb(n,r); }
};

} // namespace nachia

namespace nachia{

// Returns the number of set bits in c.
int Popcount(unsigned long long c) noexcept {
#ifdef __GNUC__
    return __builtin_popcountll(c);
#else
    // Portable parallel bit count: fold pair, nibble and byte sums in place.
    c = (c & (~0ull/3)) + ((c >> 1) & (~0ull/3));     // 0x5555... : 2-bit sums
    c = (c & (~0ull/5)) + ((c >> 2) & (~0ull/5));     // 0x3333... : 4-bit sums
    c = (c & (~0ull/17)) + ((c >> 4) & (~0ull/17));   // 0x0f0f... : 8-bit sums
    // Multiplying by 0x0101...01 (= ~0ull/255) accumulates all byte sums in
    // the top byte. The previous multiplier ~0ull/257 (0x00ff00ff...) did not,
    // and produced 0 for c == 1.
    c = (c * (~0ull/255)) >> 56;
    return (int)c;
#endif
}

// Returns the zero-based index of the highest set bit of x.
// please ensure x != 0
int MsbIndex(unsigned long long x) noexcept {
#ifdef __GNUC__
    return 63 - __builtin_clzll(x);
#else
    // Portable fallback: halving search over the 64 bit positions.
    // (The earlier nibble-mask trick returned wrong indices, e.g. 4 for x == 1.)
    int idx = 0;
    for(int shift = 32; shift > 0; shift >>= 1){
        if((x >> shift) != 0){
            x >>= shift;
            idx += shift;
        }
    }
    return idx;
#endif
}

// Zero-based position of the lowest set bit of x.
// Precondition: x != 0.
int LsbIndex(unsigned long long x) noexcept {
#ifndef __GNUC__
    // Isolate the lowest set bit, then locate that single bit.
    return MsbIndex(x & -x);
#else
    return __builtin_ctzll(x);
#endif
}

}


namespace nachia {

// Base for NTT implementations: the two butterfly hooks are no-ops here, and
// BitReversal permutes a length-N range into bit-reversed index order.
template<class mint>
struct NttInterface{

// Forward-transform hook; this default does nothing.
template<class Iter>
void Butterfly(Iter /*first*/, int /*n*/) const {}

// Inverse-transform hook; this default does nothing.
template<class Iter>
void IButterfly(Iter /*first*/, int /*n*/) const {}

// Swaps a[pos] with a[rev(pos)] for every pos, where `rev` is maintained as a
// counter that is incremented from the most significant bit downwards.
template<class Iter>
void BitReversal(Iter a, int N) const {
    int rev = 0;
    for(int pos = 0; pos < N; ++pos){
        if(rev < pos) std::swap(a[rev], a[pos]);
        // Reversed increment: flip bits from the top until one turns on.
        int bit = N >> 1;
        while(true){
            rev ^= bit;
            if(bit <= rev) break;
            bit >>= 1;
        }
    }
}

};

} // namespace nachia
#include <iterator>
#include <array>

namespace nachia{

// Number-theoretic transform butterflies over `mint`.
// The forward pass is built from radix-4 layers, preceded by a single radix-2
// layer when log2(n) is odd; the inverse pass mirrors that order. Neither pass
// calls BitReversal, and the inverse pass does not scale by 1/n.
template <class mint>
struct Ntt : NttInterface<mint> {

using u32 = unsigned int;
using u64 = unsigned long long;
    
// Smallest x such that 2^x >= n.
static int ceil_pow2(int n) {
    int x = 0;
    while ((1U << x) < (u32)(n)) x++;
    return x;
}
    
// Index of the lowest set bit of n (n must be nonzero).
static constexpr int bsf_constexpr(unsigned int n) {
    int x = 0;
    while (!(n & (1 << x))) x++;
    return x;
}

// Precomputed roots of unity for the modulus of `mint`.
struct fft_info {
    // Primitive root of the modulus.
    static constexpr u32 g = nachia::PrimitiveRoot<mint::mod()>::val;
    // Exponent of 2 in (mod - 1).
    static constexpr int rank2 = bsf_constexpr(mint::mod()-1);
    using RootTable = std::array<mint, rank2+1>;
    // root[i]  = g^((mod-1) >> i), iroot[i] = its inverse.
    // rate3[i] = root[i+3] * iroot[3] * ... * iroot[i+2]; irate3 is the mirrored
    // product. They are the factors by which the rotation advances between
    // consecutive blocks of a radix-4 layer.
    RootTable root, iroot, rate3, irate3;

    fft_info(){
        root[rank2] = mint(g).pow((mint::mod() - 1) >> rank2);
        iroot[rank2] = root[rank2].inv();
        // Repeated squaring fills the lower-order roots.
        for(int i=rank2-1; i>=0; i--){
            root[i] = root[i+1] * root[i+1];
            iroot[i] = iroot[i+1] * iroot[i+1];
        }
        mint prod = 1, iprod = 1;
        for(int i=0; i<=rank2-3; i++){
            rate3[i] = root[i+3] * prod;
            irate3[i] = iroot[i+3] * iprod;
            prod *= iroot[i+3];
            iprod *= root[i+3];
        }
    }
};

// Forward butterflies on `repeat` consecutive chunks of n*stride elements
// starting at `a`. Within a chunk the passes stop once the half-width p reaches
// `stride`.
// NOTE(review): presumably n is a power of two; only the parity of
// ceil_pow2(n) is inspected here — confirm against callers.
template<class RandomAccessIterator>
void ButterflyLayered(RandomAccessIterator a, int n, int stride, int repeat) const {
    static const fft_info info;
    int h = n * stride;
    
    while(repeat--){

    int len = 1;
    int p = h;
    // Odd number of levels: peel off one radix-2 layer (no rotation needed).
    if(ceil_pow2(n)%2 == 1){
        p >>= 1;
        for(int i=0; i<p; i++){
            mint l = a[i], r = a[i+p];
            a[i] = l+r; a[i+p] = l-r;
        }
        len <<= 1;
    }
    // Remaining levels are consumed two at a time by radix-4 layers.
    for( ; p > stride; ){
        p >>= 2;
        mint rot = 1, imag = info.root[2];
        // mod^2: added before subtractions so the unreduced u64 values stay non-negative.
        u64 mod2 = u64(mint::mod()) * mint::mod();
        int offset = p;
        for(int s=0; s<len; s++){
            // Advance the rotation for block s from the one used for block s-1.
            if(s) rot *= info.rate3[LsbIndex(~(u32)(s-1))];
            mint rot2 = rot * rot;
            mint rot3 = rot2 * rot;
            for(int i=offset-p; i<offset; i++){
                // a1..a3 are unreduced products (each below mod^2).
                u64 a0 = u64(a[i].val());
                u64 a1 = u64(a[i+p].val()) * rot.val();
                u64 a2 = u64(a[i+2*p].val()) * rot2.val();
                u64 a3 = u64(a[i+3*p].val()) * rot3.val();
                // (a1 - a3) * imag, where imag = root[2].
                u64 a1na3imag = u64(mint(a1 + mod2 - a3).val()) * imag.val();
                u64 na2 = mod2 - a2;
                // Assigning the u64 sums to mint performs the final reduction.
                a[i] = a0 + a2 + a1 + a3;
                a[i+1*p] = a0 + a2 + (2 * mod2 - (a1 + a3));
                a[i+2*p] = a0 + na2 + a1na3imag;
                a[i+3*p] = a0 + na2 + (mod2 - a1na3imag);
            }
            offset += p << 2;
        }
        len <<= 2;
    }
    
    // Move on to the next chunk.
    a += h;
    }
}

// Forward butterflies on a single length-n range.
template<class RandomAccessIterator>
void Butterfly(RandomAccessIterator a, int n) const {
    ButterflyLayered(a, n, 1, 1);
}

// Inverse butterflies on `repeat` consecutive chunks starting at `a`; layers run
// in the opposite order to ButterflyLayered (radix-4 first, then an optional
// final radix-2). No 1/n scaling is applied here.
template<class RandomAccessIterator>
void IButterflyLayered(RandomAccessIterator a, int n, int stride, int repeat) const {

    static const fft_info info;
    constexpr int MOD = mint::mod();
    
    while(repeat--){
    
    int len = n;
    int p = stride;

    for( ; 2 < len; ){
        len >>= 2;
        mint irot = 1, iimag = info.iroot[2];
        int offset = p;
        for(int s=0; s<len; s++){
            // Advance the inverse rotation for block s.
            if(s) irot *= info.irate3[LsbIndex(~(u32)(s-1))];
            mint irot2 = irot * irot;
            mint irot3 = irot2 * irot;
            for(int i=offset-p; i<offset; i++){
                u64 a0 = a[i].val();
                u64 a1 = a[i+p].val();
                u64 a2 = a[i+2*p].val();
                u64 a3 = a[i+3*p].val();
                // (a2 - a3) * iimag, reduced; MOD is added to avoid a negative difference.
                u64 a2na3iimag = mint((a2 + MOD - a3) * iimag.val()).val();
                a[i] = a0 + a1 + a2 + a3;
                a[i+p] = (a0 + (MOD - a1) + a2na3iimag) * irot.val();
                a[i+2*p] = (a0 + a1 + (MOD - a2) + (MOD - a3)) * irot2.val();
                a[i+3*p] = (a0 + (MOD - a1) + (MOD - a2na3iimag)) * irot3.val();
            }
            offset += p << 2;
        }
        p <<= 2;
    }
    // One level left over: finish with a plain radix-2 layer.
    if(len == 2){
        for(int i=0; i<p; i++){
            mint l = a[i], r = a[i+p];
            a[i] = l+r; a[i+p] = l-r;
        }
        p <<= 1;
    }
    
    // NOTE(review): p presumably equals n*stride here when n is a power of two,
    // so this steps to the next chunk — confirm.
    a += p;
    }
}

// Inverse butterflies on a single length-n range (unscaled).
template<class RandomAccessIterator>
void IButterfly(RandomAccessIterator a, int n) const {
    IButterflyLayered(a, n, 1, 1);
}

};

} // namespace nachia

namespace nachia {

// Coefficient-vector type for polynomials / truncated power series over Elem,
// with NTT-based multiplication supplied by NttInst. Most mutating members
// return *this so calls can be chained; move() hands the contents out by value
// at the end of a chain.
template<class Elem, class NttInst = Ntt<Elem>>
struct FpsNtt {
public:
    using Fps = FpsNtt;
    using ElemTy = Elem;
    static constexpr unsigned int MOD = Elem::mod();
    // At or below this operand length, convolution() uses the schoolbook loop.
    static constexpr int CONV_THRES = 30;
    static const NttInst nttInst;
    static const unsigned int zeta = nachia::PrimitiveRoot<MOD>::GetVal();
private:
    using u32 = unsigned int;
    static Elem ZeroElem() noexcept { return Elem(0); }
    static Elem OneElem() noexcept { return Elem(1); }
    // Shared factorial tables.
    static Comb<Elem> comb;
    std::vector<Elem> a;
    // Replaces a negative sz by size() (in place) and returns the resulting value.
    int RSZ(int& sz) const { return sz = (sz < 0 ? size() : sz); }
public:

    int size() const noexcept { return a.size(); }
    Elem& operator[](int x) noexcept { return a[x]; }
    const Elem& operator[](int x) const noexcept { return a[x]; }
    // Bounds-checked read: zero outside [0, size()).
    Elem getCoeff(int x) const noexcept { return (0 <= x && x < size()) ? a[x] : ZeroElem(); }
    static Comb<Elem>& GetComb() { return comb; }
    // Smallest power of two that is >= x (x must be positive).
    static int BestNttSize(int x) noexcept { assert(x); return 1 << MsbIndex(x*2-1); }
    // Moves the contents out into a new value.
    Fps move(){ return std::move(*this); }
    // a[i] = c (no bounds check).
    Fps& set(int i, Elem c){ a[i] = c; return *this; }

    // Drops trailing (highest-degree) zero coefficients, releasing memory when
    // the capacity exceeds four times the new size.
    Fps& removeLeadingZeros(){
        int newsz = size();
        while(newsz && a[newsz-1].val() == 0) newsz--;
        a.resize(newsz);
        if((int)a.capacity() / 4 > newsz) a.shrink_to_fit();
        return *this;
    }

    FpsNtt(){}
    FpsNtt(int sz) : a(sz, ZeroElem()) {}
    FpsNtt(int sz, Elem e) : a(sz, e) {}
    FpsNtt(std::vector<Elem>&& src) : a(std::move(src)) {}
    FpsNtt(const std::vector<Elem>& src) : a(src) {}
    
    // Zero-pads to BestNttSize(size()) and applies the forward butterflies in place.
    Fps& ntt() {
        capSize(BestNttSize(size()));
        nttInst.Butterfly(a.begin(), size());
        return *this;
    }
    // Applies the inverse butterflies in place, then multiplies by 1/size().
    Fps& intt() {
        nttInst.IButterfly(a.begin(), a.size());
        return times(Elem::raw(size()).inv());
    }
    // Given the coefficient form `vanilla` of this length-n transform, builds a
    // length-2n vector: the first half copies *this, the second half is the
    // transform of vanilla with coefficient i multiplied by q^i, where
    // q = zeta^((mod-1)/(2n)).
    Fps nttDouble(Fps vanilla) const {
        int n = size();
        assert(n != 0 && n == (n&-n)); // n is a power of 2
        Elem q = Elem::raw(zeta).pow((Elem::mod() - 1) / (n*2));
        Elem qq = OneElem();
        for(int i=0; i<n; i++){ vanilla[i] *= qq; qq *= q; }
        vanilla.ntt();
        Fps res = clip(0, n*2);
        for(int i=0; i<n; i++) res[n+i] = vanilla[i];
        return res;
    }
    // Same, recovering the coefficient form via intt() on a copy.
    Fps nttDouble() const { return nttDouble(clip().intt().move()); }

    // Fps res(resSz);
    // for(int j=0; j<resSz-destL && j+srcL < srcR; j++) res[j+destL] = a.getCoeff(j+srcL)
    // if srcR is unspecified -> srcR = max(srcL, size());
    // if resSz is unspecified -> resSz = destL + srcR - srcL
    Fps clip(int srcL, int srcR = -1, int destL = 0, int resSz = -1) const {
        srcR = RSZ(srcR);
        if(resSz < 0) resSz = destL + srcR - srcL;
        int rj = std::min(std::min(srcR, size()) - srcL, resSz - destL);
        Fps res(resSz);
        for(int j=std::max(0, -srcL); j<rj; j++) res[j+destL] = a[j+srcL];
        return res;
    }
    // Plain copy.
    Fps clip() const { return *this; }

    // Clamps the length into [l, r]: truncates to r if longer, zero-pads to l if shorter.
    Fps& capSize(int l, int r) {
        if(r <= (int)size()) a.resize(r);
        if(size() <= l) a.resize(l, ZeroElem());
        return *this;
    }
    // Resizes to z, zero-padding when growing; a negative z keeps the current size.
    Fps& capSize(int z){ a.resize(RSZ(z), ZeroElem()); return *this; }
    // Multiplies every coefficient by x.
    Fps& times(Elem x){ for(int i=0; i<size(); i++){ a[i] *= x; } return *this; }
    // a[i] *= i! for i < z (z defaults to size()).
    Fps& timesFactorial(int z = -1){ comb.extend(RSZ(z)); for(int i=0; i<z; i++){ a[i] *= comb.factorial(i); } return *this; }
    // a[i] *= 1/i! for i < z (z defaults to size()).
    Fps& timesInvFactorial(int z = -1){ comb.extend(RSZ(z)); for(int i=0; i<z; i++){ a[i] *= comb.invFactorial(i); } return *this; }
    // Zeroes a[l..r).
    Fps& clrRange(int l, int r){ for(int i=l; i<r; i++){ a[i] = ZeroElem(); } return *this; }
    Fps& negate(){ for(auto& e : a){ e = -e; } return *this; }
    // Pointwise product with `other` over the first min(maxi, size(), other.size()) entries.
    Fps& mulEach(const Fps& other, int maxi = -1){
        maxi = std::min(RSZ(maxi), std::min(size(), other.size()));
        for(int i=0; i<maxi; i++) a[i] *= other[i];
        return *this;
    }
    // Reverses the first sz coefficients (all of them by default).
    Fps& reverse(int sz = -1){ RSZ(sz); std::reverse(a.begin(), a.begin() + sz); return *this; }
    // Reverses coefficients in [l, r).
    Fps& revRange(int l, int r = -1){ RSZ(r); std::reverse(a.begin() + l, a.begin() + r); return *this; }

    // Product of a and b truncated/padded to sz coefficients.
    // NOTE(review): with sz < 0 the schoolbook branch returns
    // a.size()+b.size()-1 coefficients, while the NTT branch keeps the padded
    // transform length Z (capSize(-1) is a no-op) — callers should not rely on
    // the default length.
    static Fps convolution(const Fps& a, const Fps& b, int sz = -1){
        if(std::min(a.size(), b.size()) <= CONV_THRES){
            if(a.size() > b.size()) return convolution(b, a, sz);
            if(sz < 0) sz = std::max(0, a.size() + b.size() - 1);
            std::vector<Elem> res(sz);
            for(int i=0; i<a.size(); i++) for(int j=0; j<b.size() && i+j<sz; j++) res[i+j] += a[i] * b[j];
            return res;
        }
        int Z = BestNttSize(a.size() + b.size() - 1);
        return a.clip(0, Z).ntt().mulEach(b.clip(0, Z).ntt()).intt().capSize(sz).move();
    }
    Fps convolve(const Fps& r, int sz = -1) const { return convolution(*this, r, sz); }
    
    //   1
    // ----- = 1 + f + f^2 + f^3 + ...
    //  1-f
    // The first min(sz, 32) terms come from the direct recurrence (which skips
    // a[0], so f is presumably expected to have zero constant term — confirm);
    // after that the known prefix is doubled per round using NTT products.
    Fps powerSum(int sz) const {
        RSZ(sz);
        if(sz == 0) return {};
        int q = std::min(sz, 32);
        Fps x = Fps(q).set(0, OneElem()).move();
        for(int i=1; i<q; i++) for(int j=1; j<=std::min(i,(int)a.size()-1); j++) x[i] += x[i-j] * a[j];
        while(x.size() < sz){
            int hN = x.size(), N = hN*2;
            Fps a = x.clip(0, N).ntt().move();
            Fps b = clip(0, N).ntt().mulEach(a).intt().clrRange(0,hN).ntt().mulEach(a).intt().move();
            for(int i=0; i<hN; i++) b[i] = x[i];
            std::swap(b, x);
        }
        return x.capSize(sz).move();
    }

    // Multiplicative inverse truncated to sz terms, via
    // 1/f = (1/a0) * 1/(1 - g) with g = -(f/a0) and its constant term cleared.
    // Requires a nonempty series with invertible a[0].
    Fps inv(int sz = -1) const {
        RSZ(sz);
        Elem iA0 = a[0].inv();
        return clip(0, std::min(sz, size())).times(-iA0).set(0, ZeroElem()).powerSum(sz).times(iA0).move();
    }
    
    // In-place formal derivative (despite the name): a[i] = (i+1) * a[i+1].
    Fps& difference(){
        if(size() == 0) return *this;
        for(int i=0; i+1<size(); i++) a[i] = a[i+1] * Elem::raw(i+1);
        return capSize(size()-1);
    }
    // In-place formal integral with zero constant term; the length grows by one.
    Fps& integral(){
        if(size() == 0) return capSize(1);
        capSize(size()+1);
        comb.extend(size());
        for(int i=size()-1; i>=1; i--) a[i] = a[i-1] * comb.invOf(i);
        return set(0, ZeroElem());
    }
    
    // log f truncated to sz terms, as the integral of f'/f; requires a[0] == 1.
    Fps log(int sz = -1){
        RSZ(sz);
        assert(sz != 0);
        assert(a[0].val() == 1);
        return convolution(inv(sz), clip().difference(), sz-1).integral();
    }

    // exp f truncated to sz terms; each round doubles the known prefix of the
    // result, using log of the current approximation.
    // NOTE(review): presumably requires a[0] == 0 — confirm.
    Fps exp(int sz = -1){
        RSZ(sz);
        Fps res = Fps(1).set(0, OneElem());
        while(res.size() < sz){
            auto z = res.size();
            auto tmp = res.capSize(z*2).log().set(0, -OneElem()).move();
            for(int i=0; i<z*2 && i<size(); i++) tmp[i] -= a[i];
            auto resntt = res.clip().ntt().mulEach(tmp.ntt()).intt().move();
            for(int i=z; i<z*2; i++) res[i] = -resntt[i];
        }
        return res.capSize(0, sz).move();
    }
    
    // f^k truncated to sz terms. Strips the lowest nonzero term a0 * x^ctz,
    // raises the normalised remainder via exp(k * log(.)), then restores
    // a0^k and the shift by ctz*k.
    // NOTE(review): the ctz scan reads a[ctz] for ctz < sz, so sz larger than
    // size() would read past the end — confirm callers keep sz <= size().
    Fps pow(unsigned long long k, int sz = -1){
        int n = RSZ(sz);
        if(k == 0) return Fps(n).set(0, OneElem()).move();
        int ctz = 0;
        while(ctz<n && a[ctz].val() == 0) ctz++;
        // All retained terms vanish once ctz*k >= n.
        if((unsigned long long)ctz >= (n-1) / k + 1) return Fps(n);
        Elem a0 = a[ctz];
        return clip(ctz, ctz+n-ctz*k).times(a0.inv()).log().times(Elem(k)).exp().times(a0.pow(k)).clip(0, -1, ctz*k);
    }

    auto begin(){ return a.begin(); }
    auto end(){ return a.end(); }
    auto begin() const { return a.begin(); }
    auto end() const { return a.end(); }

    // Renders the coefficients as text: beg, values joined by delim, then en.
    std::string toString(std::string beg = "[ ", std::string delim = " ", std::string en = " ]") const {
        std::string res = beg;
        bool f = false;
        for(auto x : a){ if(f){ res += delim; } f = true; res += std::to_string(x.val()); }
        res += en;
        return res;
    }

    // Moves the underlying coefficient vector out.
    std::vector<Elem> getVectorMoved(){ return std::move(a); }

    // Coefficient-wise addition / subtraction; the result takes the longer length.
    Fps& operator+=(const Fps& r){
        capSize(std::max(size(), r.size()));
        for(int i=0; i<r.size(); i++) a[i] += r[i];
        return *this;
    }
    Fps& operator-=(const Fps& r){
        capSize(std::max(size(), r.size()));
        for(int i=0; i<r.size(); i++) a[i] -= r[i];
        return *this;
    }
    Fps operator+(const Fps& r) const { return (clip(0, std::max(size(), r.size())) += r).move(); }
    Fps operator-(const Fps& r) const { return (clip(0, std::max(size(), r.size())) -= r).move(); }
    Fps operator-() const { return (clip().negate()).move(); }
    // Polynomial product with trailing zero coefficients removed.
    Fps operator*(const Fps& r) const { return convolve(r).removeLeadingZeros().move(); }
    Fps& operator*=(const Fps& r){ return (*this) = operator*(r); }
    Fps& operator*=(Elem m){ return times(m); }
    Fps operator*(Elem m) const { return (clip() *= m).move(); }

    // Evaluates the polynomial at x by Horner's rule.
    Elem eval(Elem x) const {
        Elem res = 0;
        for(int i=size()-1; i>=0; i--) res = res * x + a[i];
        return res;
    }
};

template<class Elem, class NttInst> Comb<Elem> FpsNtt<Elem, NttInst>::comb;
template<class Elem, class NttInst> const NttInst FpsNtt<Elem, NttInst>::nttInst;

} // namespace nachia

using Fps = nachia::FpsNtt<Modint>;

namespace nachia{

// Returns the coefficients of f(x + c), with the same length as f.
//
// Uses g[k] = (1/k!) * sum_{i>=k} (f[i] * i!) * c^(i-k) / (i-k)!, evaluated as
// one convolution of (f[i] * i!) with the reversed sequence (c^j / j!).
// An empty f (the zero polynomial) is returned unchanged.
template<class Fps>
Fps PolynomialTaylorShift(Fps f, typename Fps::ElemTy c){
    int n = f.size();
    // Guard: with n == 0 the set(0,1) below would write past an empty buffer.
    if(n == 0) return f;
    // C[j] = c^j
    Fps C = Fps(n).set(0,1);
    for(int i=1; i<n; i++) C[i] = C[i-1] * c;
    // Coefficient k of the answer sits at index n-1+k of the product.
    return f.timesFactorial().convolve(
        C.timesInvFactorial().reverse()).clip(n-1,2*n-1).timesInvFactorial().move();
}

// Returns g with g[m] = sum_{i<=m} C(m,i) * c^(m-i) * f[i], same length as f.
//
// Computed as m! * [x^m] ( (sum_i f[i]/i! x^i) * (sum_j c^j/j! x^j) ), i.e. one
// convolution truncated to n terms.
// An empty f is returned unchanged.
template<class Fps>
Fps FpsAntiTaylorShift(Fps f, typename Fps::ElemTy c){
    int n = f.size();
    // Guard: with n == 0 the set(0,1) below would write past an empty buffer.
    if(n == 0) return f;
    // C[j] = c^j
    Fps C = Fps(n).set(0,1);
    for(int i=1; i<n; i++) C[i] = C[i-1] * c;
    return f.timesInvFactorial().convolve(
        C.timesInvFactorial(),n).timesFactorial().move();
}

} // namespace nachia

namespace nachia{

// x <- ax + b
// Returns the coefficients of f(a*x + b).
// return size : max( necessary, f.size() )
//
// The shift by b is applied first (f(x + b)), then coefficient i is scaled by
// a^i. The zero polynomial (empty f) maps to the zero polynomial.
template<class Fps>
Fps PolynomialSubstitute_Ax_B(
    Fps f,
    typename Fps::ElemTy a,
    typename Fps::ElemTy b
){
    using Elem = typename Fps::ElemTy;
    // Substituting anything into 0 yields 0; the earlier code returned the
    // constant b here, which is not f(a*x + b) for f == 0.
    if(f.size() == 0) return Fps(0);
    f = PolynomialTaylorShift(f.move(), b);
    Elem q = 1;
    for(int i=1; i<f.size(); i++){ q *= a; f[i] *= q; }
    return f;
}

// x <- ax^2 + bx + c
// return size : max( necessary, f.size() * 2 - 1 )
//
// For a != 0 the quadratic is completed to a*(x+p)^2 + (c - a*p^2) with
// p = b/(2a): substitute the linear map in the squared variable, spread the
// coefficients onto even powers, then shift by p.
template<class Fps>
Fps PolynomialSubstitute_Ax2_Bx_C(
    Fps f,
    typename Fps::ElemTy a,
    typename Fps::ElemTy b,
    typename Fps::ElemTy c
){
    const int inputLen = f.size();
    if(inputLen == 0) return PolynomialSubstitute_Ax_B(f.move(), b, c);

    const int minsz = inputLen*2-1;
    if(a.val() == 0){
        // Degenerate quadratic: plain linear substitution, padded to minsz.
        Fps linear = PolynomialSubstitute_Ax_B(f.move(), b, c);
        linear.capSize(minsz);
        return linear;
    }

    const auto half = b / (a+a);
    Fps inner = PolynomialSubstitute_Ax_B(f.move(), a, c-half*half*a);

    // inner(y) evaluated at y = x^2: coefficient k moves to degree 2k.
    Fps spread(minsz);
    const int innerLen = inner.size();
    for(int k=0; k<innerLen; k++) spread[k*2] = inner[k];

    return PolynomialTaylorShift(spread.move(), half);
}


// x <- ax / (bx + c)
//   c != 0
//
// Steps: normalise so that c == 1, scale coefficient i by a^i, then apply
// FpsAntiTaylorShift with -b.
// NOTE(review): as implemented, coefficient m of the result equals
// sum_i C(m,i) * (-b)^(m-i) * a^i * f[i], which is the series of
// (1/(1+bx)) * f(ax/(1+bx)) rather than f(ax/(1+bx)) itself — confirm that the
// extra 1/(1+bx) factor is intended.
template<class Fps>
Fps PolynomialSubstitute_Ax_div_Bx_C(
    Fps f,
    typename Fps::ElemTy a,
    typename Fps::ElemTy b,
    typename Fps::ElemTy c
){
    int n = f.size();
    if(n == 0) return Fps(0);
    auto& comb = Fps::GetComb();
    comb.extend(n);
    auto One = comb.factorial(0);
    // Divide through by c whenever c is not already 1. The earlier test was
    // inverted (== 0), so the normalisation never ran for c != 1 and c was
    // silently ignored.
    if((c-One).val() != 0){
        auto ci = c.inv();
        a *= ci; b *= ci; c = One;
    }
    auto p = One;
    for(int i=0; i<n; i++){ f[i] *= p; p *= a; }
    // x <- x / (bx + 1)
    if(b.val() != 0) f = FpsAntiTaylorShift(f.move(), -b);
    return f;
}


// x <- (ax + b) / (cx + d)
//   d != 0
//
// After scaling so that d == 1:
//   (ax + b) / (cx + 1) = b + (a - c*b) * x / (cx + 1),
// so the result is obtained by first shifting the polynomial by b
// (f(y) -> f(y + b)) and then substituting y <- (a - c*b) x / (cx + 1).
// Special cases: c == 0 is a plain linear substitution; a == c*b makes the
// argument the constant b, so the result is the constant f(b).
template<class Fps>
Fps PolynomialSubstitute_Ax_B_div_Cx_D(
    Fps f,
    typename Fps::ElemTy a,
    typename Fps::ElemTy b,
    typename Fps::ElemTy c,
    typename Fps::ElemTy d
){
    int n = f.size();
    if(n == 0) return Fps(0);
    assert(d.val() != 0);
    // fix d = 1
    auto di = d.inv();
    a *= di; b *= di; c *= di; d *= di;
    if(c.val() == 0) return PolynomialSubstitute_Ax_B(f.move(), a, b).clip(0,n);
    // fix a/c != b/d
    if(a.val() == (c * b).val()) return Fps(n).set(0,f.eval(b)).move();
    // The shift by b must act on the polynomial itself, i.e. before the
    // rational substitution. Applying it afterwards (as the earlier code did)
    // shifts x inside the already-substituted, truncated series instead.
    if(b.val() != 0) f = PolynomialTaylorShift(f.move(), b);
    f = PolynomialSubstitute_Ax_div_Bx_C(f.move(), a-c*b, c, d);
    return f;
}

} // namespace nachia

// Reads N and M, and prints sum over m of (2m-1) * g[m] modulo 998244353, where
//   g[m] = sum_i C(m,i) * (-1)^i * (M+1-i)^N
// is obtained from f[i] = (-1)^i * (M+1-i)^N through the substitution helper.
// Since mex^2 = sum_{m=1..mex} (2m-1), this is the sum of mex^2 when g[m]
// counts the sequences containing every value 0..m-1.
// NOTE(review): assumes the sequence elements range over 0..M (M+1 distinct
// values), as the (M+1-i)^N term implies — confirm against the problem statement.
void testcase(){
    i64 N, M; cin >> N >> M;
    auto f = Fps(N+1);
    rep(i,N+1) f[i] = Modint(M+1-i).pow(N) * (i%2?-1:1);
    f = nachia::PolynomialSubstitute_Ax_B_div_Cx_D(f.move(), 1, 0, -1, 1);
    Modint ans = 0;
    // With M+1 available values the mex can reach M+1, so m runs up to
    // min(N, M+1). The earlier bound m <= M dropped the m == M+1 term whenever
    // N > M.
    for(int m=1; m<=N && m<=M+1; m++){
        ans += f[m] * (m*2-1);
    }
    cout << ans.val() << '\n';
}

// Entry point: switches the C++ streams to unsynchronised, untied mode and
// runs the single test case.
int main(){
    std::ios::sync_with_stdio(false);
    std::cin.tie(nullptr);
    testcase();
    return 0;
}
0