結果

問題 No.2348 Power!! (Easy)
ユーザー 👑 NachiaNachia
提出日時 2023-06-10 16:37:58
言語 C++17(gcc12)
(gcc 12.3.0 + boost 1.87.0)
結果
AC  
実行時間 299 ms / 5,000 ms
コード長 21,191 bytes
コンパイル時間 1,426 ms
コンパイル使用メモリ 90,456 KB
実行使用メモリ 6,820 KB
最終ジャッジ日時 2025-01-02 22:13:39
合計ジャッジ時間 4,876 ms
ジャッジサーバーID
(参考情報)
judge1 / judge4
このコードへのチャレンジ
(要ログイン)

テストケース

テストケース表示
入力 結果 実行時間
実行使用メモリ
testcase_00 AC 8 ms
6,816 KB
testcase_01 AC 32 ms
6,820 KB
testcase_02 AC 39 ms
6,816 KB
testcase_03 AC 26 ms
6,816 KB
testcase_04 AC 2 ms
6,820 KB
testcase_05 AC 287 ms
6,820 KB
testcase_06 AC 282 ms
6,820 KB
testcase_07 AC 283 ms
6,816 KB
testcase_08 AC 283 ms
6,820 KB
testcase_09 AC 295 ms
6,820 KB
testcase_10 AC 299 ms
6,820 KB
testcase_11 AC 285 ms
6,816 KB
testcase_12 AC 284 ms
6,816 KB
権限があれば一括ダウンロードができます

ソースコード

diff #

#line 1 "..\\Main.cpp"
#include <iostream>
#include <string>
#include <vector>
#include <algorithm>
#include <atcoder/modint>
#line 5 "D:\\Programming\\VSCode\\competitive-cpp\\nachia\\fps\\fps-ntt.hpp"
#include <cassert>
#line 3 "D:\\Programming\\VSCode\\competitive-cpp\\nachia\\math-modulo\\modulo-primitive-root.hpp"
#include <utility>

namespace nachia{

// Compile-time search for a generator of the multiplicative group modulo MOD.
// ExamineVal applies the usual "g^((MOD-1)/p) != 1 for every prime p | MOD-1"
// criterion, so MOD is presumably expected to be prime (TODO confirm).
template<unsigned int MOD>
struct PrimitiveRoot{
    using u64 = unsigned long long;

    // Returns a^i reduced modulo MOD (square-and-multiply).
    static constexpr u64 powm(u64 a, u64 i) {
        u64 acc = 1;
        for(u64 base = a; i != 0; i >>= 1){
            if((i & 1) != 0) acc = acc * base % MOD;
            base = base * base % MOD;
        }
        return acc;
    }

    // True when g passes the criterion above for every prime factor of MOD-1.
    static constexpr bool ExamineVal(unsigned int g){
        const unsigned int phi = MOD - 1;
        unsigned int rest = phi;  // part of MOD-1 that is not factored yet
        for(u64 p = 2; p * p <= rest; ++p){
            if(rest % p != 0) continue;
            if(powm(g, phi / p) == 1) return false;
            do { rest /= p; } while(rest % p == 0);
        }
        // Whatever is left above 1 is a single remaining prime factor.
        if(rest != 1 && powm(g, phi / rest) == 1) return false;
        return true;
    }

    // Smallest candidate in [2, MOD) accepted by ExamineVal; 0 when none is.
    static constexpr unsigned int GetVal(){
        unsigned int candidate = 2;
        while(candidate < MOD){
            if(ExamineVal(candidate)) return candidate;
            ++candidate;
        }
        return 0;
    }
    static const unsigned int val = GetVal();
};

} // namespace nachia
#line 3 "D:\\Programming\\VSCode\\competitive-cpp\\nachia\\math\\combination.hpp"

namespace nachia{

// Factorial / inverse-factorial table over a modular integer type, with
// binomial and permutation helpers on top. Lookups do not grow the table:
// call extend() with the largest index needed before querying.
template<class Modint>
class Comb{
private:
    static constexpr int MOD = Modint::mod();
    std::vector<Modint> F;   // F[i]  = i!
    std::vector<Modint> iF;  // iF[i] = 1 / i!

    // Shared guard of comb/perm and their inverses: (n, r) outside 0 <= r <= n.
    static bool outsideTriangle(int n, int r){ return r < 0 || n < r || n < 0; }
public:
    // Grows both tables so that indices 0..newN are valid; newN is clamped to
    // MOD-1. Does nothing when the tables already reach that far.
    void extend(int newN){
        const int oldN = (int)F.size() - 1;
        const int target = (newN < MOD) ? newN : MOD - 1;
        if(target <= oldN) return;
        F.resize(target + 1);
        iF.resize(target + 1);
        for(int i = oldN; i < target; ++i) F[i+1] = F[i] * Modint::raw(i+1);
        // One modular inverse at the top, then walk downwards multiplying by i.
        iF[target] = F[target].inv();
        for(int i = target; i > oldN; --i) iF[i-1] = iF[i] * Modint::raw(i);
    }

    // Starts with the entries for 0 and 1, then extends up to n.
    Comb(int n = 1) : F(2, Modint(1)), iF(2, Modint(1)) {
        extend(n);
    }

    // n!
    Modint factorial(int n) const { return F[n]; }
    // 1 / n!
    Modint invFactorial(int n) const { return iF[n]; }
    // 1 / n, obtained as (n-1)! / n!
    Modint invOf(int n) const { return iF[n] * F[n-1]; }

    // Binomial coefficient C(n, r); 0 outside 0 <= r <= n.
    Modint comb(int n, int r) const {
        if(outsideTriangle(n, r)) return Modint(0);
        return F[n] * iF[r] * iF[n-r];
    }
    // 1 / C(n, r); 0 outside 0 <= r <= n.
    Modint invComb(int n, int r) const {
        if(outsideTriangle(n, r)) return Modint(0);
        return iF[n] * F[r] * F[n-r];
    }
    // Number of r-permutations n! / (n-r)!; 0 outside 0 <= r <= n.
    Modint perm(int n, int r) const {
        if(outsideTriangle(n, r)) return Modint(0);
        return F[n] * iF[n-r];
    }
    // (n-r)! / n!; 0 outside 0 <= r <= n.
    Modint invPerm(int n, int r) const {
        if(outsideTriangle(n, r)) return Modint(0);
        return iF[n] * F[n-r];
    }
    // Shorthand for comb(n, r).
    Modint operator()(int n, int r) const { return comb(n,r); }
};

} // namespace nachia
#line 4 "D:\\Programming\\VSCode\\competitive-cpp\\nachia\\misc\\bit-operations.hpp"

namespace nachia{

// Returns the number of set bits in c.
int Popcount(unsigned long long c) noexcept {
#ifdef __GNUC__
    return __builtin_popcountll(c);
#else
    // Portable SWAR fallback: fold neighbouring fields into 2-, 4- and then
    // 8-bit partial counts.
    c = (c & (~0ull/3)) + ((c >> 1) & (~0ull/3));
    c = (c & (~0ull/5)) + ((c >> 2) & (~0ull/5));
    c = (c & (~0ull/17)) + ((c >> 4) & (~0ull/17));
    // Sum the eight byte counts into the top byte. The multiplier must be
    // 0x0101010101010101 (= ~0ull/255); ~0ull/257 is 0x00FF00FF00FF00FF and
    // produced wrong totals (e.g. 0 for c == 1).
    c = (c * (~0ull/255)) >> 56;
    return static_cast<int>(c);
#endif
}

// Zero-based position of the highest set bit of x.
// Precondition: x != 0 (the caller must guarantee this).
int MsbIndex(unsigned long long x) noexcept {
#ifdef __GNUC__
    const int leadingZeros = __builtin_clzll(x);
    return 63 - leadingZeros;
#else
    // Binary search on the bit position: test the upper half, keep it if non-empty.
    int index = 0;
    int shift = 32;
    while(shift > 0){
        if((x >> shift) != 0){
            index += shift;
            x >>= shift;
        }
        shift /= 2;
    }
    return index;
#endif
}

// Zero-based position of the lowest set bit of x.
// Precondition: x != 0 (the caller must guarantee this).
int LsbIndex(unsigned long long x) noexcept {
#ifdef __GNUC__
    const int trailingZeros = __builtin_ctzll(x);
    return trailingZeros;
#else
    // Isolate the lowest set bit (two's-complement trick), then locate it.
    const unsigned long long lowestBit = x & (~x + 1);
    return MsbIndex(lowestBit);
#endif
}

}

#line 2 "D:\\Programming\\VSCode\\competitive-cpp\\nachia\\fps\\ntt-interface.hpp"

namespace nachia {

// Base interface for NTT implementations. The transforms here are no-ops;
// BitReversal is a generic in-place bit-reversal permutation helper.
template<class mint>
struct NttInterface{

// Forward transform placeholder: leaves the range untouched.
template<class Iter>
void Butterfly(Iter /*first*/, int /*count*/) const {}

// Inverse transform placeholder: leaves the range untouched.
template<class Iter>
void IButterfly(Iter /*first*/, int /*count*/) const {}

// Applies the bit-reversal permutation to a[0..N) in place.
// `rev` is maintained as the bit-reversed counterpart of `idx`.
template<class Iter>
void BitReversal(Iter a, int N) const {
    int rev = 0;
    for(int idx = 0; idx < N; ++idx){
        if(rev < idx) std::swap(a[rev], a[idx]);
        // Increment `rev` as a reversed counter: flip bits from the top down
        // until a flip turns a 0 into a 1.
        int bit = N >> 1;
        for(;;){
            rev ^= bit;
            if(bit <= rev) break;
            bit >>= 1;
        }
    }
}

};

} // namespace nachia
#line 5 "D:\\Programming\\VSCode\\competitive-cpp\\nachia\\fps\\ntt-acl.hpp"
#include <iterator>
#line 8 "D:\\Programming\\VSCode\\competitive-cpp\\nachia\\fps\\ntt-acl.hpp"
#include <array>

namespace nachia{
    
// Zero-based index of the lowest set bit of n, usable in constant expressions.
// Precondition: n != 0 (the scan never terminates validly otherwise).
constexpr int bsf_constexpr(unsigned int n) {
    int x = 0;
    // Probe with an unsigned mask: a signed `1 << 31` would shift into the
    // sign bit when only the top bit of n is set.
    while (!(n & (1U << x))) x++;
    return x;
}

// In-place number-theoretic transform butterflies over the modular type
// `mint`. Going by the name this is adapted from AtCoder Library's internal
// butterfly routines (TODO confirm). Both transforms work on the first n
// elements reachable from the iterator, process two levels at a time
// (radix-4) where possible and fall back to a single radix-2 level when only
// one level is left. IButterfly does not apply the 1/n scaling itself.
template <class mint>
struct NttFromAcl : NttInterface<mint> {

using u32 = unsigned int;
using u64 = unsigned long long;
    
// Smallest x such that 2^x >= n.
static int ceil_pow2(int n) {
    int x = 0;
    while ((1U << x) < (u32)(n)) x++;
    return x;
}

// Precomputed roots of unity and the per-step twiddle ratios derived from them.
struct fft_info {
    static constexpr u32 g = nachia::PrimitiveRoot<mint::mod()>::val;
    // Number of trailing zero bits of mod-1, i.e. the largest supported log2 size.
    static constexpr int rank2 = bsf_constexpr(mint::mod()-1);
    // root[i] has order 2^i (root[i] = root[i+1]^2); iroot[i] is its inverse.
    std::array<mint, rank2+1> root;
    std::array<mint, rank2+1> iroot;

    // rate2[i] = root[i+2] * iroot[2] * ... * iroot[i+1]; irate2 is the mirrored product.
    std::array<mint, std::max(0, rank2-1)> rate2;
    std::array<mint, std::max(0, rank2-1)> irate2;

    // rate3[i] = root[i+3] * iroot[3] * ... * iroot[i+2]; irate3 is the mirrored product.
    std::array<mint, std::max(0, rank2-2)> rate3;
    std::array<mint, std::max(0, rank2-2)> irate3;

    fft_info(){
        // Start from the root of maximal 2-power order and square downwards.
        root[rank2] = mint(g).pow((mint::mod() - 1) >> rank2);
        iroot[rank2] = root[rank2].inv();
        for(int i=rank2-1; i>=0; i--){
            root[i] = root[i+1] * root[i+1];
            iroot[i] = iroot[i+1] * iroot[i+1];
        }
        mint prod = 1, iprod = 1;
        for(int i=0; i<=rank2-2; i++){
            rate2[i] = root[i+2] * prod;
            irate2[i] = iroot[i+2] * iprod;
            prod *= iroot[i+2];
            iprod *= root[i+2];
        }
        prod = 1; iprod = 1;
        for(int i=0; i<=rank2-3; i++){
            rate3[i] = root[i+3] * prod;
            irate3[i] = iroot[i+3] * iprod;
            prod *= iroot[i+3];
            iprod *= root[i+3];
        }
    }
};

// Forward transform of a[0..n), in place. n is rounded up to 2^h via ceil_pow2,
// so the range must hold at least 2^h elements.
// NOTE(review): output ordering presumably matches ACL's butterfly (bit-reversed) — confirm.
template<class RandomAccessIterator>
void Butterfly(RandomAccessIterator a, int n) const {
    int h = ceil_pow2(n);

    static const fft_info info;

    // `len` counts the levels already processed, out of h.
    int len = 0;
    while(len < h){
        if(h-len == 1){
            // Exactly one level left: plain radix-2 step.
            int p = 1 << (h-len-1);
            mint rot = 1;
            for(int s=0; s<(1<<len); s++){
                int offset = s << (h-len);
                for(int i=0; i<p; i++){
                    auto l = a[i+offset];
                    auto r = a[i+offset+p] * rot;
                    a[i+offset] = l+r;
                    a[i+offset+p] = l-r;
                }
                // Advance the twiddle for the next block using the lowest zero bit of s.
                if(s+1 != (1<<len)) rot *= info.rate2[LsbIndex(~(u32)(s))];
            }
            len++;
        } else {
            // Radix-4 step: handles two levels at once.
            int p = 1 << (h-len-2);
            // root[2] is a 4th root of unity and plays the role of the imaginary unit.
            mint rot = 1, imag = info.root[2];
            for(int s=0; s<(1<<len); s++){
                mint rot2 = rot * rot;
                mint rot3 = rot2 * rot;
                int offset = s << (h-len);
                for(int i=0; i<p; i++){
                    // Products are kept unreduced in 64 bits (each < mod^2);
                    // mod2 is added before subtracting so nothing goes negative.
                    auto mod2 = 1ULL * mint::mod() * mint::mod();
                    auto a0 = 1ULL * a[i+offset].val();
                    auto a1 = 1ULL * a[i+offset+p].val() * rot.val();
                    auto a2 = 1ULL * a[i+offset+2*p].val() * rot2.val();
                    auto a3 = 1ULL * a[i+offset+3*p].val() * rot3.val();
                    auto a1na3imag = 1ULL * mint(a1 + mod2 - a3).val() * imag.val();
                    auto na2 = mod2 - a2;
                    // Assigning the 64-bit sums back to the element type performs the reduction.
                    a[i+offset] = a0 + a2 + a1 + a3;
                    a[i+offset+1*p] = a0 + a2 + (2 * mod2 - (a1 + a3));
                    a[i+offset+2*p] = a0 + na2 + a1na3imag;
                    a[i+offset+3*p] = a0 + na2 + (mod2 - a1na3imag);
                }
                if(s+1 != (1<<len)) rot *= info.rate3[LsbIndex(~(u32)(s))];
            }
            len += 2;
        }
    }
}

// Inverse butterflies of a[0..n), in place, walking the levels in the
// opposite direction with the inverse twiddles. The result is NOT divided by
// the transform length; the caller has to do that.
template<class RandomAccessIterator>
void IButterfly(RandomAccessIterator a, int n) const {
    int h = ceil_pow2(n);

    static const fft_info info;
    constexpr int MOD = mint::mod();

    // `len` counts the levels still to undo.
    int len = h;
    while(len){
        if(len == 1){
            // Single remaining level: radix-2 step.
            int p = 1 << (h-len);
            mint irot = 1;
            for(int s=0; s<(1<<(len-1)); s++){
                int offset = s << (h-len+1);
                for(int i=0; i<p; i++){
                    auto l = a[i+offset];
                    auto r = a[i+offset+p];
                    a[i+offset] = l+r;
                    // MOD is added first so the difference stays non-negative.
                    a[i+offset+p] = (u64)(MOD + l.val() - r.val()) * irot.val();
                }
                if(s+1 != (1<<(len-1))) irot *= info.irate2[LsbIndex(~(u32)(s))];
            }
            len--;
        } else {
            // Radix-4 step: undoes two levels at once.
            int p = 1 << (h-len);
            // iroot[2] is the inverse of the 4th root of unity used by Butterfly.
            mint irot = 1, iimag = info.iroot[2];
            for(int s=0; s<(1<<(len-2)); s++){
                mint irot2 = irot * irot;
                mint irot3 = irot2 * irot;
                int offset = s << (h-len+2);
                for(int i=0; i<p; i++){
                    auto a0 = 1ULL * a[i+offset+0*p].val();
                    auto a1 = 1ULL * a[i+offset+1*p].val();
                    auto a2 = 1ULL * a[i+offset+2*p].val();
                    auto a3 = 1ULL * a[i+offset+3*p].val();

                    auto a2na3iimag = 1ULL * mint((MOD + a2 - a3) * iimag.val()).val();

                    // Each 64-bit expression is reduced when assigned to the element type.
                    a[i+offset] = a0 + a1 + a2 + a3;
                    a[i+offset+1*p] = (a0 + (MOD - a1) + a2na3iimag) * irot.val();
                    a[i+offset+2*p] = (a0 + a1 + (MOD - a2) + (MOD - a3)) * irot2.val();
                    a[i+offset+3*p] = (a0 + (MOD - a1) + (MOD - a2na3iimag)) * irot3.val();
                }
                if(s+1 != (1<<(len-2))) irot *= info.irate3[LsbIndex(~(u32)(s))];
            }
            len -= 2;
        }
    }
}

};

} // namespace nachia
#line 11 "D:\\Programming\\VSCode\\competitive-cpp\\nachia\\fps\\fps-ntt.hpp"

namespace nachia {

// Polynomial / truncated formal power series over the modular type Elem,
// stored densely: a[i] is the coefficient of x^i. Most mutating members
// return *this so calls can be chained; `move()` turns such a chain back into
// a value. Size arguments that default to -1 mean "use the current size".
// NttInst supplies Butterfly / IButterfly for the transforms.
template<class Elem, class NttInst = NttFromAcl<Elem>>
struct FpsNtt {
public:
    using Fps = FpsNtt;
    using ElemTy = Elem;
    static constexpr unsigned int MOD = Elem::mod();
    // When the shorter operand has at most this many terms, convolution uses
    // the schoolbook double loop instead of the NTT.
    static constexpr int CONV_THRES = 30;
    static const NttInst nttInst;
    // Value found by PrimitiveRoot<MOD>::GetVal(); used by nttDouble.
    static const unsigned int zeta = nachia::PrimitiveRoot<MOD>::GetVal();
private:
    using u32 = unsigned int;
    static Elem ZeroElem() noexcept { return Elem(0); }
    static Elem OneElem() noexcept { return Elem(1); }
    // Factorial table shared by all series of this instantiation.
    static Comb<Elem> comb;
    std::vector<Elem> a;
    // Resolves a "-1 means size()" argument in place and returns the result.
    int RSZ(int& sz) const { return sz = (sz < 0 ? size() : sz); }
public:

    // Number of stored coefficients.
    int size() const noexcept { return a.size(); }
    // Unchecked coefficient access.
    Elem& operator[](int x) noexcept { return a[x]; }
    const Elem& operator[](int x) const noexcept { return a[x]; }
    // Checked read: coefficients outside [0, size()) read as zero.
    Elem getCoeff(int x) const noexcept { return (0 <= x && x < size()) ? a[x] : ZeroElem(); }
    static Comb<Elem>& GetComb() { return comb; }
    // Smallest power of two that is >= x. Requires x > 0.
    static int BestNttSize(int x) noexcept { assert(x); return 1 << MsbIndex(x*2-1); }
    // Moves the contents out as a value (terminates a chain of Fps& calls).
    Fps move(){ return std::move(*this); }
    // Unchecked write of coefficient i.
    Fps& set(int i, Elem c){ a[i] = c; return *this; }

    // Drops zero coefficients from the high-degree end and releases memory
    // when the vector has become much smaller than its capacity.
    Fps& removeLeadingZeros(){
        int newsz = size();
        while(newsz && a[newsz-1].val() == 0) newsz--;
        a.resize(newsz);
        if((int)a.capacity() / 4 > newsz) a.shrink_to_fit();
        return *this;
    }

    FpsNtt(){}
    // sz zero coefficients.
    FpsNtt(int sz) : a(sz, ZeroElem()) {}
    // sz copies of e.
    FpsNtt(int sz, Elem e) : a(sz, e) {}
    FpsNtt(std::vector<Elem>&& src) : a(std::move(src)) {}
    FpsNtt(const std::vector<Elem>& src) : a(src) {}

    // Zero-pads to a power-of-two length and applies the forward butterflies
    // in place. Requires size() > 0 (BestNttSize asserts).
    Fps& ntt() {
        capSize(BestNttSize(size()));
        nttInst.Butterfly(a.begin(), size());
        return *this;
    }
    // Inverse butterflies followed by the 1/size() scaling.
    Fps& intt() {
        nttInst.IButterfly(a.begin(), a.size());
        return times(Elem::raw(size()).inv());
    }
    // Given *this as a transformed sequence of power-of-two length n and
    // `vanilla` as the matching coefficient form, returns a length-2n
    // sequence: the first half copies *this, the second half is the transform
    // of vanilla[i] * q^i with q = zeta^((MOD-1)/(2n)).
    // NOTE(review): presumably this equals the size-2n transform in this
    // NTT's output ordering — confirm.
    Fps nttDouble(Fps vanilla) const {
        int n = size();
        assert(n == (n&-n)); // n is a power of 2
        Elem q = Elem::raw(zeta).pow((Elem::mod() - 1) / (n*2));
        Elem qq = OneElem();
        for(int i=0; i<n; i++){ vanilla[i] *= qq; qq *= q; }
        vanilla.ntt();
        Fps res = clip(0, n*2);
        for(int i=0; i<n; i++) res[n+i] = vanilla[i];
        return res;
    }
    // Same, recovering the coefficient form by inverse-transforming a copy.
    Fps nttDouble() const { return nttDouble(clip().intt().move()); }

    // Fps res(resSz);
    // for(int j=0; j<resSz-destL && j+srcL < srcR; j++) res[j+destL] = a.getCoeff(j+srcL)
    // if srcR is unspecified -> srcR = max(srcL, size());
    // if resSz is unspecified -> resSz = destL + srcR - srcL
    Fps clip(int srcL, int srcR = -1, int destL = 0, int resSz = -1) const {
        srcR = RSZ(srcR);
        if(resSz < 0) resSz = destL + srcR - srcL;
        int rj = std::min(std::min(srcR, size()) - srcL, resSz - destL);
        Fps res(resSz);
        for(int j=std::max(0, -srcL); j<rj; j++) res[j+destL] = a[j+srcL];
        return res;
    }
    // Plain copy.
    Fps clip() const { return *this; }

    // Clamps the length into [l, r]: truncates to r if longer, zero-pads to l if shorter.
    Fps& capSize(int l, int r) {
        if(r <= (int)size()) a.resize(r);
        if(size() <= l) a.resize(l, ZeroElem());
        return *this;
    }
    // Resizes to exactly z (zero-padded); a negative z leaves the size unchanged.
    Fps& capSize(int z){ a.resize(RSZ(z), ZeroElem()); return *this; }
    // Multiplies every coefficient by x.
    Fps& times(Elem x){ for(int i=0; i<size(); i++){ a[i] *= x; } return *this; }
    // a[i] *= i! for i < z.
    Fps& timesFactorial(int z = -1){ comb.extend(RSZ(z)); for(int i=0; i<z; i++){ a[i] *= comb.factorial(i); } return *this; }
    // a[i] *= 1/i! for i < z.
    Fps& timesInvFactorial(int z = -1){ comb.extend(RSZ(z)); for(int i=0; i<z; i++){ a[i] *= comb.invFactorial(i); } return *this; }
    // Zeroes coefficients l..r-1.
    Fps& clrRange(int l, int r){ for(int i=l; i<r; i++){ a[i] = ZeroElem(); } return *this; }
    // Negates every coefficient.
    Fps& negate(){ for(auto& e : a){ e = -e; } return *this; }
    // Element-wise product over the first min(maxi, size(), other.size()) entries.
    Fps& mulEach(const Fps& other, int maxi = -1){
        maxi = std::min(RSZ(maxi), std::min(size(), other.size()));
        for(int i=0; i<maxi; i++) a[i] *= other[i];
        return *this;
    }
    // Reverses the first sz coefficients.
    Fps& reverse(int sz = -1){ RSZ(sz); std::reverse(a.begin(), a.begin() + sz); return *this; }

    // Product of a and b truncated (or zero-padded) to sz coefficients.
    // With sz unspecified the result has a.size() + b.size() - 1 coefficients
    // (0 when that is negative), on both the schoolbook and the NTT path.
    static Fps convolution(const Fps& a, const Fps& b, int sz = -1){
        if(std::min(a.size(), b.size()) <= CONV_THRES){
            if(a.size() > b.size()) return convolution(b, a, sz);
            if(sz < 0) sz = std::max(0, a.size() + b.size() - 1);
            std::vector<Elem> res(sz);
            for(int i=0; i<a.size(); i++) for(int j=0; j<b.size() && i+j<sz; j++) res[i+j] += a[i] * b[j];
            return res;
        }
        // Resolve the default here: passing -1 on to capSize() would keep the
        // padded transform length instead of the true product length.
        if(sz < 0) sz = a.size() + b.size() - 1;
        int Z = BestNttSize(a.size() + b.size() - 1);
        return a.clip(0, Z).ntt().mulEach(b.clip(0, Z).ntt()).intt().capSize(sz).move();
    }
    Fps convolve(const Fps& r, int sz = -1) const { return convolution(*this, r, sz); }

    //   1
    // ----- = 1 + f + f^2 + f^3 + ...
    //  1-f
    // First sz terms of the series above, with f = *this. The seeding
    // recurrence starts at j = 1, i.e. the constant term of f is ignored.
    Fps powerSum(int sz) const {
        RSZ(sz);
        if(sz == 0) return {};
        // Seed the first min(sz, 32) terms with the direct recurrence.
        int q = std::min(sz, 32);
        Fps x = Fps(q).set(0, OneElem()).move();
        for(int i=1; i<q; i++) for(int j=1; j<=std::min(i,(int)a.size()-1); j++) x[i] += x[i-j] * a[j];
        // Then double the known prefix with transform-based steps.
        while(x.size() < sz){
            int hN = x.size(), N = hN*2;
            Fps a = x.clip(0, N).ntt().move();
            Fps b = clip(0, N).ntt().mulEach(a).intt().clrRange(0,hN).ntt().mulEach(a).intt().move();
            for(int i=0; i<hN; i++) b[i] = x[i];
            std::swap(b, x);
        }
        return x.capSize(sz).move();
    }

    // First sz terms of 1 / (*this), via powerSum on 1 - (*this)/a[0].
    // Requires a non-empty series with an invertible constant term.
    Fps inv(int sz = -1) const {
        RSZ(sz);
        Elem iA0 = a[0].inv();
        return clip(0, std::min(sz, size())).times(-iA0).set(0, ZeroElem()).powerSum(sz).times(iA0).move();
    }

    // Formal derivative, in place (the length shrinks by one).
    Fps& difference(){
        if(size() == 0) return *this;
        for(int i=0; i+1<size(); i++) a[i] = a[i+1] * Elem::raw(i+1);
        return capSize(size()-1);
    }
    // Formal antiderivative with zero constant term, in place (the length grows by one).
    Fps& integral(){
        if(size() == 0) return capSize(1);
        capSize(size()+1);
        comb.extend(size());
        for(int i=size()-1; i>=1; i--) a[i] = a[i-1] * comb.invOf(i);
        return set(0, ZeroElem());
    }
    // a[i] *= i! for every stored coefficient.
    Fps& EgfToOgf(){
        comb.extend(size());
        for(int i=0; i<size(); i++) a[i] *= comb.factorial(i);
        return *this;
    }
    // a[i] *= 1/i! for every stored coefficient.
    Fps& OgfToEgf(){
        comb.extend(size());
        for(int i=0; i<size(); i++) a[i] *= comb.invFactorial(i);
        return *this;
    }

    // First sz terms of log(*this), computed as the integral of f' / f.
    // Requires sz != 0 and a[0] == 1 (both asserted).
    Fps log(int sz = -1){
        RSZ(sz);
        assert(sz != 0);
        assert(a[0].val() == 1);
        return convolution(inv(sz), clip().difference(), sz-1).integral();
    }

    // First sz terms of exp(*this) by doubling the known prefix each round.
    // NOTE(review): presumably requires a[0] == 0; this is not asserted.
    Fps exp(int sz = -1){
        RSZ(sz);
        Fps res = Fps(1).set(0, OneElem());
        while(res.size() < sz){
            auto z = res.size();
            // tmp = log(res) - f - 1; only indices z..2z-1 of res*tmp are used below.
            auto tmp = res.capSize(z*2).log().set(0, -OneElem()).move();
            for(int i=0; i<z*2 && i<size(); i++) tmp[i] -= a[i];
            auto resntt = res.clip().ntt().mulEach(tmp.ntt()).intt().move();
            for(int i=z; i<z*2; i++) res[i] = -resntt[i];
        }
        return res.capSize(0, sz).move();
    }

    // First sz terms of (*this)^k. Strips the lowest `ctz` zero coefficients,
    // normalises the leading one to 1, uses exp(k * log(.)) and shifts the
    // result back by ctz * k.
    Fps pow(unsigned long long k, int sz = -1){
        int n = RSZ(sz);
        // Nothing to compute for an empty target; also keeps the writes and
        // the (n-1)/k arithmetic below in range.
        if(n == 0) return Fps();
        if(k == 0) return Fps(n).set(0, OneElem()).move();
        int ctz = 0;
        // getCoeff keeps the scan in bounds when n exceeds the stored size.
        while(ctz<n && getCoeff(ctz).val() == 0) ctz++;
        // ctz * k >= n: every requested coefficient is zero.
        if((unsigned long long)ctz >= (n-1) / k + 1) return Fps(n);
        Elem a0 = a[ctz];
        return clip(ctz, ctz+n-ctz*k).times(a0.inv()).log().times(Elem(k)).exp().times(a0.pow(k)).clip(0, -1, ctz*k);
    }

    auto begin(){ return a.begin(); }
    auto end(){ return a.end(); }
    auto begin() const { return a.begin(); }
    auto end() const { return a.end(); }

    // Renders the coefficients as text, e.g. "[ 1 2 3 ]" with the defaults.
    std::string toString(std::string beg = "[ ", std::string delim = " ", std::string en = " ]") const {
        std::string res = beg;
        bool f = false;
        for(auto x : a){ if(f){ res += delim; } f = true; res += std::to_string(x.val()); }
        res += en;
        return res;
    }

    // Hands the coefficient vector to the caller; *this is left moved-from.
    std::vector<Elem> getVectorMoved(){ return std::move(a); }

    // Coefficient-wise addition / subtraction; the result is as long as the longer operand.
    Fps& operator+=(const Fps& r){
        capSize(std::max(size(), r.size()));
        for(int i=0; i<r.size(); i++) a[i] += r[i];
        return *this;
    }
    Fps& operator-=(const Fps& r){
        capSize(std::max(size(), r.size()));
        for(int i=0; i<r.size(); i++) a[i] -= r[i];
        return *this;
    }
    Fps operator+(const Fps& r) const { return (clip(0, std::max(size(), r.size())) += r).move(); }
    Fps operator-(const Fps& r) const { return (clip(0, std::max(size(), r.size())) -= r).move(); }
    Fps operator-() const { return (clip().negate()).move(); }
    // Polynomial product with high-degree zero coefficients removed.
    Fps operator*(const Fps& r) const { return convolve(r).removeLeadingZeros().move(); }
    Fps& operator*=(const Fps& r){ return (*this) = operator*(r); }
    Fps& operator*=(Elem m){ return times(m); }
    Fps operator*(Elem m) const { return (clip() *= m).move(); }

    // Value of the polynomial at x (Horner's rule).
    Elem eval(Elem x) const {
        Elem res = 0;
        for(int i=size()-1; i>=0; i--) res = res * x + a[i];
        return res;
    }
};

// Out-of-class definitions of the static data members.
template<class Elem, class NttInst> Comb<Elem> FpsNtt<Elem, NttInst>::comb;
template<class Elem, class NttInst> const NttInst FpsNtt<Elem, NttInst>::nttInst;

} // namespace nachia

#line 4 "D:\\Programming\\VSCode\\competitive-cpp\\nachia\\fps\\evaluate-on-geometric-sequence.hpp"

namespace nachia{

// Evaluates the polynomial `a` at w^0, w^1, ..., w^(len-1) and returns the
// len values. Degenerate inputs (len == 0, w == 0, empty a) are answered
// directly; otherwise one transform-sized product is used, with
//   wti[i]  = w^(i(i-1)/2)   and   iwti[i] = 1 / wti[i]  for i < max(n, len).
template<class Fps>
Fps EvaluateOnGeometricSequence(const Fps& a, typename Fps::ElemTy w, int len){
    using Elem = typename Fps::ElemTy;
    if(len == 0) return Fps();
    const int n = a.size();
    if(w.val() == 0 || n == 0){
        // Every point except w^0 = 1 is 0, where the value is the constant term.
        Fps flat = Fps(std::vector<Elem>(len, a.getCoeff(0)));
        flat.set(0, a.eval(1));
        return flat;
    }
    const int span = std::max(n, len);
    const int nttsz = Fps::BestNttSize(n + len - 1);
    Fps iwti(nttsz), wti(nttsz);
    iwti[0] = wti[0] = Elem::raw(1);
    // iwti temporarily holds the plain powers w^i ...
    for(int i = 1; i < iwti.size(); ++i) iwti[i] = iwti[i-1] * w;
    // ... which accumulate into the triangular powers.
    for(int i = 1; i < wti.size(); ++i) wti[i] = wti[i-1] * iwti[i-1];
    // One inversion at the top index, then step downwards using the plain
    // powers that are still stored in the lower slots.
    iwti[span-1] = wti[span-1].inv();
    for(int i = span - 2; i >= 0; --i) iwti[i] = iwti[i+1] * iwti[i];

    Fps work = a.clip(0, nttsz);
    work.mulEach(iwti, n);
    work.reverse(n);
    work.ntt();
    wti.ntt();
    work.mulEach(wti);
    work.intt();
    Fps result = work.clip(n - 1, len + n - 1);
    result.mulEach(iwti, len);
    return result;
}

} // namespace nachia
#line 7 "..\\Main.cpp"
// Program-wide shorthands used by the solution code that follows.
using namespace std;
using i32 = int;
using u32 = unsigned int;
using i64 = long long;
using u64 = unsigned long long;
// rep(i,n): loops an int index i over 0, 1, ..., n-1.
#define rep(i,n) for(int i=0; i<(int)(n); i++)
// Large sentinel value (about 1.0e18); not referenced in the visible code.
const i64 INF = 1001001001001001001;

// All arithmetic is done modulo 998244353 through AtCoder Library's static_modint.
using Modint = atcoder::static_modint<998244353>;
// Power-series type over that modint, with the default NTT implementation.
using Fps = nachia::FpsNtt<Modint>;

int main(){
    int T; cin >> T;
    rep(t,T){
        Modint A; { int a; cin >> a; A = a; }
        int N; cin >> N;
        int n = 1; while((i64)(n+1)*(n+1) <= N) n++;
        Fps f(n);
        int off = N - n*n;
        Modint ans = 0;
        {
            Modint t0 = A;
            Modint t1 = 1;
            Modint t2 = 1;
            rep(i,off){
                ans += t2;
                t2 *= t1 * t1 * t0;
                t1 *= t0;
            }
        }
        Modint An = A.pow(n);
        Modint Ann = An.pow(n);
        {
            Modint t0 = A;
            Modint t1 = A.pow(off);
            Modint t2 = t1.pow(off);
            rep(i,n){
                f[i] = t2;
                t2 *= t1 * t1 * t0;
                t1 *= t0;
            }
        }
        auto ev = nachia::EvaluateOnGeometricSequence(f, An*An, n);
        {
            Modint t0 = Ann;
            Modint t1 = An.pow(off);
            Modint t2 = 1;
            rep(i,n){
                ans += ev[i] * t2;
                t2 *= t1 * t1 * t0;
                t1 *= t0;
            }
        }
        cout << ans.val() << '\n';
    }
    return 0;
}



// Global object whose constructor runs during static initialization and
// switches the standard streams to fast mode: no synchronisation with C
// stdio, and std::cin no longer tied to std::cout.
struct ios_do_not_sync{
    ios_do_not_sync(){
        std::ios_base::sync_with_stdio(false);
        std::cin.tie(nullptr);
    }
} ios_do_not_sync_instance;

0