// Whole translation unit, restored to one statement per line.
//
// The collapsed form could not compile: the `#include` directive had lost its
// header name and every `//` comment swallowed the code that followed it on
// the same physical line. The explicit standard headers below replace the
// missing include.
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <iomanip>
#include <iostream>
#include <random>
#include <type_traits>
#include <utility>
#include <vector>

using namespace std;

using int64 = long long;
const int mod = 998244353;
const int64 infll = (1LL << 62) - 1;
const int inf = (1 << 30) - 1;

// Configures the standard streams once, before main() runs: unties cin from
// cout, disables C stdio synchronisation and selects fixed notation with 10
// digits after the decimal point on cout and cerr.
struct IoSetup {
  IoSetup() {
    cin.tie(nullptr);
    ios::sync_with_stdio(false);
    cout << fixed << setprecision(10);
    cerr << fixed << setprecision(10);
  }
} iosetup;

// Writes a pair as "first second".
template< typename T1, typename T2 >
ostream &operator<<(ostream &os, const pair< T1, T2 > &p) {
  os << p.first << " " << p.second;
  return os;
}

// Reads a pair as two whitespace-separated values.
template< typename T1, typename T2 >
istream &operator>>(istream &is, pair< T1, T2 > &p) {
  is >> p.first >> p.second;
  return is;
}

// Writes the elements separated by single spaces, without a trailing space.
template< typename T >
ostream &operator<<(ostream &os, const vector< T > &v) {
  const int n = (int) v.size();
  for(int i = 0; i < n; i++) {
    os << v[i] << (i + 1 != n ? " " : "");
  }
  return os;
}

// Reads v.size() values into the (pre-sized) vector.
template< typename T >
istream &operator>>(istream &is, vector< T > &v) {
  for(T &in : v) is >> in;
  return is;
}

// a = max(a, b); returns true iff a was changed.
template< typename T1, typename T2 >
inline bool chmax(T1 &a, T2 b) {
  return a < b && (a = b, true);
}

// a = min(a, b); returns true iff a was changed.
template< typename T1, typename T2 >
inline bool chmin(T1 &a, T2 b) {
  return a > b && (a = b, true);
}

// make_v<T>(a, b, ...) builds a value-initialised nested vector with the
// given extents.
template< typename T = int64 >
vector< T > make_v(size_t a) {
  return vector< T >(a);
}

template< typename T, typename... Ts >
auto make_v(size_t a, Ts... ts) {
  return vector< decltype(make_v< T >(ts...)) >(a, make_v< T >(ts...));
}

// fill_v(t, v) assigns v to every leaf of a nested container. Non-class types
// are treated as leaves; class types are iterated and recursed into.
template< typename T, typename V >
typename enable_if< is_class< T >::value == 0 >::type fill_v(T &t, const V &v) {
  t = v;
}

template< typename T, typename V >
typename enable_if< is_class< T >::value != 0 >::type fill_v(T &t, const V &v) {
  for(auto &e : t) fill_v(e, v);
}

// Dense matrix over T stored as a vector of rows. Element access goes through
// vector::at, so an out-of-range row index throws.
template< class T >
struct Matrix {
  vector< vector< T > > A;

  Matrix() {}

  // n x m zero matrix.
  Matrix(size_t n, size_t m) : A(n, vector< T >(m, 0)) {}

  // n x n zero matrix.
  Matrix(size_t n) : A(n, vector< T >(n, 0)) {}

  size_t height() const {
    return (A.size());
  }

  // Reads the width from row 0, so the matrix must have at least one row.
  size_t width() const {
    return (A[0].size());
  }

  inline const vector< T > &operator[](int k) const {
    return (A.at(k));
  }

  inline vector< T > &operator[](int k) {
    return (A.at(k));
  }

  // n x n identity matrix.
  static Matrix I(size_t n) {
    Matrix mat(n);
    for(int i = 0; i < (int) n; i++) mat[i][i] = 1;
    return (mat);
  }

  Matrix &operator+=(const Matrix &B) {
    size_t n = height(), m = width();
    assert(n == B.height() && m == B.width());
    for(int i = 0; i < (int) n; i++)
      for(int j = 0; j < (int) m; j++)
        (*this)[i][j] += B[i][j];
    return (*this);
  }

  Matrix &operator-=(const Matrix &B) {
    size_t n = height(), m = width();
    assert(n == B.height() && m == B.width());
    for(int i = 0; i < (int) n; i++)
      for(int j = 0; j < (int) m; j++)
        (*this)[i][j] -= B[i][j];
    return (*this);
  }

  // Schoolbook product. The result is built in a separate buffer, so
  // `M *= M` is safe.
  Matrix &operator*=(const Matrix &B) {
    size_t n = height(), m = B.width(), p = width();
    assert(p == B.height());
    vector< vector< T > > C(n, vector< T >(m, 0));
    for(int i = 0; i < (int) n; i++)
      for(int j = 0; j < (int) m; j++)
        for(int k = 0; k < (int) p; k++)
          C[i][j] = (C[i][j] + (*this)[i][k] * B[k][j]);
    A.swap(C);
    return (*this);
  }

  // Binary exponentiation; k <= 0 yields the identity.
  Matrix &operator^=(long long k) {
    Matrix B = Matrix::I(height());
    while(k > 0) {
      if(k & 1) B *= *this;
      *this *= *this;
      k >>= 1LL;
    }
    A.swap(B.A);
    return (*this);
  }

  Matrix operator+(const Matrix &B) const {
    return (Matrix(*this) += B);
  }

  Matrix operator-(const Matrix &B) const {
    return (Matrix(*this) -= B);
  }

  Matrix operator*(const Matrix &B) const {
    return (Matrix(*this) *= B);
  }

  Matrix operator^(const long long k) const {
    return (Matrix(*this) ^= k);
  }

  // Prints each row as "[a,b,...]" followed by a newline. Takes the matrix by
  // const reference so temporaries and const matrices can be printed too.
  friend ostream &operator<<(ostream &os, const Matrix &p) {
    size_t n = p.height(), m = p.width();
    for(int i = 0; i < (int) n; i++) {
      os << "[";
      for(int j = 0; j < (int) m; j++) {
        os << p[i][j] << (j + 1 == (int) m ? "]\n" : ",");
      }
    }
    return (os);
  }

  // Determinant by Gaussian elimination on a copy. T must support division.
  T determinant() const {
    Matrix B(*this);
    assert(width() == height());
    const int n = (int) width();
    T ret = 1;
    for(int i = 0; i < n; i++) {
      // Pick the last row at or below i with a nonzero entry in column i.
      int idx = -1;
      for(int j = i; j < n; j++) {
        if(B[j][i] != 0) idx = j;
      }
      if(idx == -1) return (0);
      if(i != idx) {
        ret *= -1;  // a row swap flips the sign
        swap(B[i], B[idx]);
      }
      ret *= B[i][i];
      T vv = B[i][i];
      for(int j = 0; j < n; j++) {
        B[i][j] /= vv;
      }
      for(int j = i + 1; j < n; j++) {
        T a = B[j][i];
        for(int k = 0; k < n; k++) {
          B[j][k] -= B[i][k] * a;
        }
      }
    }
    return (ret);
  }
};

// Integer modulo the compile-time constant `mod`, kept in [0, mod).
template< int mod >
struct ModInt {
  int x;

  ModInt() : x(0) {}

  // Normalises any 64-bit value (including negatives) into [0, mod).
  ModInt(int64_t y) : x(y >= 0 ? y % mod : (mod - (-y) % mod) % mod) {}

  ModInt &operator+=(const ModInt &p) {
    if((x += p.x) >= mod) x -= mod;
    return *this;
  }

  ModInt &operator-=(const ModInt &p) {
    if((x += mod - p.x) >= mod) x -= mod;
    return *this;
  }

  ModInt &operator*=(const ModInt &p) {
    x = (int) (1LL * x * p.x % mod);
    return *this;
  }

  ModInt &operator/=(const ModInt &p) {
    *this *= p.inverse();
    return *this;
  }

  ModInt operator-() const { return ModInt(-x); }

  ModInt operator+(const ModInt &p) const { return ModInt(*this) += p; }

  ModInt operator-(const ModInt &p) const { return ModInt(*this) -= p; }

  ModInt operator*(const ModInt &p) const { return ModInt(*this) *= p; }

  ModInt operator/(const ModInt &p) const { return ModInt(*this) /= p; }

  bool operator==(const ModInt &p) const { return x == p.x; }

  bool operator!=(const ModInt &p) const { return x != p.x; }

  // Multiplicative inverse via the extended Euclidean algorithm.
  ModInt inverse() const {
    int a = x, b = mod, u = 1, v = 0, t;
    while(b > 0) {
      t = a / b;
      swap(a -= t * b, b);
      swap(u -= t * v, v);
    }
    return ModInt(u);
  }

  // this^n by binary exponentiation; n <= 0 yields 1.
  ModInt pow(int64_t n) const {
    ModInt ret(1), mul(x);
    while(n > 0) {
      if(n & 1) ret *= mul;
      mul *= mul;
      n >>= 1;
    }
    return ret;
  }

  friend ostream &operator<<(ostream &os, const ModInt &p) {
    return os << p.x;
  }

  friend istream &operator>>(istream &is, ModInt &a) {
    int64_t t;
    is >> t;
    a = ModInt< mod >(t);
    return (is);
  }

  static int get_mod() { return mod; }
};

using modint = ModInt< mod >;

// Formal power series / polynomial with coefficients in T; index i holds the
// coefficient of x^i.
//
// Multiplication is delegated to a callback installed with set_mult().
// Optional forward / inverse transforms installed with set_fft() switch
// inv() and exp() to their transform-based implementations.
template< typename T >
struct FormalPowerSeries : vector< T > {
  using vector< T >::vector;
  using P = FormalPowerSeries;

  using MULT = function< P(P, P) >;
  using FFT = function< void(P &) >;

  static MULT &get_mult() {
    static MULT mult = nullptr;
    return mult;
  }

  static void set_mult(MULT f) {
    get_mult() = f;
  }

  static FFT &get_fft() {
    static FFT fft = nullptr;
    return fft;
  }

  static FFT &get_ifft() {
    static FFT ifft = nullptr;
    return ifft;
  }

  static void set_fft(FFT f, FFT g) {
    get_fft() = f;
    get_ifft() = g;
  }

  // Drops trailing zero coefficients.
  void shrink() {
    while(this->size() && this->back() == T(0)) this->pop_back();
  }

  P operator+(const P &r) const { return P(*this) += r; }

  P operator+(const T &v) const { return P(*this) += v; }

  P operator-(const P &r) const { return P(*this) -= r; }

  P operator-(const T &v) const { return P(*this) -= v; }

  P operator*(const P &r) const { return P(*this) *= r; }

  P operator*(const T &v) const { return P(*this) *= v; }

  P operator/(const P &r) const { return P(*this) /= r; }

  P operator%(const P &r) const { return P(*this) %= r; }

  P &operator+=(const P &r) {
    if(r.size() > this->size()) this->resize(r.size());
    for(int i = 0; i < (int) r.size(); i++) (*this)[i] += r[i];
    return *this;
  }

  P &operator+=(const T &r) {
    if(this->empty()) this->resize(1);
    (*this)[0] += r;
    return *this;
  }

  // Subtraction shrinks the result (addition does not).
  P &operator-=(const P &r) {
    if(r.size() > this->size()) this->resize(r.size());
    for(int i = 0; i < (int) r.size(); i++) (*this)[i] -= r[i];
    shrink();
    return *this;
  }

  P &operator-=(const T &r) {
    if(this->empty()) this->resize(1);
    (*this)[0] -= r;
    shrink();
    return *this;
  }

  P &operator*=(const T &v) {
    const int n = (int) this->size();
    for(int k = 0; k < n; k++) (*this)[k] *= v;
    return *this;
  }

  // Product via the installed multiplication callback; an empty operand
  // yields an empty result without calling it.
  P &operator*=(const P &r) {
    if(this->empty() || r.empty()) {
      this->clear();
      return *this;
    }
    assert(get_mult() != nullptr);
    return *this = get_mult()(*this, r);
  }

  P &operator%=(const P &r) {
    return *this -= *this / r * r;
  }

  P operator-() const {
    P ret(this->size());
    for(int i = 0; i < (int) this->size(); i++) ret[i] = -(*this)[i];
    return ret;
  }

  // Polynomial quotient, computed on the reversed polynomials with a power
  // series inverse of the reversed divisor.
  P &operator/=(const P &r) {
    if(this->size() < r.size()) {
      this->clear();
      return *this;
    }
    int n = this->size() - r.size() + 1;
    return *this = (rev().pre(n) * r.rev().inv(n)).pre(n).rev(n);
  }

  // Coefficient-wise product, truncated to the shorter operand.
  P dot(P r) const {
    P ret(min(this->size(), r.size()));
    for(int i = 0; i < (int) ret.size(); i++) ret[i] = (*this)[i] * r[i];
    return ret;
  }

  // First min(size, sz) coefficients.
  P pre(int sz) const {
    return P(begin(*this), begin(*this) + min((int) this->size(), sz));
  }

  // Divides by x^sz, discarding the low coefficients.
  P operator>>(int sz) const {
    if(this->size() <= (size_t) sz) return {};
    P ret(*this);
    ret.erase(ret.begin(), ret.begin() + sz);
    return ret;
  }

  // Multiplies by x^sz.
  P operator<<(int sz) const {
    P ret(*this);
    ret.insert(ret.begin(), sz, T(0));
    return ret;
  }

  // Reverses the coefficient order, first resizing to `deg` entries unless
  // deg == -1.
  P rev(int deg = -1) const {
    P ret(*this);
    if(deg != -1) ret.resize(deg, T(0));
    reverse(begin(ret), end(ret));
    return ret;
  }

  // Formal derivative.
  P diff() const {
    const int n = (int) this->size();
    P ret(max(0, n - 1));
    for(int i = 1; i < n; i++) ret[i - 1] = (*this)[i] * T(i);
    return ret;
  }

  // Formal antiderivative with constant term 0.
  P integral() const {
    const int n = (int) this->size();
    P ret(n + 1);
    ret[0] = T(0);
    for(int i = 0; i < n; i++) ret[i + 1] = (*this)[i] / T(i + 1);
    return ret;
  }

  // Multiplicative inverse modulo x^deg (deg defaults to size()).
  // F(0) must not be 0.
  P inv(int deg = -1) const {
    assert(((*this)[0]) != T(0));
    const int n = (int) this->size();
    if(deg == -1) deg = n;
    if(get_fft() != nullptr) {
      P ret(*this);
      ret.resize(deg, T(0));
      return ret.inv_fast();
    }
    // Newton iteration, doubling the precision each round.
    P ret({T(1) / (*this)[0]});
    for(int i = 1; i < deg; i <<= 1) {
      ret = (ret + ret - ret * ret * pre(i << 1)).pre(i << 1);
    }
    return ret.pre(deg);
  }

  // Logarithm modulo x^deg (deg defaults to size()).
  // F(0) must be 1.
  P log(int deg = -1) const {
    assert((*this)[0] == 1);
    const int n = (int) this->size();
    if(deg == -1) deg = n;
    return (this->diff() * this->inv(deg)).pre(deg - 1).integral();
  }

  // Square root modulo x^deg (deg defaults to size()).
  //
  // Leading zero coefficients are split off first: an odd number of them
  // returns an empty series, an even number is halved and re-attached. The
  // Newton iteration is seeded with constant term 1, so the result is a true
  // square root only when the first nonzero coefficient is 1.
  P sqrt(int deg = -1) const {
    const int n = (int) this->size();
    if(deg == -1) deg = n;

    if((*this)[0] == T(0)) {
      for(int i = 1; i < n; i++) {
        if((*this)[i] != T(0)) {
          if(i & 1) return {};
          if(deg - i / 2 <= 0) break;
          auto ret = (*this >> i).sqrt(deg - i / 2) << (i / 2);
          if((int) ret.size() < deg) ret.resize(deg, T(0));
          return ret;
        }
      }
      return P(deg, 0);
    }

    P ret({T(1)});
    T inv2 = T(1) / T(2);
    for(int i = 1; i < deg; i <<= 1) {
      ret = (ret + pre(i << 1) * ret.inv(i << 1)) * inv2;
    }
    return ret.pre(deg);
  }

  // Exponential modulo x^deg (deg defaults to size()).
  // F(0) must be 0.
  P exp(int deg = -1) const {
    assert((*this)[0] == T(0));
    const int n = (int) this->size();
    if(deg == -1) deg = n;
    if(get_fft() != nullptr) {
      P ret(*this);
      ret.resize(deg, T(0));
      return ret.exp_rec();
    }
    // Newton iteration, doubling the precision each round.
    P ret({T(1)});
    for(int i = 1; i < deg; i <<= 1) {
      ret = (ret * (pre(i << 1) + T(1) - ret.log(i << 1))).pre(i << 1);
    }
    return ret.pre(deg);
  }

  // Solves g[0] = 1, i * g[i] = sum_{j < i} g[j] * conv_coeff[i - j] by
  // divide and conquer and returns g. conv_coeff.size() must be a power of
  // two; the transforms installed with set_fft() are required.
  P online_convolution_exp(const P &conv_coeff) const {
    const int n = (int) conv_coeff.size();
    assert((n & (n - 1)) == 0);

    // Transformed prefixes of conv_coeff, one per recursion depth
    // (length n at depth 0, n / 2 at depth 1, ...).
    vector< P > conv_ntt_coeff;
    for(int i = n; i >= 1; i >>= 1) {
      P g(conv_coeff.pre(i));
      get_fft()(g);
      conv_ntt_coeff.emplace_back(g);
    }

    P conv_arg(n), conv_ret(n);
    auto rec = [&](auto rec, int l, int r, int d) -> void {
      if(r - l <= 16) {
        // Small ranges are solved directly.
        for(int i = l; i < r; i++) {
          T sum = 0;
          for(int j = l; j < i; j++) sum += conv_arg[j] * conv_coeff[i - j];
          conv_ret[i] += sum;
          conv_arg[i] = i == 0 ? T(1) : conv_ret[i] / i;
        }
      } else {
        int m = (l + r) / 2;
        rec(rec, l, m, d + 1);
        // Push the contribution of the finished left half into the right half.
        P buf(r - l);
        for(int i = 0; i < m - l; i++) buf[i] = conv_arg[l + i];
        get_fft()(buf);
        for(int i = 0; i < r - l; i++) buf[i] *= conv_ntt_coeff[d][i];
        get_ifft()(buf);
        for(int i = 0; i < r - m; i++) conv_ret[m + i] += buf[m + i - l];
        rec(rec, m, r, d + 1);
      }
    };
    rec(rec, 0, n, 0);
    return conv_arg;
  }

  // exp() via online convolution, using g' = f' * g. F(0) must be 0.
  P exp_rec() const {
    assert((*this)[0] == T(0));
    const int n = (int) this->size();
    int m = 1;
    while(m < n) m *= 2;
    P conv_coeff(m);
    for(int i = 1; i < n; i++) conv_coeff[i] = (*this)[i] * i;
    return online_convolution_exp(conv_coeff).pre(n);
  }

  // Inverse modulo x^size() using the installed transforms directly.
  // F(0) must not be 0.
  P inv_fast() const {
    assert(((*this)[0]) != T(0));

    const int n = (int) this->size();
    P res{T(1) / (*this)[0]};

    for(int d = 1; d < n; d <<= 1) {
      P f(2 * d), g(2 * d);
      for(int j = 0; j < min(n, 2 * d); j++) f[j] = (*this)[j];
      for(int j = 0; j < d; j++) g[j] = res[j];
      get_fft()(f);
      get_fft()(g);
      for(int j = 0; j < 2 * d; j++) f[j] *= g[j];
      get_ifft()(f);
      for(int j = 0; j < d; j++) {
        f[j] = 0;
        f[j + d] = -f[j + d];
      }
      get_fft()(f);
      for(int j = 0; j < 2 * d; j++) f[j] *= g[j];
      get_ifft()(f);
      for(int j = 0; j < d; j++) f[j] = res[j];
      res = f;
    }
    return res.pre(n);
  }

  // this^k modulo x^deg (deg defaults to size()), via exp(k * log(.)) on the
  // series normalised to constant term 1.
  //
  // NOTE(review): the inner log()/exp() run at the series' own length, so
  // asking for deg > size() pads with zeros rather than computing the extra
  // coefficients - confirm whether callers rely on that.
  P pow(int64_t k, int deg = -1) const {
    const int n = (int) this->size();
    if(deg == -1) deg = n;
    for(int i = 0; i < n; i++) {
      if((*this)[i] != T(0)) {
        // The result starts at x^(i * k). Test "i * k > deg" without forming
        // the product (it overflows for large k), and do so before the
        // expensive log/exp.
        if(i > 0 && k > deg / i) return P(deg, T(0));
        T rev = T(1) / (*this)[i];
        P ret = (((*this * rev) >> i).log() * k).exp() * ((*this)[i].pow(k));
        ret = (ret << (i * k)).pre(deg);
        if(ret.size() < deg) ret.resize(deg, T(0));
        return ret;
      }
    }
    return *this;
  }

  // Evaluates the polynomial at x.
  T eval(T x) const {
    T r = 0, w = 1;
    for(auto &v : *this) {
      r += w * v;
      w *= x;
    }
    return r;
  }

  // this^n modulo the polynomial `mod`, by binary exponentiation.
  //
  // The inverse of the reversed modulus is computed once and reused for every
  // reduction, so mod's leading coefficient must be nonzero.
  P pow_mod(int64_t n, P mod) const {
    // Quotient of base by the modulus, given div = inverse of reversed mod.
    auto get_div = [](P base, const P &div) {
      if(base.size() < div.size()) {
        base.clear();
        return base;
      }
      int n = base.size() - div.size() + 1;
      return (base.rev().pre(n) * div.pre(n)).pre(n).rev(n);
    };
    P modinv = mod.rev().inv();
    P x(*this), ret{1};
    while(n > 0) {
      if(n & 1) {
        ret *= x;
        ret -= get_div(ret, modinv) * mod;
      }
      x *= x;
      x -= get_div(x, modinv) * mod;
      n >>= 1;
    }
    return ret;
  }
};

// Number-theoretic transform for a modint type whose modulus is an odd prime
// p; transform lengths up to the largest power of two dividing p - 1 are
// supported.
template< typename Mint >
struct NumberTheoreticTransformFriendlyModInt {

  vector< int > rev;    // bit-reversal permutation for length 2^base
  vector< Mint > rts;   // twiddle factors
  int base, max_base;   // current / maximum supported log2 length
  Mint root;            // primitive 2^max_base-th root of unity

  // Members are initialised in declaration order.
  NumberTheoreticTransformFriendlyModInt() : rev{0, 1}, rts{0, 1}, base(1) {
    const int mod = Mint::get_mod();
    assert(mod >= 3 && mod % 2 == 1);
    auto tmp = mod - 1;
    max_base = 0;
    while(tmp % 2 == 0) tmp >>= 1, max_base++;
    // Smallest value >= 2 that is a quadratic non-residue.
    root = 2;
    while(root.pow((mod - 1) >> 1) == 1) root += 1;
    assert(root.pow(mod - 1) == 1);
    root = root.pow((mod - 1) >> max_base);
  }

  // Grows the tables so transforms of length 2^nbase are available.
  void ensure_base(int nbase) {
    if(nbase <= base) return;
    assert(nbase <= max_base);  // checked before the tables are resized
    rev.resize(1 << nbase);
    rts.resize(1 << nbase);
    for(int i = 0; i < (1 << nbase); i++) {
      rev[i] = (rev[i >> 1] >> 1) + ((i & 1) << (nbase - 1));
    }
    while(base < nbase) {
      Mint z = root.pow(1 << (max_base - 1 - base));
      for(int i = 1 << (base - 1); i < (1 << base); i++) {
        rts[i << 1] = rts[i];
        rts[(i << 1) + 1] = rts[i] * z;
      }
      ++base;
    }
  }

  // In-place forward transform; a.size() must be a power of two.
  // An empty vector is left untouched.
  void ntt(vector< Mint > &a) {
    const int n = (int) a.size();
    if(n == 0) return;  // __builtin_ctz(0) is undefined
    assert((n & (n - 1)) == 0);
    int zeros = __builtin_ctz(n);
    ensure_base(zeros);
    int shift = base - zeros;
    for(int i = 0; i < n; i++) {
      if(i < (rev[i] >> shift)) {
        swap(a[i], a[rev[i] >> shift]);
      }
    }
    for(int k = 1; k < n; k <<= 1) {
      for(int i = 0; i < n; i += 2 * k) {
        for(int j = 0; j < k; j++) {
          Mint z = a[i + j + k] * rts[j + k];
          a[i + j + k] = a[i + j] - z;
          a[i + j] = a[i + j] + z;
        }
      }
    }
  }

  // In-place inverse transform: forward transform, reverse a[1..], divide by
  // n. An empty vector is left untouched.
  void intt(vector< Mint > &a) {
    const int n = (int) a.size();
    if(n == 0) return;
    ntt(a);
    reverse(a.begin() + 1, a.end());
    Mint inv_sz = Mint(1) / n;
    for(int i = 0; i < n; i++) a[i] *= inv_sz;
  }

  // Convolution of a and b with a.size() + b.size() - 1 coefficients;
  // returns an empty vector when that length is not positive.
  vector< Mint > multiply(vector< Mint > a, vector< Mint > b) {
    const int need = (int) a.size() + (int) b.size() - 1;
    if(need <= 0) return {};
    int nbase = 1;
    while((1 << nbase) < need) nbase++;
    ensure_base(nbase);
    int sz = 1 << nbase;
    a.resize(sz, 0);
    b.resize(sz, 0);
    ntt(a);
    ntt(b);
    Mint inv_sz = Mint(1) / sz;
    for(int i = 0; i < sz; i++) {
      a[i] *= b[i] * inv_sz;
    }
    reverse(a.begin() + 1, a.end());
    ntt(a);
    a.resize(need);
    return a;
  }
};

// Berlekamp-Massey: returns c such that sum_i c[i] * s[e - |c| + i] == 0 for
// every window of s ending at position e that was examined.
template< class T >
FormalPowerSeries< T > berlekamp_massey(const FormalPowerSeries< T > &s) {
  const int N = (int) s.size();
  FormalPowerSeries< T > b = {T(-1)}, c = {T(-1)};
  T y = T(1);
  for(int ed = 1; ed <= N; ed++) {
    int l = int(c.size()), m = int(b.size());
    // Discrepancy of the current recurrence on the window ending at ed.
    T x = 0;
    for(int i = 0; i < l; i++) x += c[i] * s[ed - l + i];
    b.emplace_back(0);
    m++;
    if(x == T(0)) continue;
    T freq = x / y;
    if(l < m) {
      // The recurrence has to grow: keep the old one as the new fallback.
      auto tmp = c;
      c.insert(begin(c), m - l, T(0));
      for(int i = 0; i < m; i++) c[m - 1 - i] -= freq * b[m - 1 - i];
      b = tmp;
      y = x;
    } else {
      for(int i = 0; i < m; i++) c[l - 1 - i] -= freq * b[m - 1 - i];
    }
  }
  return c;
}

// Sparse matrix as adjacency lists: g[i] holds (j, w) for each nonzero
// entry w at row i, column j.
template< typename T >
using FPSGraph = vector< vector< pair< int, T > > >;

// n coefficients drawn uniformly from [0, mod).
//
// The engine is a function-local static with a fixed seed: runs stay
// reproducible, but successive calls return different vectors. Re-seeding on
// every call made all "random" vectors identical, so the two projection
// vectors in minimum_poly (and the preconditioner in sparse_determinant) were
// the same vector.
template< typename T >
FormalPowerSeries< T > random_poly(int n) {
  static mt19937 mt(1333333);
  FormalPowerSeries< T > res(n);
  uniform_int_distribution< int > rand(0, T::get_mod() - 1);
  for(int i = 0; i < n; i++) res[i] = rand(mt);
  return res;
}

// One step of the linear map: nxt[j] += w * dp[i] for every (j, w) in g[i].
template< typename T >
FormalPowerSeries< T > next_poly(const FormalPowerSeries< T > &dp, const FPSGraph< T > &g) {
  const int N = (int) dp.size();
  FormalPowerSeries< T > nxt(N);
  for(int i = 0; i < N; i++) {
    for(auto &p : g[i]) nxt[p.first] += p.second * dp[i];
  }
  return nxt;
}

// Runs Berlekamp-Massey on the 2N scalars <u, M^i v> for random vectors
// u, v, where M is the map applied by next_poly. Presumably the minimal
// polynomial of M with high probability (up to the sign convention of
// berlekamp_massey) - verify against the intended algorithm.
template< typename T >
FormalPowerSeries< T > minimum_poly(const FPSGraph< T > &g) {
  const int N = (int) g.size();
  auto dp = random_poly< T >(N), u = random_poly< T >(N);
  FormalPowerSeries< T > f(2 * N);
  for(int i = 0; i < 2 * N; i++) {
    for(auto &p : u.dot(dp)) f[i] += p;
    dp = next_poly(dp, g);
  }
  return berlekamp_massey(f);
}

/* Matrix power: applies the Q-th power of the map of next_poly to dp.
   Stated cost, with one next_poly application costing O(S):
   O(N(N+S) + N log N log Q) */
template< typename T >
FormalPowerSeries< T > sparse_pow(int64_t Q, FormalPowerSeries< T > dp, const FPSGraph< T > &g) {
  // x^Q reduced modulo the minimal polynomial gives M^Q as a combination of
  // the first few powers of M.
  auto A = FormalPowerSeries< T >({0, 1}).pow_mod(Q, minimum_poly(g));
  const int terms = (int) A.size();
  const int N = (int) dp.size();
  FormalPowerSeries< T > res(N);
  for(int i = 0; i < terms; i++) {
    res += dp * A[i];
    dp = next_poly(dp, g);
  }
  return res;
}

/* Determinant. Stated cost, with S nonzero entries: O(N(N+S)) */
template< typename T >
T sparse_determinant(FPSGraph< T > g) {
  int N = (int) g.size();
  // Scale row i by a random C[i] before taking the minimal polynomial, then
  // divide the product of the C[i] back out.
  auto C = random_poly< T >(N);
  for(int i = 0; i < N; i++) for(auto &p : g[i]) p.second *= C[i];
  auto u = minimum_poly(g);
  T acdet = u[0];
  if(N % 2 == 0) acdet *= -1;
  T cdet = 1;
  for(int i = 0; i < N; i++) cdet *= C[i];
  return acdet / cdet;
}

// Reads N and K, then prints, modulo 998244353, the sum over all states
// (i, j, 0) of the entries reached after N steps from state (0, 0, 0).
//
// A state is a triple (i, j, k) of residues mod K and has three unit-weight
// transitions. Going by the original comments, these presumably append one of
// the characters "ho", "mu", "ra", with i / j / k counting occurrences of
// "ho", "homu" and "homura" modulo K - confirm against the problem statement.
int main() {
  using FPS = FormalPowerSeries< modint >;
  NumberTheoreticTransformFriendlyModInt< modint > fft;
  FPS::set_fft([&](FPS &a) { fft.ntt(a); }, [&](FPS &a) { fft.intt(a); });
  FPS::set_mult([&](const FPS &a, const FPS &b) {
    auto c = fft.multiply(a, b);
    return FPS(begin(c), end(c));
  });

  int N, K;
  // With K <= 0 (or a failed read) there are no states and dp[0] below would
  // be out of bounds.
  if(!(cin >> N >> K) || K <= 0) return 1;

  auto to_idx = [&](int x, int y, int z) {
    return x * K * K + y * K + z;
  };

  // Build the transition lists directly. Parallel edges are summed by
  // next_poly, so this is the same linear map as a dense K^3 x K^3 matrix
  // scanned for nonzeros, without the O(K^6) time and memory.
  const int states = K * K * K;
  FPSGraph< modint > ord(states);
  for(int i = 0; i < K; i++) {        // "ho"
    for(int j = 0; j < K; j++) {      // "homu"
      for(int k = 0; k < K; k++) {    // "homura"
        auto &edges = ord[to_idx(i, j, k)];
        // "ho"
        edges.emplace_back(to_idx((i + 1) % K, j, k), modint(1));
        // "mu"
        edges.emplace_back(to_idx(i, (i + j) % K, k), modint(1));
        // "ra"
        edges.emplace_back(to_idx(i, j, (j + k) % K), modint(1));
      }
    }
  }

  FPS dp(states);
  dp[0] = 1;
  auto mat = sparse_pow< modint >(N, dp, ord);
  modint ret = 0;
  for(int i = 0; i < K; i++) {
    for(int j = 0; j < K; j++) {
      ret += mat[to_idx(i, j, 0)];
    }
  }
  cout << ret << '\n';
}