//Let's join Kaede Takagaki Fan Club !!
//
// NOTE(review): this file reached review collapsed onto four lines and with
// every "<identifier ... >" span removed (header names, template parameter
// lists, `<` comparisons inside loops).  The missing text below was rebuilt
// from the surviving fragments; the exact original header list and any extra
// helper macros that lived in the stripped spans could not be recovered, so
// only what the visible code requires has been restored.  Please confirm
// against the author's original.
#pragma GCC optimize("Ofast")
#pragma GCC optimize("unroll-loops")
#include <algorithm>
#include <cassert>
#include <chrono>
#include <cstdint>
#include <cstring>
#include <iomanip>
#include <iostream>
#include <random>
#include <utility>
#include <vector>
#include <ext/pb_ds/assoc_container.hpp>
using namespace std;
// Every `int` below is 64-bit; this is why main is declared `signed`.
#define int long long
//#define L __int128
typedef long long ll;
typedef pair<int,int> P;
typedef pair<int,P> P1;
typedef pair<P,P> P2;
#define pu push
#define pb push_back
#define eb emplace_back
#define mp make_pair
#define eps 1e-7
#define INF 1000000000
// The original also had `#define a first` / `#define b second`.  They rewrote
// every identifier called `a` or `b` in the file and nothing visible relies on
// `.a` / `.b`, so they were dropped; use fi / sc instead.
#define fi first
#define sc second
//#define rng(i,a,b) for(int i=(int)(a);i<(int)(b);i++)
#define rep(i,x) for(int i=0;i<(int)(x);i++)     // i = 0 .. x-1
#define repn(i,x) for(int i=1;i<=(int)(x);i++)   // i = 1 .. x
#define all(x) (x).begin(),(x).end()

// a = max(a, b); returns true when a was updated.
template<class t,class u> bool chmax(t&a,u b){if(a<b){a=b;return true;}else return false;}
// a = min(a, b); returns true when a was updated.
template<class t,class u> bool chmin(t&a,u b){if(b<a){a=b;return true;}else return false;}
template<class t> using vc=vector<t>;

// Debug printers: pairs as {first,second}, vectors as {e0,e1,...,}.
template<class t,class u>
ostream& operator<<(ostream& os,const pair<t,u>& p){
	return os<<"{"<<p.fi<<","<<p.sc<<"}";
}
template<class t>
ostream& operator<<(ostream& os,const vc<t>& v){
	os<<"{";
	for(auto e:v)os<<e<<",";
	return os<<"}";
}

// Randomised splitmix64 hash for gp_hash_table / unordered containers.
struct custom_hash {
	static uint64_t splitmix64(uint64_t x) {
		x += 0x9e3779b97f4a7c15;
		x = (x ^ (x >> 30)) * 0xbf58476d1ce4e5b9;
		x = (x ^ (x >> 27)) * 0x94d049bb133111eb;
		return x ^ (x >> 31);
	}
	size_t operator()(uint64_t x) const {
		static const uint64_t FIXED_RANDOM = chrono::steady_clock::now().time_since_epoch().count();
		return splitmix64(x + FIXED_RANDOM);
	}
	//don't make x negative!
	size_t operator()(pair<int,int> x)const{
		return operator()(uint64_t(x.first)<<32|x.second);
	}
};
//unordered_set -> dtype, null_type
//unordered_map -> dtype(key), dtype(value)
using namespace __gnu_pbds;
template<class t,class u>
using hash_table=gp_hash_table<t,u,custom_hash>;

// Read one value from stdin.
template<class T>
void g(T &a){
	cin >> a;
}
// Write one value to stdout followed by a space (space=true) or a newline.
template<class T>
void o(const T &a,bool space=false){
	cout << a << (space?' ':'\n');
}
//ios::sync_with_stdio(false);
const ll mod = 998244353;
//const ll mod = 1000000007;
mt19937_64 mt(chrono::steady_clock::now().time_since_epoch().count());

// a = (a + b) mod `mod`; both operands must already lie in [0, mod).
template<class T>
void add(T&a,T b){
	a+=b;
	if(a >= mod) a-=mod;
}
// x^n mod `mod` by binary exponentiation (n >= 0).
ll modpow(ll x,ll n){
	ll res=1;
	while(n>0){
		if(n&1) res=res*x%mod;
		x=x*x%mod;
		n>>=1;
	}
	return res;
}
#define _sz 400005
ll F[_sz],R[_sz];
// Fill F[i] = i! and R[i] = (i!)^{-1} modulo `mod`; must run before C().
void make(){
	F[0] = 1;
	for(int i=1;i<_sz;i++) F[i] = F[i-1]*i%mod;
	R[_sz-1] = modpow(F[_sz-1], mod-2);
	for(int i=_sz-2;i>=0;i--) R[i] = R[i+1] * (i+1) % mod;
}
// Binomial coefficient n-choose-k modulo `mod`; 0 outside 0 <= k <= n.
ll C(int n_,int k_){
	if(k_ < 0 || n_ < k_) return 0;
	return F[n_]*R[k_]%mod*R[n_-k_]%mod;
}
//o(ans?"Yes":"No");

// Polynomials are coefficient vectors, lowest degree first.
typedef vector<int> vi;
// Drop trailing zero coefficients.
vi shrink(vi a){
	while(a.size() && a.back() == 0) a.pop_back();
	return a;
}
// Multiply every coefficient by the scalar M.
vi mul_int(vi a, int M){
	for(auto &c: a) c = (int)((ll)(c) * M) % mod;
	return a;
}

// Number-theoretic transform modulo the NTT-friendly prime `md`.
template<int md>
struct ntt{
	inline void add(int &a, int b) { a += b; if(a >= md) a -= md; }
	inline void sub(int &a, int b) { a -= b; if(a < 0) a += md; }
	inline int add2(int a, int b) { a += b; if(a >= md) a -= md; return a;}
	inline int sub2(int a, int b) { a -= b; if(a < 0) a += md; return a;}
	inline int mul(int a, int b) { return (int)((ll)a*b%md); }
	inline int power(int a, long long b) {
		int res = 1;
		while (b > 0) {
			if (b & 1) res = mul(res, a);
			a = mul(a, a);
			b >>= 1;
		}
		return res;
	}
	// Modular inverse by the extended Euclidean algorithm; asserts gcd(a, md) == 1.
	inline int inv(int a) {
		a %= md;
		if (a < 0) a += md;
		int b = md, u = 0, v = 1;
		while (a) {
			int t = b / a;
			b -= t * a; swap(a, b);
			u -= t * v; swap(u, v);
		}
		assert(b == 1);
		if (u < 0) u += md;
		return u;
	}

	int max_base, root;
	vector<int> dw, idw;

	ntt() {
		// max_base = exponent of 2 in md-1, i.e. the largest supported log2(size).
		int tmp = md - 1;
		max_base = 0;
		while (tmp % 2 == 0) {
			tmp /= 2;
			max_base++;
		}
		// Smallest quadratic non-residue.
		root = 2;
		while (power(root, (md-1)>>1) == 1) root++;
		dw.resize(max_base); idw.resize(max_base);
		rep(i, max_base){
			// dw[i] = -root^((md-1) >> (i+2)); idw[i] is its inverse.
			sub(dw[i], power(root, (md-1) >> (i+2)));
			idw[i] = inv(dw[i]);
		}
	}
	// In-place transform of a power-of-two-sized vector.  The forward pass
	// (inv == false) and inverse pass (inv == true) are mutually inverse up to
	// a factor of n, which multiply() folds in itself.
	void fft(vector<int> &a, bool inv) {
		const int n = a.size();
		assert((n & (n - 1)) == 0);
		assert(__builtin_ctz(n) <= max_base);
		if(!inv){
			for(int m=n;m>>=1;){
				int w = 1;
				for(int s=0,k=0; s<n; s += 2*m){
					for(int i=s, j=s+m; i<s+m; i++, j++){
						int x = a[i], y = mul(a[j], w);
						a[j] = (x>=y?x-y:x+md-y);
						a[i] = (x+y>=md?x+y-md:x+y);
					}
					w = mul(w, dw[__builtin_ctz(++k)]);
				}
			}
		}
		else{
			for(int m=1;m<n;m*=2){
				int w = 1;
				for(int s=0,k=0; s<n; s += 2*m){
					for(int i=s, j=s+m; i<s+m; i++, j++){
						int x = a[i], y = a[j];
						a[j] = (x>=y?x-y:x+md-y);
						a[j] = mul(a[j], w);
						a[i] = (x+y>=md?x+y-md:x+y);
					}
					w = mul(w, idw[__builtin_ctz(++k)]);
				}
			}
		}
	}
	// Product of two polynomials.  With eq != 0 the second operand is ignored
	// and a is squared, saving one transform.
	vector<int> multiply(vector<int> a, vector<int> b, int eq = 0) {
		// An empty operand is the zero polynomial.  Without this guard
		// `need` became -1 for two empty operands and resize() threw.
		if (a.empty() || b.empty()) return {};
		int need = a.size() + b.size() - 1;
		int nbase = 0;
		while ((1 << nbase) < need) nbase++;
		int sz = 1 << nbase;
		a.resize(sz);
		b.resize(sz);
		fft(a, 0);
		if (eq) b = a; else fft(b, 0);
		int inv_sz = inv(sz);
		for (int i = 0; i < sz; i++) {
			a[i] = mul(mul(a[i], b[i]), inv_sz);
		}
		fft(a, 1);
		a.resize(need);
		return a;
	}
	vector<int> square(vector<int> a) {
		return multiply(a, a, 1);
	}
};
ntt<998244353>f;

// a * b (or a * a when eq != 0).
vi mul(vi a, vi b, int eq = 0){
	return f.multiply(a, b, eq);
}
// a + b, truncated to M coefficients when M >= 0.
vi add(vi a, vi b,int M=-1){
	if(a.size() < b.size()) swap(a,b);
	for(int i=0;i<(int)b.size();i++){
		a[i] += b[i];
		if(a[i] >= mod) a[i] -= mod;
	}
	if(M >= 0 && (int)a.size() > M) a.resize(M);
	return a;
}
// a - b, truncated to M coefficients when M >= 0.
vi sub(vi a, vi b,int M=-1){
	if(a.size() < b.size()) a.resize(b.size(), 0);
	for(int i=0;i<(int)b.size();i++){
		a[i] += mod - b[i];
		if(a[i] >= mod) a[i] -= mod;
	}
	if(M >= 0 && (int)a.size() > M) a.resize(M);
	return a;
}
// Keep only the lowest x coefficients.
vi lw(vi a, int x){
	if((int)a.size() > x) a.resize(x);
	return a;
}
// First M coefficients of 1/a as a power series (Newton iteration).
// Returns an empty vector when a has no inverse (empty, or a[0] == 0)
// or when M <= 0.
vi inv(vi a,int M){
	if(a.empty() || a[0] == 0 || M <= 0) return vi();
	vi ret(M);
	ret[0] = modpow(a[0],mod-2);
	int cur = 1;
	while(cur < M){
		// ret <- 2*ret - ret^2 * a, doubling the number of correct terms.
		auto at = lw(ret, cur);
		ret = sub(add(at, at), mul(mul(at, at, 1), lw(a, cur*2)));
		ret.resize(cur << 1);
		cur <<= 1;
	}
	assert((int)ret.size() >= M);
	ret.resize(M);
	return ret;
}
// a^n mod b for polynomials.  b must be non-empty with a non-zero leading
// (last) coefficient; the result has no trailing zeros.
vi modpow(vi a, ll n, vi b){
	// Inverse of the reversed modulus, reused by every reduction.
	vi rb = b; reverse(all(rb));
	rb = inv(rb, rb.size());
	auto get_mod = [&](vi v){
		vi dv, u = v;
		if(v.size() < b.size()) dv = {};
		else{
			// Quotient via the reversed-polynomial trick.
			int sz = v.size() - b.size() + 1;
			vi y = lw(rb, sz);
			reverse(all(v));
			v = lw(v, sz);
			dv = mul(v, y);
			dv.resize(sz);
			reverse(all(dv));
		}
		u = sub(u, mul(dv, b));
		return shrink(u);
	};
	if(a.size() >= b.size()){
		a = get_mod(a);
	}
	assert(a.size() < b.size());
	vi ret = {1};
	while(n){
		if(n & 1){
			ret = mul(ret, a);
			ret = get_mod(ret);
		}
		n >>= 1;
		a = mul(a, a, 1);
		a = get_mod(a);
	}
	return ret;
}
// Berlekamp-Massey: shortest c with x[i] = sum_j c[j] * x[i-1-j] (mod `mod`)
// over the given prefix.  Coefficients are returned in [0, mod).
vector<ll>BerlekampMassey(vector<ll>x){
	vector<ll>ls,cur;
	// Position and discrepancy of the last failure; previously uninitialised.
	int lf = 0, ld = 0;
	rep(i,x.size()){
		ll t = 0;
		for(int j=0;j<(int)cur.size();j++){
			t = (t+1LL*x[i-j-1]*cur[j])%mod;
		}
		if(((t-x[i])%mod+mod)%mod == 0) continue;
		if(cur.empty()){
			cur.resize(i+1);
			lf = i;
			ld = (t-x[i])%mod;
			continue;
		}
		ll k = -(x[i]-t) * modpow(ld,mod-2) % mod;
		vector<ll>c(i-lf-1);
		c.pb(k);
		rep(j,ls.size()) c.pb(-ls[j]*k%mod);
		if(c.size() < cur.size()) c.resize(cur.size());
		rep(j,cur.size()){
			c[j]=(c[j]+cur[j])%mod;
		}
		if(i-lf+(int)(ls.size()) >= (int)(cur.size())){
			ls = cur, lf = i, ld = (t-x[i])%mod;
		}
		cur = c;
	}
	rep(i,cur.size()) cur[i] = (cur[i]%mod+mod)%mod;
	return cur;
}
// num is a linearly recurrent sequence, assumed long enough to determine the
// recurrence (behaviour when it is not is unknown).
// Returns the n-th term, 0-indexed; n must be non-negative.
ll calc_linear_nth(vector<ll>num, ll n){
	// A negative n used to slip past the unsigned size comparison and spin
	// forever in modpow.
	assert(n >= 0);
	if(n < (ll)num.size()) return num[n];
	auto rel = BerlekampMassey(num);
	// To check that the recurrence really was determined, make num
	// """sufficiently""" long and then
	//assert(rel.size()*2+2 < num.size());
	// Build the characteristic polynomial x^d - c_1 x^{d-1} - ... - c_d,
	// lowest degree first.
	for(auto &at:rel) {
		if(at) at = mod - at;
	}
	reverse(all(rel));
	rel.pb(1);
	// x^n mod charpoly gives the weights of the first d terms.
	auto ans = modpow({0, 1}, n, rel);
	ll ret = 0;
	rep(i, ans.size()){
		ret += 1LL*ans[i]*num[i]%mod;
	}
	return (ret%mod+mod)%mod;
}
int n, m, dp[2][105][105];
// Reads n and m, builds the first 10500 terms of the answer sequence with a
// rolling DP, then extrapolates term n-1 through its linear recurrence.
// dp[.][j][r] is indexed by a value j in 1..m and a residue r modulo j.
// NOTE(review): this looks like counting length-n sequences over 1..m in
// which no maximal run of a value j has length divisible by j - confirm
// against the problem statement.
void solve(){
	cin >> n >> m;
	if(m==1){o(0);return;}
	assert(m < 105);            // dp is sized for values below 105
	memset(dp, 0, sizeof(dp));  // solve() must not inherit state from a previous call
	for(int i=1;i<=m;i++) dp[0][i][1%i] = 1;
	int cur = 0, nxt = 1;
	vc<int>ans;
	rep(i, 10500){
		memset(dp[nxt], 0, sizeof(dp[nxt]));
		vc<int>tmp(105, 0);  // tmp[j]: total over residues 1..104 for value j
		int A = 0;           // total over all values; residue 0 is excluded
		rep(val, 105) repn(rem, 104) if(dp[cur][val][rem]) {
			add(tmp[val], dp[cur][val][rem]);
			add(A , dp[cur][val][rem]);
		}
		ans.pb(A);
		repn(j, m){
			//j -> j
			rep(x, j) add(dp[nxt][j][(x+1)%j], dp[cur][j][x]);
			//other -> j
			add(dp[nxt][j][1%j], (A+mod-tmp[j])%mod);
		}
		swap(cur, nxt);
	}
	o(calc_linear_nth(ans, n-1));
}
signed main(){
	cin.tie(0);
	ios::sync_with_stdio(0);
	cout<<fixed<<setprecision(20);
	// NOTE(review): the statement that sets t was lost with the stripped text.
	// A single test case is assumed - confirm against the input format.
	int t; t = 1; //cin >> t;
	while(t--) solve();
}