#pragma GCC target("avx2")
#pragma GCC optimize("O3")
#pragma GCC optimize("unroll-loops")

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <iostream>
#include <utility>
#include <vector>

// Modulus applied to every count computed by this program.
constexpr int MOD = 998244353;

// Reads one decimal integer from stdin.
// Returns 0 when no integer can be parsed (instead of an indeterminate value).
int ri() {
    int n = 0;
    if (std::scanf("%d", &n) != 1) return 0;
    return n;
}

namespace {

// Flat storage used both for vectors (size S) and row-major S x S matrices.
using Vec = std::vector<int>;

// Builds the row-major S x S transition matrix for S = k^3 states.
// State index i encodes a triple (x, y, z) as i = x*k*k + y*k + z, and each
// state has three outgoing moves:
//   (x, y, z) -> (x, y, (z + 1) % k)
//   (x, y, z) -> (x, (y + z) % k, z)
//   (x, y, z) -> ((x + y) % k, y, z)
// Moves that land on the same target are counted with multiplicity.
// NOTE(review): k is assumed small enough that k^3 fits in int and k^6 ints
// fit in memory — confirm against the problem constraints.
Vec build_transition(int k) {
    const int kk = k * k;
    const int states = kk * k;
    Vec m(static_cast<std::size_t>(states) * static_cast<std::size_t>(states), 0);
    for (int i = 0; i < states; ++i) {
        const int x = i / kk;
        const int y = i / k % k;
        const int z = i % k;
        const std::size_t row = static_cast<std::size_t>(i) * static_cast<std::size_t>(states);
        const auto bump = [&](int col) { ++m[row + static_cast<std::size_t>(col)]; };
        bump(x * kk + y * k + (z + 1) % k);
        bump(x * kk + (y + z) % k * k + z);
        bump((x + y) % k * kk + y * k + z);
    }
    return m;
}

// out = v * m (mod MOD), where v is a row vector of size S and m is S x S.
// `out` is overwritten; it must not alias `v`.
void mul_vec_mat(const Vec& v, const Vec& m, Vec& out) {
    const std::size_t s = v.size();
    out.assign(s, 0);
    for (std::size_t i = 0; i < s; ++i) {
        const std::int64_t vi = v[i];
        const int* row = m.data() + i * s;
        for (std::size_t j = 0; j < s; ++j) {
            // out[j] < MOD and vi * row[j] < MOD^2, so the sum fits in 64 bits.
            out[j] = static_cast<int>((out[j] + vi * row[j]) % MOD);
        }
    }
}

// out = m * m (mod MOD) for a row-major s x s matrix. `out` must not alias `m`.
void square_into(const Vec& m, std::size_t s, Vec& out) {
    // Products are accumulated unreduced in chunks to save modulo operations.
    constexpr std::size_t kChunk = 16;
    static_assert(kChunk <= (UINT64_MAX - static_cast<std::uint64_t>(MOD - 1)) /
                                (static_cast<std::uint64_t>(MOD - 1) *
                                 static_cast<std::uint64_t>(MOD - 1)),
                  "a reduced value plus kChunk products must fit in 64 bits");
    out.resize(s * s);
    for (std::size_t i = 0; i < s; ++i) {
        const int* lhs_row = m.data() + i * s;
        for (std::size_t j = 0; j < s; ++j) {
            std::uint64_t acc = 0;
            for (std::size_t lo = 0; lo < s; lo += kChunk) {
                const std::size_t hi = std::min(lo + kChunk, s);
                for (std::size_t l = lo; l < hi; ++l) {
                    acc += static_cast<std::uint64_t>(lhs_row[l]) *
                           static_cast<std::uint64_t>(m[l * s + j]);
                }
                acc %= MOD;
            }
            out[i * s + j] = static_cast<int>(acc);
        }
    }
}

}  // namespace

// Reads n and k, raises the k^3-state transition matrix to the n-th power by
// binary exponentiation while applying it to the start vector (all weight on
// state 0), and prints the sum, modulo MOD, over the states whose x component
// is 0 (indices below k*k).
int main() {
    int n = ri();
    const int k = ri();
    if (k <= 0) {
        // No states exist (or the input was unreadable); avoid empty-array access.
        std::cout << 0 << '\n';
        return 0;
    }

    const int kk = k * k;
    const std::size_t states = static_cast<std::size_t>(kk) * static_cast<std::size_t>(k);

    // Heap-backed storage: the matrices hold k^6 ints, far too much for the stack.
    Vec a = build_transition(k);
    Vec scratch;

    Vec res(states, 0);
    Vec tmp;
    res[0] = 1;

    while (n != 0) {
        if (n & 1) {
            mul_vec_mat(res, a, tmp);
            std::swap(res, tmp);
        }
        n /= 2;
        // Square only if another exponent bit remains; the final squaring
        // would never be used.
        if (n != 0) {
            square_into(a, states, scratch);
            std::swap(a, scratch);
        }
    }

    int ans = 0;
    for (int i = 0; i < kk; ++i) {
        ans += res[i];
        if (ans >= MOD) ans -= MOD;
    }
    std::cout << ans << '\n';
    return 0;
}