// GCC-specific tuning hints. Guarded so that compilers or architectures that
// do not understand them (clang, non-x86 targets) can still build the file.
#if defined(__GNUC__) && !defined(__clang__) && \
    (defined(__x86_64__) || defined(__i386__))
#pragma GCC optimize("Ofast,unroll-loops")
#pragma GCC target("avx2,popcnt")
#endif

#include <algorithm>
#include <iostream>
#include <tuple>
#include <vector>

namespace {

// All counts are reported modulo this prime.
constexpr int P = 998244353;

// A half-open window [l, r) together with the output slot it answers.
using Query = std::tuple<int, int, int>;

// Extends a residue-count row by one element of residue `v` (0 <= v < K).
//
// `src[j]` is the number of subsets (mod P) with sum congruent to j mod K.
// After the call `dst[j]` counts the subsets of the set enlarged by one
// element: either the element is skipped (src[j]) or taken (src[j - v]).
// `src` and `dst` must be distinct rows of equal length K.
void extend_row(const std::vector<int>& src, std::vector<int>& dst, int v) {
    const int K = static_cast<int>(src.size());
    int from = (K - v) % K;  // index (j - v) mod K for j == 0
    for (int j = 0; j < K; ++j) {
        int sum = src[j] + src[from];  // both < P, so no int overflow
        if (sum >= P) {
            sum -= P;
        }
        dst[j] = sum;
        if (++from == K) {
            from = 0;
        }
    }
}

// For every window A[i .. i+M-1] (i = 0 .. N-M) returns the number of
// non-empty subsets of the window whose element sum is divisible by K,
// modulo P.
//
// Returns an empty vector when no window exists (M > N) or the parameters are
// unusable (K <= 0, M < 0).
//
// Method: divide and conquer over positions. At a split point `mid`, every
// window containing `mid` is a suffix of [.., mid) joined with a prefix of
// [mid, ..). Residue counts for those suffixes/prefixes are built once and
// combined per window; windows entirely on one side are handled recursively.
std::vector<int> count_divisible_subsets(const std::vector<int>& A, int M,
                                         int K) {
    const int N = static_cast<int>(A.size());
    if (K <= 0 || M < 0 || M > N) {
        return {};
    }
    const int Q = N - M + 1;
    std::vector<int> ans(Q, 0);
    if (M == 0) {
        return ans;  // an empty window has no non-empty subset
    }

    // Reduce every element into [0, K) once, so row extension never indexes
    // outside a row even if the input holds values >= K or negative values.
    std::vector<int> residue(N);
    for (int i = 0; i < N; ++i) {
        const int v = A[i] % K;
        residue[i] = (v < 0 ? v + K : v);
    }

    std::vector<Query> all_queries;
    all_queries.reserve(Q);
    for (int i = 0; i < Q; ++i) {
        all_queries.emplace_back(i, i + M, i);
    }

    // dp[i] for i <= mid: residue counts over subsets of A[i .. mid-1].
    // dp[i] for i >= mid: residue counts over subsets of A[mid .. i-1].
    // Rows are reused by every recursion level.
    std::vector<std::vector<int>> dp(N + 1, std::vector<int>(K));

    auto solve = [&](auto&& self, int lo, int hi,
                     const std::vector<Query>& queries) -> void {
        if (queries.empty()) {
            return;  // nothing to answer in this segment
        }
        const int mid = lo + (hi - lo) / 2;

        std::vector<Query> left, right, crossing;
        int first = mid;  // smallest l among crossing windows
        int last = mid;   // largest r among crossing windows
        for (const auto& [l, r, idx] : queries) {
            if (l <= mid && mid <= r) {
                first = std::min(first, l);
                last = std::max(last, r);
                crossing.emplace_back(l, r, idx);
            } else if (r < mid) {
                left.emplace_back(l, r, idx);
            } else {
                right.emplace_back(l, r, idx);
            }
        }

        // Base row: only the empty subset, which has sum 0.
        std::fill(dp[mid].begin(), dp[mid].end(), 0);
        dp[mid][0] = 1;
        // Only rows that some crossing window actually reads are built.
        for (int i = mid - 1; i >= first; --i) {
            extend_row(dp[i + 1], dp[i], residue[i]);
        }
        for (int i = mid; i < last; ++i) {
            extend_row(dp[i], dp[i + 1], residue[i]);
        }

        for (const auto& [l, r, idx] : crossing) {
            // Start at -1 (mod P) to discount the empty subset.
            long long total = P - 1;
            for (int j = 0; j < K; ++j) {
                const int k = (j == 0 ? 0 : K - j);  // j + k == 0 (mod K)
                total += 1LL * dp[l][j] * dp[r][k] % P;
                if (total >= P) {
                    total -= P;
                }
            }
            ans[idx] = static_cast<int>(total);
        }

        if (hi - lo <= 1) {
            return;
        }
        self(self, lo, mid, left);
        self(self, mid, hi, right);
    };
    solve(solve, 0, N, all_queries);
    return ans;
}

}  // namespace

// Reads "N M K" followed by N integers from stdin and prints one answer per
// length-M window, one per line.
int main() {
    std::ios::sync_with_stdio(false);
    std::cin.tie(nullptr);

    int N, M, K;
    if (!(std::cin >> N >> M >> K) || N < 0) {
        return 0;
    }
    std::vector<int> A(N);
    for (int& a : A) {
        std::cin >> a;
    }

    for (const int value : count_divisible_subsets(A, M, K)) {
        std::cout << value << '\n';
    }
    return 0;
}