#include <stdio.h>
#include <stdlib.h>

/*
 * For a rooted tree on vertices 0..N (root 0) with a value A[v] on every
 * vertex, this program prints, for each vertex u,
 *
 *     sum over d >= 0 of  C(K, d) * (sum of A[v] over descendants v of u
 *                                     that lie exactly d edges below u)
 *
 * modulo 998244353.  The tree is split into longest downward paths
 * ("hv_*" arrays); for every path the per-level sums are convolved with the
 * binomial coefficients C(K, 0..), using a naive product for short paths and
 * an NTT for long ones.
 */

/* NTT-friendly prime modulus: 998244353 = 119 * 2^23 + 1. */
const int Mod = 998244353;

/* bit[k] = 2^k. */
const int bit[21] = {
    1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384,
    32768, 65536, 131072, 262144, 524288, 1048576
};

/* bit_inv[k] = inverse of 2^k modulo Mod (used to scale the inverse NTT). */
const int bit_inv[21] = {
    1, 499122177, 748683265, 873463809, 935854081, 967049217, 982646785,
    990445569, 994344961, 996294657, 997269505, 997756929, 998000641,
    998122497, 998183425, 998213889, 998229121, 998236737, 998240545,
    998242449, 998243401
};

/* root[k] = the 2^k-th root of unity modulo Mod used at recursion level k. */
const int root[21] = {
    1, 998244352, 911660635, 372528824, 929031873, 452798380, 922799308,
    781712469, 476477967, 166035806, 258648936, 584193783, 63912897,
    350007156, 666702199, 968855178, 629671588, 24514907, 996173970,
    363395222, 565042129
};

/* root_inv[k] = inverse of root[k] modulo Mod. */
const int root_inv[21] = {
    1, 998244352, 86583718, 509520358, 337190230, 87557064, 609441965,
    135236158, 304459705, 685443576, 381598368, 335559352, 129292727,
    358024708, 814576206, 708402881, 283043518, 3707709, 121392023,
    704923114, 950391366
};

/*
 * Scratch buffers for the recursive transforms, one row per recursion level k.
 * ntt_b/ntt_c hold the even/odd-indexed halves in the coefficient domain,
 * ntt_x/ntt_y hold the corresponding halves in the transformed domain.
 */
int ntt_b[21][1048576], ntt_c[21][1048576], ntt_x[21][1048576], ntt_y[21][1048576];

/*
 * Modular division: returns the value r in [0, z) with r * y == x (mod z).
 *
 * Works by recursion in the style of the Euclidean algorithm: it finds t such
 * that t * z + x is divisible by y and returns (t * z + x) / y.
 * Expects 0 <= x, 0 < y, and y coprime to z.
 */
long long div_mod(long long x, long long y, long long z)
{
    if (x % y == 0)
        return x / y;
    return (div_mod((1 + x / y) * y - x, (z % y), y) * z + x) / y;
}

/*
 * Forward transform of length 2^k: z[0 .. 2^k - 1] receives the evaluations of
 * the polynomial with coefficients a[0 .. 2^k - 1] at the powers of root[k].
 * Recursive even/odd split; a and z must not be rows of the ntt_* buffers at
 * level k or below.
 */
void NTT(int k, int a[], int z[])
{
    if (k == 0) {
        z[0] = a[0];
        return;
    }

    int i, d = bit[k-1], tmpp;
    long long tmp;

    /* Split into even- and odd-indexed coefficients. */
    for (i = 0; i < d; i++) {
        ntt_b[k][i] = a[i*2];
        ntt_c[k][i] = a[i*2+1];
    }
    NTT(k - 1, ntt_b[k], ntt_x[k]);
    NTT(k - 1, ntt_c[k], ntt_y[k]);

    /* Butterfly: tmp runs through root[k]^i. */
    for (i = 0, tmp = 1; i < d; i++, tmp = tmp * root[k] % Mod) {
        tmpp = tmp * ntt_y[k][i] % Mod;
        z[i] = ntt_x[k][i] + tmpp;
        if (z[i] >= Mod)
            z[i] -= Mod;
        z[i+d] = ntt_x[k][i] - tmpp;
        if (z[i+d] < 0)
            z[i+d] += Mod;
    }
}

/*
 * Inverse transform of length 2^k WITHOUT the final 1 / 2^k scaling: the same
 * recursion as NTT() but using root_inv[k].  The caller must multiply every
 * output by bit_inv[k] to recover the coefficients.
 */
void NTT_reverse(int k, int z[], int a[])
{
    if (k == 0) {
        a[0] = z[0];
        return;
    }

    int i, d = bit[k-1], tmpp;
    long long tmp;

    for (i = 0; i < d; i++) {
        ntt_x[k][i] = z[i*2];
        ntt_y[k][i] = z[i*2+1];
    }
    NTT_reverse(k - 1, ntt_x[k], ntt_b[k]);
    NTT_reverse(k - 1, ntt_y[k], ntt_c[k]);

    for (i = 0, tmp = 1; i < d; i++, tmp = tmp * root_inv[k] % Mod) {
        tmpp = tmp * ntt_c[k][i] % Mod;
        a[i] = ntt_b[k][i] + tmpp;
        if (a[i] >= Mod)
            a[i] -= Mod;
        a[i+d] = ntt_b[k][i] - tmpp;
        if (a[i+d] < 0)
            a[i+d] += Mod;
    }
}

// Compute the product of two polynomials a[0-da] and b[0-db] using NTT in O(d * log d) time.
// Writes c[0 .. da+db].  Coefficients must lie in [0, Mod) and da + db must be
// below 2^20, the largest transform size the tables and buffers support.
void prod_poly_NTT(int da, int db, int a[], int b[], int c[])
{
    int i, k;
    static int aa[1048576], bb[1048576], cc[1048576];
    static int x[1048576], y[1048576], z[1048576];

    /* Smallest k with 2^k > da + db. */
    for (k = 0; bit[k] <= da + db; k++)
        ;

    /* Zero-padded copies of the inputs. */
    for (i = 0; i <= da; i++)
        aa[i] = a[i];
    for (i = da + 1; i < bit[k]; i++)
        aa[i] = 0;
    for (i = 0; i <= db; i++)
        bb[i] = b[i];
    for (i = db + 1; i < bit[k]; i++)
        bb[i] = 0;

    NTT(k, aa, x);
    if (db == da) {
        /* Squaring shortcut: if both inputs are identical, reuse the transform. */
        for (i = 0; i <= da; i++)
            if (a[i] != b[i])
                break;
        if (i <= da) {
            NTT(k, bb, y);
        } else {
            for (i = 0; i < bit[k]; i++)
                y[i] = x[i];
        }
    } else {
        NTT(k, bb, y);
    }

    for (i = 0; i < bit[k]; i++)
        z[i] = (long long)x[i] * y[i] % Mod;
    NTT_reverse(k, z, cc);

    /* Apply the 1 / 2^k scaling that NTT_reverse() leaves out. */
    for (i = 0; i <= da + db; i++)
        c[i] = (long long)cc[i] * bit_inv[k] % Mod;
}

// Compute the product of two polynomials a[0-da] and b[0-db] naively in O(da * db) time.
// Writes c[0 .. da+db].  Coefficients must lie in [0, Mod).
void prod_poly_naive(int da, int db, int a[], int b[], int c[])
{
    int i, j;

    for (i = 0; i <= da + db; i++)
        c[i] = 0;
    for (i = 0; i <= da; i++) {
        for (j = 0; j <= db; j++) {
            /* Both terms are below Mod, so the sum stays below 2^31. */
            c[i+j] += (long long)a[i] * b[j] % Mod;
            if (c[i+j] >= Mod)
                c[i+j] -= Mod;
        }
    }
}

// Compute the product of two polynomials a[0-da] and b[0-db] in an appropriate way:
// naive when (da + 1) * (db + 1) is at most THR, NTT otherwise.
void prod_polynomial(int da, int db, int a[], int b[], int c[])
{
    const int THR = 250000;

    if (THR / (da + 1) >= db + 1)
        prod_poly_naive(da, db, a, b, c);
    else
        prod_poly_NTT(da, db, a, b, c);
}

/* Singly linked adjacency-list node: v is the child vertex. */
typedef struct Edge {
    struct Edge *next;
    int v;
} edge;

/* Print a diagnostic and terminate with a failure status. */
static void die(const char *msg)
{
    fprintf(stderr, "%s\n", msg);
    exit(EXIT_FAILURE);
}

/*
 * Input (stdin):  N K
 *                 A[0] ... A[N]
 *                 par[1] ... par[N]      (parent of each non-root vertex)
 * Output (stdout): N + 1 lines, the answer for vertices 0..N modulo Mod.
 */
int main(void)
{
    enum { MAX_N = 200000 };

    /*
     * Static storage: together these arrays are far larger than a typical
     * default stack, and static objects start out zeroed (adj[] and ans[]
     * rely on that).
     */
    static int A[MAX_N + 1], par[MAX_N + 1];
    static edge *adj[MAX_N + 1], e[MAX_N + 1];
    static int q[MAX_N + 1], height[MAX_N + 1];
    static int hv_par[MAX_N + 1], hv_child[MAX_N + 1];
    static int hv_height[MAX_N + 1], hv_root[MAX_N + 1];
    static int *hv_path[MAX_N + 1], *hv_count[MAX_N + 1];
    static int a[2 * MAX_N + 3], b[2 * MAX_N + 3], c[2 * MAX_N + 3];
    static long long ans[MAX_N + 1];

    int i, j, N, u, w, uu, ww, head, tail, max, argmax;
    long long K, tmp;
    edge *p;

    /* ---- Read and validate the input. ---- */
    if (scanf("%d %lld", &N, &K) != 2)
        die("failed to read N and K");
    if (N < 0 || N > MAX_N)
        die("N is out of range");

    for (i = 0; i <= N; i++) {
        if (scanf("%d", &(A[i])) != 1)
            die("failed to read A");
        /* Keep every value in [0, Mod) so modular additions cannot overflow. */
        A[i] %= Mod;
        if (A[i] < 0)
            A[i] += Mod;
    }

    par[0] = -1;
    for (i = 1; i <= N; i++) {
        if (scanf("%d", &(par[i])) != 1)
            die("failed to read parent");
        if (par[i] < 0 || par[i] > N)
            die("parent index is out of range");
        u = par[i];
        e[i].v = i;
        e[i].next = adj[u];
        adj[u] = &(e[i]);
    }

    /* ---- BFS order from the root: parents precede their children in q[]. ---- */
    q[0] = 0;
    for (head = 0, tail = 1; head < tail; head++) {
        u = q[head];
        for (p = adj[u]; p != NULL; p = p->next)
            q[tail++] = p->v;
    }
    if (tail != N + 1)
        die("input is not a tree rooted at vertex 0");

    /*
     * ---- Reverse BFS order: subtree heights and longest-child links. ----
     * hv_child[u] is the child with the tallest subtree (first one found on
     * ties), hv_par[] is the inverse link, and hv_par[u] == -1 marks the top
     * vertex of a path.  hv_height[u] is u's position on its path counted
     * from the bottom (the deepest vertex has position 0).
     */
    for (head = tail - 1; head >= 0; head--) {
        u = q[head];
        max = 0;
        argmax = -1;
        for (p = adj[u]; p != NULL; p = p->next) {
            w = p->v;
            if (max < height[w] + 1) {
                max = height[w] + 1;
                argmax = w;
            }
        }
        height[u] = max;
        if (max > 0) {
            hv_par[argmax] = u;
            hv_child[u] = argmax;
            hv_height[u] = hv_height[argmax] + 1;
        } else {
            hv_child[u] = -1;
            hv_height[u] = 0;
        }
        /* Provisional; overwritten later if u's parent picks u as its long child. */
        hv_par[u] = -1;
    }

    /* ---- Allocate per-path arrays, indexed by position on the path. ---- */
    for (u = 0; u <= N; u++) {
        if (hv_par[u] >= 0)
            continue;
        size_t len = (size_t)hv_height[u] + 1;
        hv_path[u] = malloc(len * sizeof *hv_path[u]);
        /* calloc: the counters are accumulated with += and must start at zero. */
        hv_count[u] = calloc(len, sizeof *hv_count[u]);
        if (hv_path[u] == NULL || hv_count[u] == NULL)
            die("out of memory");
        for (w = u, i = hv_height[u]; w >= 0; w = hv_child[w], i--) {
            hv_path[u][i] = w;
            hv_root[w] = u;
        }
    }

    /* ---- b[i] = C(K, i) mod Mod via b[i] = b[i-1] * (K - i + 1) / i. ---- */
    b[0] = 1;
    tmp = 1;
    for (i = 1; i <= N; i++) {
        /* Normalise the factor into [0, Mod) so div_mod never sees a negative value. */
        long long factor = ((K - i + 1) % Mod + Mod) % Mod;
        tmp = tmp * factor % Mod;
        tmp = div_mod(tmp, i, Mod);
        b[i] = tmp;
    }

    /*
     * ---- Process vertices deepest-first. ----
     * hv_count[w][i] accumulates the sum of A over all vertices of the subtree
     * hanging off path w that sit at the same depth as path position i.
     * When the top of a path is reached the path is complete: convolve it
     * with the binomials, add the result to its vertices, then merge the path
     * into the parent's path.
     */
    for (head = tail - 1; head >= 0; head--) {
        u = q[head];
        w = hv_root[u];
        i = hv_height[u];
        hv_count[w][i] += A[u];
        if (hv_count[w][i] >= Mod)
            hv_count[w][i] -= Mod;
        if (hv_par[u] >= 0)
            continue;   /* not the top of its path yet */

        for (i = 0; i <= hv_height[w]; i++)
            a[i] = hv_count[w][i];
        prod_polynomial(hv_height[w], hv_height[w], a, b, c);
        for (i = 0; i <= hv_height[w]; i++)
            ans[hv_path[w][i]] += c[i];

        uu = par[w];
        if (uu < 0)
            continue;   /* the root's path has nothing above it */
        ww = hv_root[uu];

        /*
         * Position i on path w lines up with position j on the parent's path.
         * The merged counts will later contribute c[i] to the parent-path
         * vertex at position j, which lies below uu and is therefore not an
         * ancestor of this subtree; cancel that contribution in advance.
         */
        for (i = 0, j = hv_height[uu] - hv_height[w] - 1; i <= hv_height[w]; i++, j++) {
            hv_count[ww][j] += hv_count[w][i];
            if (hv_count[ww][j] >= Mod)
                hv_count[ww][j] -= Mod;
            ans[hv_path[ww][j]] += Mod - c[i];
        }
    }

    for (u = 0; u <= N; u++)
        printf("%lld\n", ans[u] % Mod);
    fflush(stdout);

    /* Entries for vertices that are not path tops are NULL; free(NULL) is a no-op. */
    for (u = 0; u <= N; u++) {
        free(hv_path[u]);
        free(hv_count[u]);
    }
    return 0;
}