#pragma GCC optimize("O3,unroll-loops") #pragma GCC target("avx2,bmi,bmi2,lzcnt,popcnt") #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include using namespace std; using lint = long long; using pint = pair; using plint = pair; struct fast_ios { fast_ios(){ cin.tie(nullptr), ios::sync_with_stdio(false), cout << fixed << setprecision(20); }; } fast_ios_; #define ALL(x) (x).begin(), (x).end() #define FOR(i, begin, end) for(int i=(begin),i##_end_=(end);i=i##_begin_;i--) #define REP(i, n) FOR(i,0,n) #define IREP(i, n) IFOR(i,0,n) template void ndarray(vector& vec, const V& val, int len) { vec.assign(len, val); } template void ndarray(vector& vec, const V& val, int len, Args... args) { vec.resize(len), for_each(begin(vec), end(vec), [&](T& v) { ndarray(v, val, args...); }); } template bool chmax(T &m, const T q) { return m < q ? (m = q, true) : false; } template bool chmin(T &m, const T q) { return m > q ? (m = q, true) : false; } int floor_lg(long long x) { return x <= 0 ? -1 : 63 - __builtin_clzll(x); } template pair operator+(const pair &l, const pair &r) { return make_pair(l.first + r.first, l.second + r.second); } template pair operator-(const pair &l, const pair &r) { return make_pair(l.first - r.first, l.second - r.second); } template vector sort_unique(vector vec) { sort(vec.begin(), vec.end()), vec.erase(unique(vec.begin(), vec.end()), vec.end()); return vec; } template int arglb(const std::vector &v, const T &x) { return std::distance(v.begin(), std::lower_bound(v.begin(), v.end(), x)); } template int argub(const std::vector &v, const T &x) { return std::distance(v.begin(), std::upper_bound(v.begin(), v.end(), x)); } template istream &operator>>(istream &is, vector &vec) { for (auto &v : vec) is >> v; return is; } template ostream &operator<<(ostream &os, const vector &vec) { os << '['; for (auto v : vec) os << v << ','; os << ']'; return os; } template ostream &operator<<(ostream &os, const array &arr) { os << '['; for (auto v : arr) os << v << ','; os << ']'; return os; } #if __cplusplus >= 201703L template istream &operator>>(istream &is, tuple &tpl) { std::apply([&is](auto &&... args) { ((is >> args), ...);}, tpl); return is; } template ostream &operator<<(ostream &os, const tuple &tpl) { os << '('; std::apply([&os](auto &&... args) { ((os << args << ','), ...);}, tpl); return os << ')'; } #endif template ostream &operator<<(ostream &os, const deque &vec) { os << "deq["; for (auto v : vec) os << v << ','; os << ']'; return os; } template ostream &operator<<(ostream &os, const set &vec) { os << '{'; for (auto v : vec) os << v << ','; os << '}'; return os; } template ostream &operator<<(ostream &os, const unordered_set &vec) { os << '{'; for (auto v : vec) os << v << ','; os << '}'; return os; } template ostream &operator<<(ostream &os, const multiset &vec) { os << '{'; for (auto v : vec) os << v << ','; os << '}'; return os; } template ostream &operator<<(ostream &os, const unordered_multiset &vec) { os << '{'; for (auto v : vec) os << v << ','; os << '}'; return os; } template ostream &operator<<(ostream &os, const pair &pa) { os << '(' << pa.first << ',' << pa.second << ')'; return os; } template ostream &operator<<(ostream &os, const map &mp) { os << '{'; for (auto v : mp) os << v.first << "=>" << v.second << ','; os << '}'; return os; } template ostream &operator<<(ostream &os, const unordered_map &mp) { os << '{'; for (auto v : mp) os << v.first << "=>" << v.second << ','; os << '}'; return os; } #ifdef HITONANODE_LOCAL const string COLOR_RESET = "\033[0m", BRIGHT_GREEN = "\033[1;32m", BRIGHT_RED = "\033[1;31m", BRIGHT_CYAN = "\033[1;36m", NORMAL_CROSSED = "\033[0;9;37m", RED_BACKGROUND = "\033[1;41m", NORMAL_FAINT = "\033[0;2m"; #define dbg(x) cerr << BRIGHT_CYAN << #x << COLOR_RESET << " = " << (x) << NORMAL_FAINT << " (L" << __LINE__ << ") " << __FILE__ << COLOR_RESET << endl #define dbgif(cond, x) ((cond) ? cerr << BRIGHT_CYAN << #x << COLOR_RESET << " = " << (x) << NORMAL_FAINT << " (L" << __LINE__ << ") " << __FILE__ << COLOR_RESET << endl : cerr) #else #define dbg(x) (x) #define dbgif(cond, x) 0 #endif constexpr int md = (115 << 20) + 1; #include using mint = atcoder::static_modint; #include // Multivariate convolution (Linear, overflow cutoff) // Complexity: (kN \log N + k^2 N)$ // Note that the vectors store the infomation in **column-major order** // Implementation idea: https://rushcheyo.blog.uoj.ac/blog/6547 // Details of my implementation: https://hitonanode.github.io/cplib-cpp/convolution/multivar_ntt.hpp template struct multivar_ntt { int K, N, fftlen; std::vector dim; std::vector chi; MODINT invfftlen; vector g; std::vector> fex, gex, hex; private: void _initialize(const std::vector &dim_) { dim = dim_; K = dim_.size(); N = std::accumulate(dim_.begin(), dim_.end(), 1, [&](int l, int r) { return l * r; }); fftlen = 1; while (fftlen < N * 2) fftlen <<= 1; invfftlen = MODINT(fftlen).inv(); chi.resize(fftlen); int t = 1; for (auto d : dim_) { t *= d; for (int s = t; s < fftlen; s += t) chi[s] += 1; } for (int i = 0; i + 1 < fftlen; i++) { chi[i + 1] += chi[i]; if (chi[i + 1] >= K) chi[i + 1] -= K; } } std::vector _convolve(const std::vector &f) { assert(int(f.size()) == N); assert(int(g.size()) == N); if (dim.empty()) return {f[0] * g[0]}; hex.assign(K, std::vector(fftlen)); fex.assign(K, std::vector(fftlen)); for (int i = 0; i < N; i++) fex[chi[i]][i] = f[i]; for (auto &vec : fex) atcoder::internal::butterfly(vec); for (int df = 0; df < K; df++) { for (int dg = 0; dg < K; dg++) { int dh = (df + dg < K) ? df + dg : df + dg - K; for (int i = 0; i < fftlen; i++) hex[dh][i] += fex[df][i] * gex[dg][i]; } } for (auto &vec : hex) atcoder::internal::butterfly_inv(vec); std::vector ret(N); for (int i = 0; i < N; i++) ret[i] = hex[chi[i]][i] * invfftlen; return ret; } public: multivar_ntt(const std::vector &dim_) { _initialize(dim_); } void set_g(const vector &g_) { g = g_; gex.assign(K, vector(fftlen)); if (dim.empty()) return; for (int i = 0; i < N; i++) gex[chi[i]][i] = g[i]; for (auto &vec : gex) atcoder::internal::butterfly(vec); } void double_g() { if (!K) { g[0] *= g[0]; return; } vector hex(K, vector(fftlen)); for (int df = 0; df < K; df++) { for (int dg = 0; dg <= df; dg++) { int dh = (df + dg < K) ? df + dg : df + dg - K; for (int i = 0; i < fftlen; i++) hex[dh][i] += gex[df][i] * gex[dg][i] * (df == dg ? 1 : 2); } } for (auto &vec : hex) atcoder::internal::butterfly_inv(vec); for (int i = 0; i < N; ++i) g[i] = hex[chi[i]][i] * invfftlen; gex.assign(K, vector(fftlen)); for (int i = 0; i < N; i++) gex[chi[i]][i] = g[i]; for (auto &vec : gex) atcoder::internal::butterfly(vec); } std::vector operator()(const std::vector &f) { return _convolve(f); } }; // 元ネタが分からないんですが,OpenCup の 7 乗根のやつですか? int main() { auto START = std::chrono::system_clock::now(); constexpr int E = 10; const mint r10 = 9142366; int N, K; lint M; int T; cin >> N >> K >> M >> T; // M %= mint::mod() - 1; int K10 = 1; REP(t, K) K10 *= 10; vector A(N); cin >> A; vector diminfo(T, E); // T 桁切捨,K - T 桁周期 multivar_ntt mntt(diminfo); vector nttmat(E, vector(E)); REP(i, nttmat.size()) REP(j, nttmat[i].size()) nttmat[i][j] = r10.pow(i * j); auto inttmat = nttmat; for (auto &vec : inttmat) for (auto &x : vec) x = x.inv() / mint(10); auto ntt10 = [&](const array &v) { array ret; ret.fill(0); REP(i, E) REP(j, E) ret[i] += nttmat[i][j] * v[j]; return ret; }; auto intt10 = [&](const array &v) { array ret; ret.fill(0); REP(i, E) REP(j, E) ret[i] += inttmat[i][j] * v[j]; return ret; }; auto circular_ntt = [&](vector &f) { for (int di = mntt.N; di < K10; di *= 10) { for (int l = 0; l < K10; l += di * 10) { for (int i = l; i < l + di; ++i) { // [i, i + di, i + 2di, ..., i + 9di] を NTT auto impose_ntt = [&](vector &v) { static array ntttmp; ntttmp.fill(0); REP(k, E) ntttmp[k] = v[i + k * di]; ntttmp = ntt10(ntttmp); REP(k, E) v[i + k * di] = ntttmp[k]; }; impose_ntt(f); } } } }; auto circular_intt = [&](vector &g) { for (int di = mntt.N; di < K10; di *= 10) { for (int l = 0; l < K10; l += di * 10) { for (int i = l; i < l + di; ++i) { // [i, i + di, i + 2di, ..., i + 9di] を NTT auto impose_intt = [&](vector &v) { static array ntttmp; REP(k, E) ntttmp[k] = v[i + k * di]; ntttmp = intt10(ntttmp); REP(k, E) v[i + k * di] = ntttmp[k]; }; impose_intt(g); } } } }; vector dp(K10), trans(K10); dp[0] = 1; for (auto a : A) trans[a] += 1; circular_ntt(dp); circular_ntt(trans); vector ret; for (int l = 0; l < K10; l += mntt.N) { vector fsub(dp.begin() + l, dp.begin() + l + mntt.N); vector gsub(trans.begin() + l, trans.begin() + l + mntt.N); lint p = M; if (gsub[0] == 0) chmin(p, 100000LL); // Multivar pow なにもわからない...... mntt.set_g(gsub); while (p) { if (p & 1) fsub = mntt(fsub); p /= 2; if (!p) break; // gsub = mntt(gsub); mntt.double_g(); } ret.insert(ret.end(), fsub.begin(), fsub.end()); } dp = ret; circular_intt(dp); #ifndef HITONANODE_LOCAL for (auto x : dp) cout << x.val() << '\n'; #endif int64_t spent_ms = std::chrono::duration_cast(std::chrono::system_clock::now() - START).count(); dbg(spent_ms); }