#pragma GCC optimize("Ofast")
#include <algorithm>
#include <cassert>
#include <cmath>
#include <iostream>
#include <random>
#include <utility>
#include <vector>

// Contest template helpers. rep(i, n) iterates i over [0, n); rep(i, a, b)
// iterates i over [a, b). The loop variable is always an int.
#define _overload3(_1, _2, _3, name, ...) name
#define _rep(i, n) repi(i, 0, n)
#define repi(i, a, b) for (int i = (a); i < (b); ++i)
#define rep(...) _overload3(__VA_ARGS__, repi, _rep, )(__VA_ARGS__)
#define ALL(x) x.begin(), x.end()
#define chmax(x, y) x = max(x, y)
#define chmin(x, y) x = min(x, y)

using namespace std;

random_device rnd;
mt19937 mt(rnd());

using ll = long long;
using lld = long double;
using VI = vector<int>;
using VVI = vector<VI>;
using VL = vector<ll>;
using VVL = vector<VL>;
using PII = pair<int, int>;

const double EPS = 1e-3;
const double PI = 3.1415926535897932384626433832795028841971;
const int IINF = 1 << 30;
const ll INF = 1ll << 60;
const ll MOD = 1000000007;

// nums[i][t] = number of j in [0, p) with v[i] * j^k % p == t.
VVL nums;
// Input parameters, read in main in this order: p, n, k, b.
ll p, n, k, b;
// The n coefficients read after the parameters; sized in main.
VL v;

// FFT from https://ei1333.github.io/luzhiled/snippets/math/fast-fourier-transform.html
namespace FastFourierTransform {
using real = long double;

// Minimal complex number over `real` with only the operations the FFT needs.
struct C {
  real x, y;

  C() : x(0), y(0) {}
  C(real x, real y) : x(x), y(y) {}

  inline C operator+(const C &c) const { return C(x + c.x, y + c.y); }
  inline C operator-(const C &c) const { return C(x - c.x, y - c.y); }
  inline C operator*(const C &c) const {
    return C(x * c.x - y * c.y, x * c.y + y * c.x);
  }
  inline C conj() const { return C(x, -y); }
};

const real PI = acosl(-1);
// log2 of the largest transform size the tables below currently support.
int base = 1;
// Twiddle factors, laid out so that the stage with half-width k reads
// rts[k .. 2k).
vector<C> rts = {{0, 0}, {1, 0}};
// Bit-reversal permutation for size 2^base.
vector<int> rev = {0, 1};

// Grows the bit-reversal and twiddle tables so that transforms of size
// 2^nbase are supported. Does nothing when the tables are already big enough.
void ensure_base(int nbase) {
  if (nbase <= base) return;
  rev.resize(1 << nbase);
  rts.resize(1 << nbase);
  for (int i = 0; i < (1 << nbase); i++) {
    rev[i] = (rev[i >> 1] >> 1) + ((i & 1) << (nbase - 1));
  }
  while (base < nbase) {
    real angle = PI * 2.0 / (1 << (base + 1));
    for (int i = 1 << (base - 1); i < (1 << base); i++) {
      // Even slots reuse the previous level; odd slots get a fresh root.
      rts[i << 1] = rts[i];
      real angle_i = angle * (2 * i + 1 - (1 << base));
      rts[(i << 1) + 1] = C(cos(angle_i), sin(angle_i));
    }
    ++base;
  }
}

// In-place FFT of the first n elements of a. n must be a power of two.
void fft(vector<C> &a, int n) {
  assert((n & (n - 1)) == 0);
  int zeros = __builtin_ctz(n);
  ensure_base(zeros);
  // rev is built for size 2^base; shifting adapts it to size n.
  int shift = base - zeros;
  for (int i = 0; i < n; i++) {
    if (i < (rev[i] >> shift)) {
      swap(a[i], a[rev[i] >> shift]);
    }
  }
  for (int k = 1; k < n; k <<= 1) {
    for (int i = 0; i < n; i += 2 * k) {
      for (int j = 0; j < k; j++) {
        C z = a[i + j + k] * rts[j + k];
        a[i + j + k] = a[i + j] - z;
        a[i + j] = a[i + j] + z;
      }
    }
  }
}

// Returns the linear convolution of a and b (size a.size() + b.size() - 1),
// each entry rounded to the nearest integer. a is packed into the real part
// and b into the imaginary part, so one forward transform plus one half-size
// transform suffice. Returns an empty vector if either input is empty.
//
// The result is only as exact as long double arithmetic allows: callers must
// keep the input magnitudes small enough for llround to recover the true
// integer (see cyclic_convolution below for how this file does that).
vector<ll> multiply(const vector<ll> &a, const vector<ll> &b) {
  if (a.empty() || b.empty()) return {};
  int need = (int)a.size() + (int)b.size() - 1;
  int nbase = 1;
  while ((1 << nbase) < need) nbase++;
  ensure_base(nbase);
  int sz = 1 << nbase;
  vector<C> fa(sz);
  for (int i = 0; i < sz; i++) {
    // Convert straight to real instead of narrowing through int.
    real x = (i < (int)a.size() ? static_cast<real>(a[i]) : 0);
    real y = (i < (int)b.size() ? static_cast<real>(b[i]) : 0);
    fa[i] = C(x, y);
  }
  fft(fa, sz);
  C r(0, -0.25 / (sz >> 1)), s(0, 1), t(0.5, 0);
  for (int i = 0; i <= (sz >> 1); i++) {
    int j = (sz - i) & (sz - 1);
    C z = (fa[j] * fa[j] - (fa[i] * fa[i]).conj()) * r;
    fa[j] = (fa[i] * fa[i] - (fa[j] * fa[j]).conj()) * r;
    fa[i] = z;
  }
  for (int i = 0; i < (sz >> 1); i++) {
    C A0 = (fa[i] + fa[i + (sz >> 1)]) * t;
    C A1 = (fa[i] - fa[i + (sz >> 1)]) * t * rts[(sz >> 1) + i];
    fa[i] = A0 + A1 * s;
  }
  fft(fa, sz >> 1);
  vector<ll> ret(need);
  for (int i = 0; i < need; i++) {
    // Even outputs sit in the real parts, odd outputs in the imaginary parts.
    ret[i] = llround(i & 1 ? fa[i >> 1].y : fa[i >> 1].x);
  }
  return ret;
}
};  // namespace FastFourierTransform

// Returns base^n modulo the global p using iterative binary exponentiation.
// pmod(x, 0) is 1 for every x (including x == 0). n is expected to be
// non-negative. Every product is reduced immediately, so intermediates stay
// below p^2 instead of p^3.
ll pmod(ll base, ll n) {
  ll result = 1;
  base %= p;
  while (n > 0) {
    if (n & 1) result = (result * base) % p;
    base = (base * base) % p;
    n >>= 1;
  }
  return result;
}

// Cyclic convolution of a and b with indices taken modulo a.size() and values
// taken modulo MOD. Entries of a must lie in [0, MOD); entries of b are
// assumed small (here they are counts bounded by p).
//
// a is split into its low 15 bits and the remaining high bits, and each half
// is convolved separately. Feeding residues near 1e9 directly into the
// floating-point FFT loses the low-order digits before llround, so the halves
// keep every FFT input below 2^15 and the pieces are recombined exactly in
// integer arithmetic.
VL cyclic_convolution(const VL &a, const VL &b) {
  const int SPLIT_BITS = 15;
  const ll SPLIT = 1ll << SPLIT_BITS;
  const int len = (int)a.size();

  VL low(len), high(len);
  for (int j = 0; j < len; ++j) {
    low[j] = a[j] & (SPLIT - 1);
    high[j] = a[j] >> SPLIT_BITS;
  }
  const VL prod_low = FastFourierTransform::multiply(low, b);
  const VL prod_high = FastFourierTransform::multiply(high, b);

  VL out(len, 0);
  for (int j = 0; j < (int)prod_low.size(); ++j) {
    const ll term = (prod_low[j] % MOD + (prod_high[j] % MOD) * SPLIT) % MOD;
    out[j % len] = (out[j % len] + term) % MOD;
  }
  return out;
}

// Reads p, n, k, b and then n coefficients v[i]. Builds, for every i, the
// histogram of v[i] * j^k mod p over j in [0, p), folds the histograms
// together by cyclic convolution, and prints entry b of the result mod MOD.
int main() {
  cin >> p >> n >> k >> b;
  v.assign(n, 0);
  rep(i, n) cin >> v[i];

  // j^k mod p does not depend on i, so compute it once per j.
  VL power(p);
  rep(j, p) power[j] = pmod(j, k);

  nums.assign(n, VL(p, 0));
  rep(i, n) {
    // Normalise into [0, p) so the product cannot overflow or go negative.
    const ll coef = ((v[i] % p) + p) % p;
    rep(j, p) nums[i][(power[j] * coef) % p]++;
  }

  VL ans = nums[0];
  rep(i, 1, n) ans = cyclic_convolution(ans, nums[i]);

  cout << ans[b] << endl;
  return 0;
}