結果

問題 No.206 数の積集合を求めるクエリ
ユーザー antaanta
提出日時 2015-05-28 20:10:46
言語 Perl
(5.38.2)
結果
AC  
実行時間 87 ms / 7,000 ms
コード長 17,524 bytes
コンパイル時間 1,838 ms
コンパイル使用メモリ 108,384 KB
実行使用メモリ 8,340 KB
最終ジャッジ日時 2024-07-06 12:11:29
合計ジャッジ時間 5,080 ms
ジャッジサーバーID
(参考情報)
judge1 / judge2
外部呼び出し有り
このコードへのチャレンジ
(要ログイン)

テストケース

テストケース表示
入力 結果 実行時間
実行使用メモリ
testcase_00 AC 56 ms
7,484 KB
testcase_01 AC 55 ms
7,532 KB
testcase_02 AC 55 ms
7,456 KB
testcase_03 AC 55 ms
7,420 KB
testcase_04 AC 56 ms
7,592 KB
testcase_05 AC 55 ms
7,424 KB
testcase_06 AC 56 ms
7,440 KB
testcase_07 AC 57 ms
7,424 KB
testcase_08 AC 57 ms
7,476 KB
testcase_09 AC 57 ms
7,424 KB
testcase_10 AC 56 ms
7,592 KB
testcase_11 AC 56 ms
7,552 KB
testcase_12 AC 57 ms
7,540 KB
testcase_13 AC 56 ms
7,552 KB
testcase_14 AC 56 ms
7,488 KB
testcase_15 AC 57 ms
7,540 KB
testcase_16 AC 56 ms
7,460 KB
testcase_17 AC 78 ms
8,312 KB
testcase_18 AC 67 ms
8,188 KB
testcase_19 AC 76 ms
8,156 KB
testcase_20 AC 67 ms
7,808 KB
testcase_21 AC 70 ms
8,064 KB
testcase_22 AC 70 ms
8,020 KB
testcase_23 AC 76 ms
8,340 KB
testcase_24 AC 87 ms
8,064 KB
testcase_25 AC 83 ms
8,136 KB
testcase_26 AC 76 ms
8,192 KB
testcase_27 AC 71 ms
7,832 KB
testcase_28 AC 78 ms
8,100 KB
testcase_29 AC 80 ms
8,192 KB
testcase_30 AC 74 ms
7,792 KB
権限があれば一括ダウンロードができます
コンパイルメッセージ
Main.pl syntax OK

ソースコード

diff #

system './' . $exe_name;

BEGIN {
    $exe_name = $^O eq 'MSWin32' ? 'a.exe' : 'a.out';
    return if -e $exe_name;
    open my $fh, '>', 'tmp.cpp';
    print $fh <<'CODE';
#line 8

#include <string>
#include <vector>
#include <algorithm>
#include <numeric>
#include <set>
#include <map>
#include <queue>
#include <iostream>
#include <sstream>
#include <cstdio>
#include <cmath>
#include <ctime>
#include <cstring>
#include <cctype>
#include <limits>
#include <functional>

#include <cstdint>

#ifdef NDEBUG
#undef NDEBUG
#endif
#include <cassert>

#include <nmmintrin.h>
#if defined(_MSC_VER)
#include <intrin.h>
#endif

#ifdef _DEBUG
#undef assert
#include "C:\Dropbox\backup\implements\Util\MyAssert.hpp"
#define assert my_assert
#else
#undef assert
#define assert(x) 
#endif

#define rep(i,n) for(int (i)=0;(i)<(int)(n);++(i))
#define rer(i,l,u) for(int (i)=(int)(l);(i)<=(int)(u);++(i))
#define reu(i,l,u) for(int (i)=(int)(l);(i)<(int)(u);++(i))
#if defined(_MSC_VER) || __cplusplus > 199711L
#define aut(r,v) auto r = (v)
#else
#define aut(r,v) __typeof(v) r = (v)
#endif
#define each(it,o) for(aut(it, (o).begin()); it != (o).end(); ++ it)
#define all(o) (o).begin(), (o).end()
#define pb(x) push_back(x)
#define mp(x,y) make_pair((x),(y))
#define mset(m,v) memset(m,v,sizeof(m))
#define INF 0x3f3f3f3f
#define INFL 0x3f3f3f3f3f3f3f3fLL
using namespace std;
typedef vector<int> vi; typedef pair<int,int> pii; typedef vector<pair<int,int> > vpii; typedef long long ll;
template<typename T, typename U> inline void amin(T &x, U y) { if(y < x) x = y; }
template<typename T, typename U> inline void amax(T &x, U y) { if(x < y) x = y; }

#ifdef _MSC_VER
#define alignas(x) __declspec(align(x))
#endif

template<typename R_>
struct IntOpDefault {
	typedef R_ R;
	static void copy(R *res, const R *p, int n) {
		for(int i = 0; i < n; ++ i)
			res[i] = p[i];
	}

	static void fill_zero(R *p, int n) {
		for(int i = 0; i < n; ++ i)
			p[i] = R();
	}

	static R inverse(R x) {
		R i = x, p, TWO = R(2), ONE = R(1);
		do {
			p = i * x;
			i *= TWO - p;
		}while(!(p == ONE));
		return i;
	}

	static void swap_range(R *p, R *q, int n) {
		using std::swap;
		for(int i = 0; i < n; ++ i)
			swap(p[i], q[i]);
	}

	static void three_point_fft(int p, R *x0, R *x1, R *x2, R *t1, R *t2) {
		//x0' = x0 +        t1 +        t2
		//x1' = x0 + X^p    t1 + X^{2p} t2
		//x2' = x0 + X^{2p} t1 + X^p    t2
		//
		//X^p    t = -tb       + (ta - tb) X^p
		//X^{2p} t = (xb - xa) + -xa       X^p 
		//
		//x0' = (x0a + t1a +       t2a) + (x0b + t1b       + t2b) X^p
		//x1' = (x0a - t1b - t2a + t2b) + (x0b + t1a - t1b - t2a) X^p
		//x2' = (x0a - t1a + t1b - t2b) + (x0b - t1a + t2a - t2b) X^p

		for(int a = 0, b = p; a < p; ++ a, ++ b) {
			R x0a = x0[a], t1a = t1[a], t2a = t2[a];
			R x0b = x0[b], t1b = t1[b], t2b = t2[b];
			x0[a] = x0a + t1a + t2a;
			x0[b] = x0b + t1b + t2b;
			x1[a] = x0a - t1b - t2a + t2b;
			x1[b] = x0b + t1a - t1b - t2a;
			x2[a] = x0a - t1a + t1b - t2b;
			x2[b] = x0b - t1a + t2a - t2b;
		}
	}
};

struct u32x4 {
	__m128i v;
	u32x4(): v(_mm_setzero_si128()) { }
	u32x4(const __m128i &v_): v(v_) { }

	static u32x4 set1(uint32_t x) {
		return u32x4(_mm_set1_epi32(x));
	}
	template<typename T> static u32x4 loadu(const T *p) {
		return u32x4(_mm_loadu_si128(reinterpret_cast<const __m128i*>(p)));
	}
	template<typename T> void storeu(T *p) const {
		_mm_storeu_si128(reinterpret_cast<__m128i*>(p), v);
	}

	u32x4 operator*(const u32x4 &that) const {
		return u32x4(_mm_mullo_epi32(v, that.v));
	}
	u32x4 operator+(const u32x4 &that) const {
		return u32x4(_mm_add_epi32(v, that.v));
	}
	u32x4 operator-(const u32x4 &that) const {
		return u32x4(_mm_sub_epi32(v, that.v));
	}
	u32x4 &operator+=(const u32x4 &that) {
		return *this = *this + that;
	}

	template<int s> u32x4 slli() const {
		return u32x4(_mm_slli_si128(v, s));
	}
	u32x4 slli4() const { return slli<4>(); }
	u32x4 slli8() const { return slli<8>(); }
	u32x4 slli12() const { return slli<12>(); }

	template<int s> u32x4 srli() const {
		return u32x4(_mm_srli_si128(v, s));
	}

	u32x4 srli4() const { return srli<4>(); }
	u32x4 srli8() const { return srli<8>(); }
	u32x4 srli12() const { return srli<12>(); }
};


struct IntOp32 : IntOpDefault<IntOp32> {
	uint32_t x;
	IntOp32(): x(0) { }
	explicit IntOp32(uint32_t x_): x(x_) { }

	IntOp32 &operator+=(const IntOp32 &that) { x += that.x; return *this; }
	IntOp32 &operator-=(const IntOp32 &that) { x -= that.x; return *this; }
	IntOp32 &operator*=(const IntOp32 &that) { x *= that.x; return *this; }

	IntOp32 operator+(const IntOp32 &that) const { return IntOp32(x + that.x); }
	IntOp32 operator-(const IntOp32 &that) const { return IntOp32(x - that.x); }
	IntOp32 operator*(const IntOp32 &that) const { return IntOp32(x * that.x); }
	IntOp32 operator-() const { return IntOp32(~x + 1); }

	bool operator==(const IntOp32 &that) const { return x == that.x; }

	//resは (PN_4 + QN_4) * 4 のサイズを書き込む
	template<int PN_4, int QN_4>
	static void convolute_schoolbook_template(uint32_t *res, const uint32_t *p, const uint32_t *q) {
		u32x4 sum[PN_4 + QN_4];

		for(int i = 0; i < PN_4; ++ i) {
			u32x4 x0 = u32x4::set1(p[i * 4 + 0]);
			u32x4 x1 = u32x4::set1(p[i * 4 + 1]);
			u32x4 x2 = u32x4::set1(p[i * 4 + 2]);
			u32x4 x3 = u32x4::set1(p[i * 4 + 3]);

			for(int j = 0; j < QN_4; ++ j) {
				u32x4 y = u32x4::loadu(q + j * 4);
				u32x4 z0 = x0 * y;
				u32x4 z1 = x1 * y;
				u32x4 z2 = x2 * y;
				u32x4 z3 = x3 * y;

				sum[i + j + 0] += (z0 + z1.slli4()) + (z2.slli8() + z3.slli12());
				sum[i + j + 1] += (z1.srli8() + z2.srli4() + z3).srli4();
			}
		}

		for(int i = 0; i < PN_4 + QN_4; ++ i)
			sum[i].storeu(res + i * 4);
	}

	typedef u32x4 Vec;
	static const int V = 4;

	enum { KARATSUBA_THRESHOLD_4 = 4 };

#define ENABLE_KARATSUBA(PN_4, QN_4) \
	((PN_4) >= KARATSUBA_THRESHOLD_4 && (QN_4) >= KARATSUBA_THRESHOLD_4)

	template<int PN_4, int QN_4>
	static typename enable_if<ENABLE_KARATSUBA(PN_4,QN_4)>::type convolute_template(uint32_t *res, const uint32_t *p, const uint32_t *q) {
		enum { LO_4 = (PN_4 + 1) / 2, HP_4 = PN_4 - LO_4, HQ_4 = QN_4 - LO_4 };
		static_assert(0 < LO_4 && 0 < HQ_4 && HP_4 <= LO_4 && HQ_4 <= LO_4, "parameters");
		uint32_t t0[LO_4 * 4], t1[LO_4 * 4], r1[LO_4 * 4 * 2];
		uint32_t * const r0 = res, * const rinf = res + LO_4 * 4 * 2;
		add_template<LO_4 * 4, HP_4 * 4>(t0, p, p + LO_4 * 4);
		add_template<LO_4 * 4, HQ_4 * 4>(t1, q, q + LO_4 * 4);
		convolute_template<LO_4, LO_4>(r1, t0, t1);
		convolute_template<LO_4, LO_4>(r0, p, q);
		convolute_template<HP_4, HQ_4>(rinf, p + LO_4 * 4, q + LO_4 * 4);
		subtract_template<LO_4 * 4 * 2>(r1, r0);
		subtract_template<HP_4 * 4 + HQ_4 * 4>(r1, rinf);
		add_template<LO_4 * 4 * 2>(res + LO_4 * 4, r1);
	}

	template<int PN_4, int QN_4>
	static typename enable_if<!ENABLE_KARATSUBA(PN_4,QN_4)>::type convolute_template(uint32_t *res, const uint32_t *p, const uint32_t *q) {
		return convolute_schoolbook_template<PN_4,QN_4>(res, p, q);
	}

	template<int PN_4, int QN_4>
	static void convolute_template(R *res, const R *p, const R *q) {
		convolute_template<PN_4,QN_4>((uint32_t*)res, (const uint32_t*)p, (const uint32_t*)q);
	}
#undef ENABLE_KARATSUBA


	template<int N>
	static void add_template(uint32_t *p, const uint32_t *q) {
		for(int i = 0; i < N / V; ++ i) {
			Vec sum = u32x4::loadu(p + i * V) + Vec::loadu(q + i * V);
			sum.storeu(p + i * V);
		}
		for(int i = N / V * V; i < N; ++ i)
			p[i] += q[i];
	}
	template<int PN, int QN>
	static void add_template(uint32_t *res, const uint32_t *p, const uint32_t *q) {
		static_assert(PN >= QN, "PN >= QN");
		for(int i = 0; i < QN / V; ++ i) {
			Vec sum = Vec::loadu(p + i * V) + Vec::loadu(q + i * V);
			sum.storeu(res + i * V);
		}
		for(int i = QN / V * V; i < QN; ++ i)
			res[i] = p[i] + q[i];
		for(int i = QN; i < PN; ++ i)
			res[i] = p[i];
	}

	template<int N>
	static void subtract_template(uint32_t *p, const uint32_t *q) {
		for(int i = 0; i < N / V; ++ i) {
			Vec diff = Vec::loadu(p + i * V) - Vec::loadu(q + i * V);
			diff.storeu(p + i * V);
		}
		for(int i = N / V * V; i < N; ++ i)
			p[i] -= q[i];
	}
	template<int PN, int QN>
	static void subtract_template(uint32_t *res, const uint32_t *p, const uint32_t *q) {
		static_assert(PN >= QN, "PN >= QN");
		for(int i = 0; i < QN / V; ++ i) {
			Vec diff = Vec::loadu(p + i * V) - Vec::loadu(q + i * V);
			diff.storeu(res + i * V);
		}
		for(int i = QN / V * V; i < QN; ++ i)
			res[i] = p[i] - q[i];
		for(int i = QN; i < PN; ++ i)
			res[i] = p[i];
	}

	static void add(R *p, const R *q, int n) {
		int n_t = n / V * V;
		for(int i = 0; i < n_t; i += V) {
			Vec sum = Vec::loadu(p + i) + Vec::loadu(q + i);
			sum.storeu(p + i);
		}
		for(int i = n_t; i < n; ++ i)
			p[i] += q[i];
	}

	static void subtract(R *p, const R *q, int n) {
		int n_t = n / V * V;
		for(int i = 0; i < n_t; i += V) {
			Vec diff = Vec::loadu(p + i) - Vec::loadu(q + i);
			diff.storeu(p + i);
		}
		for(int i = n_t; i < n; ++ i)
			p[i] -= q[i];
	}

	static void negate_all(R *p, const R *q, int n) {
		int n_t = n / V * V;
		Vec zero = Vec();
		for(int i = 0; i < n_t; i += V) {
			Vec neg = zero - Vec::loadu(q + i);
			neg.storeu(p + i);
		}
		for(int i = n_t; i < n; ++ i)
			p[i] = -q[i];
	}

	static void three_point_fft(int p, R *x0, R *x1, R *x2, R *t1, R *t2) {
		int p_t = p / V * V;
		for(int a = 0, b = p; a < p_t; a += V, b += V) {
			Vec x0a = Vec::loadu(x0 + a), t1a = Vec::loadu(t1 + a), t2a = Vec::loadu(t2 + a);
			Vec x0b = Vec::loadu(x0 + b), t1b = Vec::loadu(t1 + b), t2b = Vec::loadu(t2 + b);
			(x0a + t1a + t2a).storeu(x0 + a);
			(x0b + t1b + t2b).storeu(x0 + b);
			(x0a - t1b - t2a + t2b).storeu(x1 + a);
			(x0b + t1a - t1b - t2a).storeu(x1 + b);
			(x0a - t1a + t1b - t2b).storeu(x2 + a);
			(x0b - t1a + t2a - t2b).storeu(x2 + b);
		}
		for(int a = p_t, b = p + p_t; a < p; ++ a, ++ b) {
			R x0a = x0[a], t1a = t1[a], t2a = t2[a];
			R x0b = x0[b], t1b = t1[b], t2b = t2[b];
			x0[a] = x0a + t1a + t2a;
			x0[b] = x0b + t1b + t2b;
			x1[a] = x0a - t1b - t2a + t2b;
			x1[b] = x0b + t1a - t1b - t2a;
			x2[a] = x0a - t1a + t1b - t2b;
			x2[b] = x0b - t1a + t2a - t2b;
		}
	}

	static void multiply_scalar(R *p, int n, R scalar) {
		int n_t = n / V * V;
		Vec x = Vec::set1(scalar.x);
		for(int i = 0; i < n_t; i += V) {
			Vec prod = Vec::loadu(p + i) * x;
			prod.storeu(p + i);
		}
		for(int i = n_t; i < n; ++ i)
			p[i] *= scalar;
	}
};


template<int k>
struct power_of_three_template {
	enum { val = power_of_three_template<k-1>::val * 3 };
};
template<>
struct power_of_three_template<0> {
	enum { val = 1 };
};

struct IntPolynomial {
	typedef IntOp32 R;

	static const int MaxK = 10;
	static int power_of_three(int k) {
		assert(0 <= k && k <= MaxK);
		static const int table[MaxK+1] = {
			1, 3, 9, 27, 81, 243, 729, 2187, 6561, 19683, 59049
		};
		return table[k];
	}

	template<int Xn, int Yn, int N, int k>
	static void schonhage_strassen_template(R *res, const R *X, const R *Y);
	static void schonhage_strassen_decompose(R *x, const R *X, int K, int n, int N, int m);
	static void schonhage_strassen_compose(R *res, const R *x, int K, int n, int N, int m);
	static void base3_modulo(R *x, int p);
	static void base3_shift(R *res, const R *x, int p, int shift, R *tmp);
	static void base3_fft(bool inv, int k, R *x, int p, R *tmp_buffer);
};

template<int Xn, int Yn, int N, int k>
void IntPolynomial::schonhage_strassen_template(R *res, const R *X, const R *Y) {
	static_assert(0 < k && k <= MaxK, "param");
	static_assert(0 < N && Xn <= N && Yn <= N, "param");
	enum { K = power_of_three_template<k>::val, K_3 = K / 3 };
	static_assert(N % K == 0, "param");
	enum { m = N / K };	//X,YをK個に分割するときの1つのサイズ
	enum { p = ((m - 1) / K_3 + 1) * K_3 };	//(K | 3p)であり、2m <= 2p である最小の数
	enum { n = p * 2, n_4 = (n + 3) / 4,  };	//一つのPの要素として扱うサイズ
	static_assert((int)n < (int)N, "n < N");
	enum { order = p * 3 };
	static_assert(order % K == 0, "order");
	enum { omega = order / K };	//K^th root of unity
	enum { inv_omega = order - omega };
	enum { nK = n * K };

	R *tmp_buffer = new R[nK * 2 + n * 3];
	R *x = tmp_buffer, *y = x + nK, *tmp = y + nK;

	schonhage_strassen_decompose(x, X, K, n, Xn, m);
	schonhage_strassen_decompose(y, Y, K, n, Yn, m);

	base3_fft(false, k, x, p, tmp);
	base3_fft(false, k, y, p, tmp);

	{	alignas(16) R tmp_x[n_4 * 4], tmp_y[n_4 * 4], tmp_res[n_4 * 4 * 2];
		for(int i = 0; i < nK; i += n) {
			R::copy(tmp_x, x + i, n);
			R::fill_zero(tmp_x + n, n_4 * 4 - n);
			R::copy(tmp_y, y + i, n);
			R::fill_zero(tmp_y + n, n_4 * 4 - n);

			R::convolute_template<n_4, n_4>(tmp_res, tmp_x, tmp_y);

			base3_modulo(tmp_res, p);
			R::copy(x + i, tmp_res, n);
		}
	}

	base3_fft(true, k, x, p, tmp);

	R invK = R::inverse(R(K));
	R::multiply_scalar(x, nK, invK);

	schonhage_strassen_compose(res, x, K, n, N, m);

	delete[] tmp_buffer;
}

void IntPolynomial::schonhage_strassen_decompose(R *x, const R *X, int K, int n, int N, int m) {
	assert((N + m - 1) / m <= K);
	R::fill_zero(x, n * K);
	for(int i = 0, j = 0; ; i += n, j += m) {
		if(j + m <= N) {
			R::copy(x + i, X + j, m);
		}else {
			R::copy(x + i, X + j, N - j);
			break;
		}
	}
}

void IntPolynomial::schonhage_strassen_compose(R *res, const R *x, int K, int n, int N, int m) {
	assert(N >= n);
	int nK = n * K;
	R::fill_zero(res, N);
	for(int i = 0, j = 0; i < nK; i += n) {
		if(j + n <= N) {
			R::add(res + j, x + i, n);
		}else {
			int s = N - j;
			R::add(res + j, x + i, s);
			R::add(res, x + i + s, n - s);
		}
		if((j += m) >= N)
			j -= N;
	}
}

//4p-1サイズのxを(X^{2p} + X^p + 1)で剰余を取る
void IntPolynomial::base3_modulo(R *x, int p) {
	//4p-2..2pのうち、距離がp未満なら相互に作用しないので、
	//4p-2..3p, 3p-1..2p の2つに分割して、その中では連続した演算となる
	R::subtract(x + p    , x + p * 3, p - 1);
	R::subtract(x + p * 2, x + p * 3, p - 1);
	R::subtract(x        , x + p * 2, p);
	R::subtract(x + p    , x + p * 2, p);
}

//サイズ2pのxに対してX^shiftをかけた後(X^{2p} + X^p + 1)で剰余を取る
//tmpは 2p のサイズが必要
void IntPolynomial::base3_shift(R *res, const R *x, int p, int shift, R *tmp) {
	assert(0 <= shift && shift < 3 * p);
	assert(res != x);
	if(shift == 0) {
		R::copy(res, x, p * 2);
	}else if(shift == p) {
		//X^p x = X^{2p} x2 + X^p x1
		//res = X^p (x1 - x2) - x2
		R::negate_all(res, x + p, p);
		R::copy(res + p, x, p);
		R::subtract(res + p, x + p, p);
	}else if(shift < p) {
		//X^shift x = X^{2p} x2 + X^p x1 + x0
		//res = X^p (x1 - x2) + (x0 - x2)
		R::copy(res + shift, x, p * 2 - shift);
		R::fill_zero(res, shift);
		R::subtract(res, x + (p * 2 - shift), shift);
		R::subtract(res + p, x + (p * 2 - shift), shift);
	}else if(shift == p * 2) {
		//X^{2p} x = X^{3p} x3 + X^{2p} x2
		//res = X^p (-x2) + (x3 - x2)
		R::negate_all(res + p, x, p);
		R::copy(res, x + p, p);
		R::subtract(res, x, p);
	}else if(shift < p * 2) {
		//X^shift x = X^{3p} x3 + X^{2p} x2 + X^p x1
		//res = (x1 - x2) X^p + (x3 - x2)
		int s = shift - p;
		const R *x1 = x, *x2 = x + (p - s), *x3 = x + (2 * p - s);
		R::copy(res + p + s, x1, p - s);
		R::fill_zero(res + p, s);
		R::copy(res, x3, s);
		R::fill_zero(res + s, p - s);
		R::subtract(res, x2, p);
		R::subtract(res + p, x2, p);
		return;
	}else {
		//X^shift x = X^{4p} x4 + X^{3p} x3 + X^{2p} x2
		//res = (x4 - x2) X^p + (x3 - x2)
		int s = shift - p * 2;
		R::copy(res, x + (p - s), p + s);
		R::fill_zero(res + (p + s), p - s);
		R::subtract(res + s, x, p - s);
		R::subtract(res + p + s, x, p - s);
		return;
	}
}

//tmp_bufferは3nのサイズ
void IntPolynomial::base3_fft(bool inv, int k, R *x, int p, R *tmp_buffer) {
	int n = p * 2, order = p * 3;
	int K = power_of_three(k), K_3 = K / 3;

	for(int i = 0; i < K; ++ i) {
		int j = 0;
		for(int t = 0, a = i; t < k; ++ t){
			j = j * 3 + a % 3;
			a /= 3;
		}
		if(i < j)
			R::swap_range(x + i * n, x + j * n, n);
	}

	R *t1 = tmp_buffer, *t2 = t1 + n, *tmp = t2 + n;
	int omega = !inv ? order / K : order - order / K;
	int omegaK_3 = omega * K_3 % order;
	for(int L = 3; L <= K; L *= 3) {
		int L_3 = L / 3, nL_3 = n * L_3;
		int ww = omega * (K / L) % order, ww2 = ww * 2 % order;
		for(int r = 0; r < K; r += L) {
			int w = 0, w2 = 0;
			R *x0 = x + r * n, *x1 = x0 + nL_3, *x2 = x1 + nL_3;
			for(int j = 0; j < L_3; ++ j) {
				base3_shift(t1, x1, p, w, tmp);
				base3_shift(t2, x2, p, w2, tmp);

				if(!inv)
					R::three_point_fft(p, x0, x1, x2, t1, t2);
				else
					R::three_point_fft(p, x0, x1, x2, t2, t1);

				if((w += ww) >= order) w -= order;
				if((w2 += ww2) >= order) w2 -= order;
				x0 += n, x1 += n, x2 += n;
			}
		}
	}
}

alignas(16) uint32_t p[100000], q[100000], res[200000];
int main() {
    int L, M, N;
    scanf("%d%d%d", &L, &M, &N);
    rep(i, L) {
        int a;
        scanf("%d", &a), -- a;
        p[N-1-a] = 1;
    }
    rep(i, M) {
        int b;
        scanf("%d", &b), -- b;
        q[b] = 1;
    }
	using R = IntPolynomial::R;
	IntPolynomial::schonhage_strassen_template<100000,100000,200232,5>((R*)res, (const R*)p, (const R*)q);
    int Q;
    scanf("%d", &Q);
    rep(i, Q) {
        int ans = res[N-1-i];
        printf("%d\n", ans);
    }
    return 0;
}

CODE
    system "g++ -m64 -O2 -lm -mavx -std=c++11 tmp.cpp -o $exe_name 2> my_compile.log";
    if($? != 0) {
        open(my $fh, '<', 'my_compile.log');
        while(<$fh>) { print STDERR $_; }
        die 'compile error';
    }
}
0