#94425 (C++11) No.206 数の積集合を求めるクエリ

提出ソース

結果

問題	No.206 数の積集合を求めるクエリ
ユーザー	Min_25
提出日時	2016-05-28 12:12:59
言語	C++11 (gcc 11.4.0)
結果	AC
実行時間	23 ms / 7,000 ms
コード長	5,325 bytes
コンパイル時間	628 ms
コンパイル使用メモリ	75,468 KB
実行使用メモリ	6,272 KB
最終ジャッジ日時	2024-10-07 17:20:42
合計ジャッジ時間	2,944 ms
ジャッジサーバーID （参考情報）	judge3 / judge4

このコードへのチャレンジ
（要ログイン）

テストケース

テストケース表示

入力	結果	実行時間実行使用メモリ
testcase_00	AC	2 ms 5,248 KB
testcase_01	AC	2 ms 5,248 KB
testcase_02	AC	1 ms 5,248 KB
testcase_03	AC	1 ms 5,248 KB
testcase_04	AC	2 ms 5,248 KB
testcase_05	AC	1 ms 5,248 KB
testcase_06	AC	2 ms 5,248 KB
testcase_07	AC	2 ms 5,248 KB
testcase_08	AC	2 ms 5,248 KB
testcase_09	AC	2 ms 5,248 KB
testcase_10	AC	2 ms 5,248 KB
testcase_11	AC	2 ms 5,248 KB
testcase_12	AC	2 ms 5,248 KB
testcase_13	AC	2 ms 5,248 KB
testcase_14	AC	2 ms 5,248 KB
testcase_15	AC	2 ms 5,248 KB
testcase_16	AC	2 ms 5,248 KB
testcase_17	AC	21 ms 5,760 KB
testcase_18	AC	20 ms 5,888 KB
testcase_19	AC	21 ms 5,760 KB
testcase_20	AC	19 ms 5,888 KB
testcase_21	AC	19 ms 5,888 KB
testcase_22	AC	19 ms 5,888 KB
testcase_23	AC	21 ms 5,760 KB
testcase_24	AC	23 ms 6,144 KB
testcase_25	AC	22 ms 6,144 KB
testcase_26	AC	21 ms 6,144 KB
testcase_27	AC	21 ms 5,888 KB
testcase_28	AC	23 ms 6,272 KB
testcase_29	AC	23 ms 6,272 KB
testcase_30	AC	22 ms 6,272 KB

権限があれば一括ダウンロードができます

ソースコード

raw source code

#include <cstdio>
#include <cassert>
#include <cmath>
#include <cstring>

#include <algorithm>
#include <iostream>
#include <vector>
#include <functional>

#define _fetch(_1, _2, _3, _4, name, ...) name
#define rep2(i, n) rep3(i, 0, n)
#define rep3(i, a, b) rep4(i, a, b, 1)
#define rep4(i, a, b, c) for (int i = int(a); i < int(b); i += int(c))
#define rep(...) _fetch(__VA_ARGS__, rep4, rep3, rep2, _)(__VA_ARGS__)

using namespace std;

using i64 = long long;
using u32 = unsigned;
using u64 = unsigned long long;
using f80 = long double;

namespace ntt {

template <u64 mod, u64 prim_root>
class Mod64 {
private:
  using u128 = __uint128_t;
  static constexpr u64 mul_inv(u64 n, int e=6, u64 x=1) {
    return e == 0 ? x : mul_inv(n, e-1, x*(2-x*n));
  }
public:
  static constexpr u64 inv = mul_inv(mod);
  static constexpr u64 r2 = -u128(mod) % mod;
  static constexpr int level = __builtin_ctzll(mod - 1);
  static_assert(inv * mod == 1, "invalid 1/M modulo 2^64.");

  Mod64() {}
  Mod64(u64 n) : x(init(n)) {};
  static u64 modulo() { return mod; }
  static u64 init(u64 w) { return reduce(u128(w) * r2); }
  static u64 reduce(const u128 w) { return u64(w >> 64) + mod - ((u128(u64(w) * inv) * mod) >> 64); }
  static Mod64 omega() { return Mod64(prim_root).pow((mod - 1) >> level); }
  Mod64& operator += (Mod64 rhs) { this->x += rhs.x; return *this; }
  Mod64& operator -= (Mod64 rhs) { this->x += 2 * mod - rhs.x; return *this; }
  Mod64& operator *= (Mod64 rhs) { this->x = reduce(u128(this->x) * rhs.x); return *this; }
  Mod64 operator + (Mod64 rhs) const { return Mod64(*this) += rhs; }
  Mod64 operator - (Mod64 rhs) const { return Mod64(*this) -= rhs; }
  Mod64 operator * (Mod64 rhs) const { return Mod64(*this) *= rhs; }
  u64 get() const { return reduce(this->x) % mod; }
  void set(u64 n) const { this->x = n; }
  Mod64 pow(u64 exp) const {
    Mod64 ret = Mod64(1);
    for (Mod64 base = *this; exp; exp >>= 1, base *= base) if (exp & 1) ret *= base;
    return ret;
  }
  Mod64 inverse() const { return pow(mod - 2); }
  friend ostream& operator << (ostream& os, const Mod64& m) { return os << m.get(); }
  static void debug() {
    printf("%llu %llu %llu %llu\n", mod, inv, r2, omega().get());
  }
  u64 x;
};

template <typename mod_t>
void convolute(mod_t* A, int s1, mod_t* B, int s2) {
  int s = s1 + s2 - 1;
  int size = 1;
  while (size < s) size <<= 1;
  mod_t roots[mod_t::level] = { mod_t::omega() };
  rep(i, 1, mod_t::level) roots[i] = roots[i - 1] * roots[i - 1];
  fill(A + s1, A + size, 0); ntt_dit4(A, size, 1, roots);
  if (A == B && s1 == s2) {
    rep(i, size) A[i] *= A[i];
  } else {
    fill(B + s2, B + size, 0); ntt_dit4(B, size, 1, roots);
    rep(i, size) A[i] *= B[i];
  }
  ntt_dit4(A, size, -1, roots);
  mod_t inv = mod_t(size).inverse();
  rep(i, s) A[i] *= inv;
}

template <typename mod_t>
void rev_permute(mod_t* A, int n) {
  int r = 0, nh = n >> 1;
  rep(i, 1, n) {
    for (int h = nh; !((r ^= h) & h); h >>= 1);
    if (r > i) swap(A[i], A[r]);
  }
}

template <typename mod_t>
void ntt_dit4(mod_t* A, int n, int sign, mod_t* roots) {
  rev_permute(A, n);
  int logn = __builtin_ctz(n);
  if (logn & 1) rep(i, 0, n, 2) {
    mod_t a = A[i], b = A[i + 1];
    A[i] = a + b; A[i + 1] = a - b;
  }
  mod_t imag = roots[mod_t::level - 2];
  if (sign < 0) imag = imag.inverse();

  mod_t one = mod_t(1);
  rep(e, 2 + (logn & 1), logn + 1, 2) {
    const int m = 1 << e;
    const int m4 = m >> 2;
    mod_t dw = roots[mod_t::level - e];
    if (sign < 0) dw = dw.inverse();

    const int block_size = max(m, (1 << 15) / int(sizeof(A[0])));
    rep(k, 0, n, block_size) {
      mod_t w = one, w2 = one, w3 = one;
      rep(j, m4) {
        rep(i, k + j, k + block_size, m) {
          mod_t a0 = A[i + m4 * 0] * one, a2 = A[i + m4 * 1] * w2;
          mod_t a1 = A[i + m4 * 2] * w,   a3 = A[i + m4 * 3] * w3;
          mod_t t02 = a0 + a2, t13 = a1 + a3;
          A[i + m4 * 0] = t02 + t13; A[i + m4 * 2] = t02 - t13;
          t02 = a0 - a2, t13 = (a1 - a3) * imag;
          A[i + m4 * 1] = t02 + t13; A[i + m4 * 3] = t02 - t13;
        }
        w *= dw; w2 = w * w; w3 = w2 * w;
      }
    }
  }
}

const int size = 1 << 17;
using m64 = ntt::Mod64<31525197391593473, 3>;
m64 f[size], g[size];

} // namespace ntt

#define getchar getchar_unlocked
#define putchar putchar_unlocked

int get_int() {
  int n; char c;
  while ((c = getchar()) < '0');
  n = c - '0';
  while ((c = getchar()) >= '0') n = n * 10 + c - '0';
  return n;
}

void put_u32(u32 n) {
  char strs[11];
  int i = 0;
  do {
    strs[i++] = n % 10, n /= 10;
  } while (n);
  while (i) putchar('0' + strs[--i]);
  putchar('\n');
}

int main() {
  using namespace ntt;
  int L = get_int(), M = get_int(), N = get_int();

  const u32 s = 17, mask = 1 << s;
  auto one = m64(1), one17 = m64(mask);
  rep(i, L) {
    int n = get_int();
    f[n >> 1] += (n & 1) ? one17 : one;
  }
  rep(i, M) {
    int n = N - get_int();
    g[n >> 1] += (n & 1) ? one17 : one;
  }
  ntt::convolute(f, N / 2 + 1, g, N / 2 + 1);

  int Q = get_int();

  static int res[200010];
  u64 carry = 0;
  rep(i, 0, (N + Q + 1) / 2) {
    u64 n = f[i].get() + carry;
    res[2 * i + 0] = n & (mask - 1);
    res[2 * i + 1] = (n >> s) & (mask - 1);
    carry = n >> (2 * s);
  }
  rep(i, N, N + Q) put_u32(res[i]);
  return 0;
}

yukicoder

結果

テストケース

ソースコード