#26118 (C++11) No.206 数の積集合を求めるクエリ

提出ソース

結果

問題	No.206 数の積集合を求めるクエリ
コンテスト
ユーザー	Min_25
提出日時	2015-05-11 06:23:46
言語	C++11 (gcc 15.2.0 + boost 1.89.0) コンパイル: `g++-15 -O2 -lm -std=gnu++11 -Wuninitialized -DONLINE_JUDGE -o a.out _filename_` 実行: `./a.out`
結果	AC
実行時間	39 ms / 7,000 ms
コード長	5,979 bytes
記録	記録タグの例: 初AC / ショートコード / 純ショートコード / 純主流ショートコード / 最速実行時間
コンパイル時間	672 ms
コンパイル使用メモリ	84,944 KB
実行使用メモリ	6,144 KB
最終ジャッジ日時	2024-07-05 22:21:18
合計ジャッジ時間	2,402 ms
ジャッジサーバーID （参考情報）	judge5 / judge2

このコードへのチャレンジ
（要ログイン）

ファイルパターン	結果
sample	AC * 3
other	AC * 28

権限があれば一括ダウンロードができます

ソースコード

raw source code

#include <cstdio>
#include <cmath>
#include <cstring>
#include <cstdlib>
#include <ctime>
#include <cassert>

#include <iostream>
#include <utility>
#include <algorithm>
#include <queue>
#include <functional>
#include <vector>
#include <map>
#include <set>

// Redirect every later use of getchar()/putchar() in this file to the
// *_unlocked variants (POSIX; presumably chosen to avoid per-call stream
// locking -- these names are not available on every platform).
#define getchar getchar_unlocked
#define putchar putchar_unlocked

// The whole file refers to standard-library names (e.g. swap) unqualified.
using namespace std;

// Short aliases for the arithmetic types used throughout the file.
typedef long long int64;            // signed 64-bit (assumes a 64-bit long long)
typedef long long unsigned uint64;  // unsigned counterpart of int64
typedef long double float80;        // name suggests x87 extended precision; width is platform-dependent
typedef unsigned short uint16;
typedef unsigned uint;              // plain unsigned int
typedef unsigned char uint8;

/**
 * Reads the next unsigned decimal integer from standard input.
 *
 * Every character that is not a decimal digit is skipped, then consecutive
 * digits are accumulated. The single character that terminates the number is
 * consumed as well. Values that do not fit in `uint` wrap around as unsigned
 * arithmetic does.
 *
 * @return the parsed value, or 0 when end of input is reached before any
 *         digit was seen.
 */
uint get_uint() {
  int c = getchar();
  // EOF is negative, so it has to be tested explicitly: a bare "c < '0'"
  // comparison would keep looping forever once the input is exhausted.
  while (c != EOF && (c < '0' || c > '9')) {
    c = getchar();
  }
  if (c == EOF) {
    return 0;
  }

  uint n = c - '0';
  // Only '0'..'9' extend the number; anything else (including EOF) ends it.
  for (c = getchar(); c >= '0' && c <= '9'; c = getchar()) {
    n = n * 10 + (c - '0');
  }
  return n;
}

void put_uint(uint n) {
  uint8 stack[30];
  int top = 0;
  do {
    stack[top++] = n % 10 + '0';
    n /= 10;
  } while(n != 0);
  while(top > 0) {
    putchar(stack[--top]);
  }
  putchar('\n');
}

// 128-bit unsigned integer (compiler extension) used for double-width products.
typedef __uint128_t uint128;

/**
 * Integer modulo `mod`, with most operations working on the Montgomery
 * representation (radix 2^64) of the value.
 *
 * Template parameters:
 *   mod  - the modulus. operator+ needs 2*mod to fit in 64 bits and operator-
 *          needs mod < 2^63 (it detects a borrow through the sign bit).
 *   peri - number of successive squarings tabulated by init(); roots_ must
 *          provide at least peri + 1 elements.
 *   z    - seed of the root table, given as an ordinary (non-Montgomery)
 *          residue. Presumably a primitive 2^peri-th root of unity modulo
 *          `mod` -- confirm against the instantiation.
 *
 * The constructor taking uint64 stores its argument verbatim; callers convert
 * ordinary residues with montgomery_init() first. It is intentionally left
 * implicit because the raw uint64 results of montgomery_init() and
 * montgomery_reduction() are assigned straight to Mod64 objects.
 */
template <uint64 mod, uint peri, uint64 z>
class Mod64 {
private:
  // Montgomery constants. They are hard-coded rather than derived from `mod`,
  // so they are only valid for the modulus they were computed for:
  //   n_prime * 0x3f91300000000001 == -1 (mod 2^64).
  static const uint64 n_prime = 0x3f912fffffffffffllu;
  // Multiplier used by montgomery_init() to enter Montgomery form; presumably
  // 2^128 mod `mod` -- TODO confirm.
  static const uint64 r2 = 0x0298CD3E4612D42Allu;

  uint64 n_;

public:
  // Zero is its own Montgomery representation, so a default-constructed
  // object is a well-defined 0 instead of holding an indeterminate value.
  Mod64() : n_(0) {}
  Mod64(uint64 v) : n_(v) {}

  // Root table; the storage is supplied by a definition outside the class.
  static Mod64* roots_;

  // Returns the `peri` template argument.
  static uint period() {
    return peri;
  }

  // Fills roots_[0..peri]: roots_[0] is z in Montgomery form and every
  // following entry is the square of the one before it, i.e.
  // roots_[i] = z^(2^i).
  static void init() {
    Mod64 t = montgomery_init(z);
    for (uint i = 0; i <= peri; ++i) {
      roots_[i] = t;
      t *= t;
    }
  }

  // Converts the ordinary residue `w` into Montgomery form (raw value).
  static uint64 montgomery_init(uint64 w) {
    return montgomery_reduction(uint128(w) * r2);
  }

  // Montgomery reduction: returns (w + x * mod) / 2^64, reduced once by `mod`,
  // where x = (w mod 2^64) * n_prime mod 2^64 makes the low 64 bits of the
  // sum vanish. The result is congruent to w * 2^-64 modulo `mod`.
  static uint64 montgomery_reduction(const uint128 w) {
    uint64 x = uint64(w) * n_prime;
    uint128 y = uint128(x) * mod + w;
    uint64 ret = y >> 64;
    if(ret >= mod) {
      ret -= mod;
    }
    return ret;
  }

  // Modular sum; both operands are expected to be below `mod`.
  Mod64 operator+ (Mod64 rhs) const {
    uint64 ret = (this->n_ + rhs.n_);
    return Mod64(ret >= mod ? ret - mod : ret);
  }

  // Modular difference; a wrapped (borrowed) result shows up as a set sign
  // bit and is corrected by adding `mod` back.
  Mod64 operator- (Mod64 rhs) const {
    uint64 ret = (this->n_ - rhs.n_);
    return Mod64(int64(ret) < 0 ? ret + mod : ret);
  }

  // Product of two Montgomery-form values, again in Montgomery form.
  Mod64 operator* (Mod64 rhs) const {
    return Mod64(montgomery_reduction(uint128(this->n_) * rhs.n_));
  }

  // Compound assignments return *this by reference (the conventional form),
  // so chained updates act on the object itself instead of on a copy.
  Mod64& operator+= (Mod64 rhs) {
    *this = *this + rhs;
    return *this;
  }
  Mod64& operator-= (Mod64 rhs) {
    *this = *this - rhs;
    return *this;
  }
  Mod64& operator*= (Mod64 rhs) {
    *this = *this * rhs;
    return *this;
  }

  // Raw stored value (no conversion out of Montgomery form is performed).
  uint64 get_value() const {
    return this->n_;
  }

  // Overwrites the raw stored value (no conversion is performed).
  void set_value(uint64 val) {
    this->n_ = val;
  }

  // Multiplicative inverse computed as this^(mod - 2); correct only when
  // `mod` is prime (Fermat's little theorem).
  Mod64 inverse() const {
    return pow_mod(*this, mod - 2);
  }

  // base^exp by binary exponentiation; `base` and the result are in
  // Montgomery form.
  static Mod64 pow_mod(Mod64 base, uint64 exp) {
    Mod64 ret = montgomery_init(1);
    while(exp) {
      if(exp & 1) {
        ret *= base;
      }
      exp >>= 1;
      base *= base;
    }
    return ret;
  }
};

// Modulus used by the transform: 0x3f913 * 2^44 + 1. It is treated as prime
// (Mod64::inverse() relies on that).
const uint64 MOD = 0x3f91300000000001ull;
// Concrete modular type: 44 tabulated squarings, seeded with the constant
// below (presumably a primitive 2^44-th root of unity modulo MOD -- confirm).
typedef Mod64<MOD, 44, 0x1941B388165C78EBllu> mod64_t;

// Backing storage for the root table; mod64_t::init() writes entries 0..44.
mod64_t roots[64];

// Point the class-level table pointer at the storage above.
template <>
mod64_t* mod64_t::roots_ = roots;

// -----------------------------------------------------------------------------

// Radix-2 butterfly: replaces (a, b) by (a + b, a - b), both computed from
// the values the arguments had on entry.
template <typename T>
inline void sumdiff(T& a, T& b) {
  T difference = a - b;  // must be taken before `a` is overwritten
  a += b;
  b = difference;
}

// Reorders A[0..n) in place so that each element moves to the index whose
// binary representation is the bit reversal of its current index.
// `n` is expected to be a power of two; lengths up to 2 are left untouched.
template <typename T>
void revbin_permute(T* A, uint n) {
  if(n > 2) {
    const uint top_bit = n >> 1;
    uint reversed = 0;  // bit-reversed counterpart of `index`
    for(uint index = 1; index < n; ++index) {
      // Advance `reversed` as a mirrored counter: flip bits from the top
      // downwards and stop at the first one that flips from 0 to 1.
      uint bit = top_bit;
      for(;;) {
        reversed ^= bit;
        if(reversed & bit)
          break;
        bit >>= 1;
      }
      // Swap each pair exactly once.
      if(index < reversed)
        swap(A[index], A[reversed]);
    }
  }
}

// In-place radix-4 butterfly passes of a number-theoretic transform over
// f[0 .. 2^ldn).
//
//   f    - data; ntt_dit4() applies revbin_permute() before calling this.
//   ldn  - base-2 logarithm of the transform length.
//   sign - a negative value selects the inverse roots, anything else the
//          forward ones. No 1/n scaling is applied here.
//
// mod_t has to provide roots_, period(), montgomery_init() and inverse() in
// addition to +, -, * and their compound forms.
template <typename mod_t>
void ntt_dit4_core(mod_t *f, uint ldn, int sign) {
  const uint RADIX_LOG = 2;
  const uint n = 1u << ldn;
  const bool backward = sign < 0;

  // An odd log-length cannot be covered by radix-4 passes alone, so one
  // radix-2 pass over adjacent pairs is done first.
  if(ldn & 1) {
    for(uint i = 0; i < n; i += 2) {
      sumdiff(f[i], f[i + 1]);
    }
  }

  // Table entry period() - 2 (an element of order 4 if the table holds
  // successive squarings of a 2^period()-th root); it plays the part of the
  // imaginary unit in the butterfly.
  mod_t imag = mod_t::roots_[mod_t::period() - 2];
  if(backward) {
    imag = imag.inverse();
  }

  const mod_t unity = mod_t(mod_t::montgomery_init(1));

  for(uint ldm = RADIX_LOG + (ldn & 1); ldm <= ldn; ldm += RADIX_LOG) {
    const uint m = 1u << ldm;          // size of the sub-transforms built in this pass
    const uint m4 = m >> RADIX_LOG;    // distance between the four butterfly inputs

    // Twiddle step for this pass.
    mod_t dw = mod_t::roots_[mod_t::period() - ldm];
    if(backward) {
      dw = dw.inverse();
    }

    // w1, w2, w3 track dw^j, dw^(2j), dw^(3j).
    mod_t w1 = unity;
    mod_t w2 = unity;
    mod_t w3 = unity;

    for(uint j = 0; j < m4; ++j) {
      for(uint r = 0; r < n; r += m) {
        mod_t* const p0 = f + (j + r);
        mod_t* const p1 = p0 + m4;
        mod_t* const p2 = p1 + m4;
        mod_t* const p3 = p2 + m4;

        // Slots 1 and 2 are cross-wired: slot 1 takes w^2, slot 2 takes w.
        const mod_t a0 = *p0;
        const mod_t a2 = *p1 * w2;
        const mod_t a1 = *p2 * w1;
        const mod_t a3 = *p3 * w3;

        const mod_t sum02 = a0 + a2;
        const mod_t sum13 = a1 + a3;
        const mod_t dif02 = a0 - a2;
        const mod_t dif13 = (a1 - a3) * imag;

        *p0 = sum02 + sum13;
        *p2 = sum02 - sum13;
        *p1 = dif02 + dif13;
        *p3 = dif02 - dif13;
      }

      w1 *= dw;
      w2 = w1 * w1;
      w3 = w1 * w2;
    }
  }
}

// Complete in-place transform of f[0 .. 2^ldn): bit-reversal permutation
// followed by the radix-4 butterfly passes. `sign` is forwarded unchanged to
// ntt_dit4_core().
template <typename mod_t>
void ntt_dit4(mod_t* f, uint ldn, int sign) {
  const uint length = 1u << ldn;
  revbin_permute(f, length);
  ntt_dit4_core(f, ldn, sign);
}

// Transform buffers. They have static storage, so they start out zeroed;
// solve() relies on that when it accumulates into them with +=.
mod64_t A[1 << 17];
mod64_t B[1 << 17];

// Unpacked result coefficients; solve() writes two entries per element of A.
uint res[200011];

// Two BITS-wide counts are packed into each transform element.
const uint BITS = 17;
// Selects the low BITS bits of a packed value.
const uint MASK = (1 << BITS) - 1;

void solve() {
  mod64_t::init();

  uint L = get_uint();
  uint M = get_uint();
  uint N = get_uint();

  uint ntt_size = 1;
  uint ldn = 0;
  while (ntt_size < N / 2 + 1) {
    ntt_size <<= 1;
    ldn++;
  }
  ntt_size <<= 1;
  ++ldn;

  mod64_t one = mod64_t::montgomery_init(1);
  mod64_t two17 = mod64_t::montgomery_init(1 << BITS);

  for (uint i = 0; i < L; ++i) {
    uint n = get_uint();
    A[n / 2] += (n & 1 ? two17 : one);
  }
  ntt_dit4(A, ldn, 1);

  for (uint i = 0; i < M; ++i) {
    uint n = N - get_uint();
    B[n / 2] += (n & 1 ? two17 : one);
  }
  ntt_dit4(B, ldn, 1);

  for (uint i = 0; i < ntt_size; ++i) {
    A[i] *= B[i];
  }
  ntt_dit4(A, ldn, -1);

  uint Q = get_uint();

  mod64_t inv = mod64_t( mod64_t::montgomery_init(ntt_size) ).inverse();
  for (uint i = 0; i < N + 1; ++i) {
    A[i] = mod64_t::montgomery_reduction( (A[i] * inv).get_value() );
  }

  uint64 carry = 0;
  for (uint i = 0; i < N + 1; ++i) {
    uint64 n = A[i].get_value() + carry;
    res[2 * i + 0] = n & MASK;
    res[2 * i + 1] = (n >> BITS) & MASK;
    carry = n >> (BITS * 2);
  }

  for (uint i = N; i < N + Q; ++i) {
    put_uint(res[i]);
  }
}

// Program entry point: runs the solver once and reports success.
int main() {
  solve();
  return EXIT_SUCCESS;
}

yukicoder

結果

ソースコード