#268822 (C++14) No.206 数の積集合を求めるクエリ

提出ソース

結果

問題	No.206 数の積集合を求めるクエリ
ユーザー	tsutaj
提出日時	2018-06-26 17:16:03
言語	C++14 (gcc 12.3.0 + boost 1.83.0)
結果	AC
実行時間	1,762 ms / 7,000 ms
コード長	4,058 bytes
コンパイル時間	1,229 ms
コンパイル使用メモリ	86,720 KB
実行使用メモリ	21,900 KB
最終ジャッジ日時	2024-06-30 22:57:14
合計ジャッジ時間	26,397 ms
ジャッジサーバーID （参考情報）	judge4 / judge3

このコードへのチャレンジ
（要ログイン）

テストケース

テストケース表示

入力	結果	実行時間 実行使用メモリ
testcase_00	AC	3 ms 6,812 KB
testcase_01	AC	3 ms 6,940 KB
testcase_02	AC	5 ms 6,944 KB
testcase_03	AC	2 ms 6,940 KB
testcase_04	AC	3 ms 6,944 KB
testcase_05	AC	3 ms 6,940 KB
testcase_06	AC	49 ms 6,940 KB
testcase_07	AC	49 ms 6,940 KB
testcase_08	AC	49 ms 6,940 KB
testcase_09	AC	49 ms 6,940 KB
testcase_10	AC	2 ms 6,944 KB
testcase_11	AC	5 ms 6,940 KB
testcase_12	AC	53 ms 6,940 KB
testcase_13	AC	53 ms 6,940 KB
testcase_14	AC	53 ms 6,944 KB
testcase_15	AC	53 ms 6,940 KB
testcase_16	AC	52 ms 6,940 KB
testcase_17	AC	1,633 ms 21,772 KB
testcase_18	AC	1,602 ms 21,772 KB
testcase_19	AC	1,628 ms 21,768 KB
testcase_20	AC	1,601 ms 21,900 KB
testcase_21	AC	1,610 ms 21,768 KB
testcase_22	AC	1,604 ms 21,772 KB
testcase_23	AC	1,626 ms 21,772 KB
testcase_24	AC	1,762 ms 21,740 KB
testcase_25	AC	1,750 ms 21,772 KB
testcase_26	AC	1,729 ms 21,772 KB
testcase_27	AC	1,692 ms 21,768 KB
testcase_28	AC	1,739 ms 21,896 KB
testcase_29	AC	1,737 ms 21,772 KB
testcase_30	AC	1,732 ms 21,744 KB

権限があれば一括ダウンロードができます

ソースコード

raw source code

#include <cstdio>
#include <vector>
#include <algorithm>
#include <cassert>
#include <iostream>
using namespace std;
using ll = long long int;

// Garner のアルゴリズム ( 計算量 O(N^2) )
// x ≡ a_1 (mod m_1), ..., x ≡ a_N (mod m_N) を満たす最小の非負整数 x を求め、それを引数 mod で割った余りを返す
// m_1, m_2, ... m_N は相異なる素数である必要がある

// x = k_0 + k_1*m_1 + k_2*(m_1*m_2) + ... k_{N-1}*(m_1*m_2* ... *m_{N-1}) として、
// 1 つ目の式から順に操作することで係数 k_i を下から決定していく

// Computes X^N modulo `mod` by binary exponentiation (square-and-multiply).
//   X   : base; any sign, it is reduced into [0, mod) before being used
//   N   : exponent; any value <= 0 is treated as exponent 0
//   mod : modulus; expected to be positive and small enough that
//         (mod - 1)^2 still fits in a signed 64-bit integer
// Returns the power as a value in [0, mod) -- in particular 0 when mod == 1.
ll mod_pow(ll X, ll N, ll mod) {
    // Reduce the base up front so that the first squaring cannot overflow,
    // and lift a negative remainder into the non-negative range.
    X %= mod;
    if(X < 0) X += mod;

    // Start from 1 % mod instead of a literal 1 so that mod == 1 yields 0.
    ll ret = 1 % mod;
    for(; N>0; N>>=1) {
        // The current lowest bit of N decides whether this power of X contributes.
        if(N & 1) ret = ret * X % mod;
        X = X * X % mod;
    }
    return ret;
}

// Garner's algorithm (O(N^2) modular operations for N congruences).
// Given the system  x ≡ values[i] (mod mods[i])  for every i, reconstructs the
// smallest non-negative solution x in mixed-radix form
//   x = k_0 + k_1*m_0 + k_2*(m_0*m_1) + ... + k_{N-1}*(m_0*...*m_{N-2})
// and returns x reduced modulo `mod`.
//   values : residues, one per modulus (negative residues are normalised)
//   mods   : moduli; inverses are taken as a^(m-2), so each one is expected to
//            be prime, pairwise distinct, and below 2^31 so that products of
//            two residues fit in ll
//   mod    : modulus of the returned value, expected in (0, 2^62]
ll garner(const vector<ll>& values, const vector<ll>& mods, ll mod) {
    assert(values.size() == mods.size());
    const int N = static_cast<int>(values.size());

    // (a * b) % m computed by double-and-add so that no intermediate exceeds
    // 2 * m. A plain a * b overflows ll as soon as m is larger than about 2^32.
    // Requires a >= 0, b >= 0 and 0 < m <= 2^62.
    const auto mul_mod = [](ll a, ll b, ll m) {
        ll acc = 0;
        a %= m;
        for(; b > 0; b >>= 1) {
            if(b & 1) acc = (acc + a) % m;
            a = (a + a) % m;
        }
        return acc;
    };

    // Determine the mixed-radix digits k_i one congruence at a time.
    vector<ll> coeff(N);
    for(int i=0; i<N; i++) {
        const ll m = mods[i];

        // Target residue, moved into [0, m) even for negative input.
        ll val_A = values[i] % m;
        if(val_A < 0) val_A += m;

        // val_B   = contribution of the already known digits, modulo m
        // mod_acc = m_0 * ... * m_{i-1}, modulo m
        ll val_B = 0, mod_acc = 1;
        for(int j=0; j<i; j++) {
            (val_B += mod_acc * coeff[j]) %= m;
            (mod_acc *= mods[j]) %= m;
        }

        // Solve  val_B + k_i * mod_acc ≡ val_A (mod m)  for k_i.
        ll val = (val_A - val_B + m) % m;
        (val *= mod_pow(mod_acc, m-2, m)) %= m;
        coeff[i] = val;
    }

    // Evaluate the mixed-radix expansion modulo `mod`. Both factors can be
    // large here (mod_acc < mod, digit < 2^31), hence the overflow-free product.
    // The small factor is passed second to keep the loop inside mul_mod short.
    ll ans = 0, mod_acc = 1 % mod;
    for(int i=0; i<N; i++) {
        ans = (ans + mul_mod(mod_acc, coeff[i], mod)) % mod;
        mod_acc = mul_mod(mod_acc, mods[i], mod);
    }
    return ans;
}

// NTT: a fast Fourier transform carried out in the residue ring Z/mod.
// Template parameters:
//   mod            : modulus of the ring. Inverses are computed as x^(mod-2),
//                    so mod is expected to be prime; (mod - 1) should be
//                    divisible by every transform length N that is used, and
//                    mod must be small enough that (mod - 1)^2 + mod fits in ll.
//   primitive_root : value whose powers supply the N-th roots of unity
//                    (assumed to be a primitive root of mod).
// One instance yields results modulo its own `mod` only; results for another
// modulus are obtained by combining several instances with garner().
template<ll mod, ll primitive_root>
struct NTT {
    // Returns the modulus of this transform (narrowed to int).
    int get_mod() const { return mod; }

    // Recursive radix-2 transform of the first N entries of A.
    //   N   : transform length; expected to be a power of two
    //   sgn : >= 0 for the forward transform, < 0 to use the inverse root
    //         (the 1/N scaling is NOT applied here, see inv_dft)
    // Returns a vector of N values in [0, mod).
    vector<ll> dft(const vector<ll>& A, int N, int sgn = 1) {
        if(N == 1) {
            // Every input element ends up in a leaf exactly once, so
            // normalising here guarantees that all butterfly operands lie in
            // [0, mod) and their products cannot overflow, even when the
            // caller passes values >= mod or negative values.
            vector<ll> leaf(A);
            for(auto &v : leaf) {
                v %= mod;
                if(v < 0) v += mod;
            }
            return leaf;
        }

        // Split into even-indexed and odd-indexed halves and transform each.
        const int half = N / 2;
        vector<ll> even(half), odd(half);
        for(int i=0; i<half; i++) {
            even[i] = A[2*i + 0];
            odd[i]  = A[2*i + 1];
        }
        even = dft(even, half, sgn);
        odd  = dft(odd,  half, sgn);

        // zeta is an N-th root of unity; the inverse transform uses zeta^{-1}.
        ll zeta = mod_pow(primitive_root, (mod - 1) / N, mod);
        if(sgn < 0) zeta = mod_pow(zeta, mod - 2, mod);

        // Butterfly: ret[i] = even[i mod N/2] + zeta^i * odd[i mod N/2].
        vector<ll> ret(N);
        ll pow_zeta = 1;
        for(int i=0; i<N; i++) {
            const int k = i % half;
            ret[i] = (even[k] + pow_zeta * odd[k]) % mod;
            pow_zeta = pow_zeta * zeta % mod;
        }
        return ret;
    }

    // Inverse transform: dft with the inverse root, then every entry is
    // multiplied by N^{-1} modulo mod.
    vector<ll> inv_dft(const vector<ll>& A, int N) {
        vector<ll> ret = dft(A, N, -1);
        const ll inv_N = mod_pow(N, mod-2, mod);
        for(int i=0; i<N; i++) {
            ret[i] = ret[i] * inv_N % mod;
        }
        return ret;
    }

    // Polynomial product of A and B modulo mod.
    // The result has length N = the smallest power of two that is
    // >= A.size() + B.size() + 1; positions past the true product length hold
    // zero. The padded length is kept unchanged because it is part of the
    // returned shape.
    vector<ll> multiply(vector<ll> A, vector<ll> B) {
        const int sz = A.size() + B.size() + 1;
        int N = 1; while(N < sz) N *= 2;

        // Zero-pad, move both operands to the frequency domain, multiply
        // pointwise, and transform back.
        A.resize(N), B.resize(N);
        A = dft(A, N), B = dft(B, N);

        vector<ll> F(N);
        for(int i=0; i<N; i++) {
            F[i] = (A[i] * B[i]) % mod;
        }
        return inv_dft(F, N);
    }
};

// Three NTT instances over pairwise distinct primes of the form c * 2^k + 1,
// each instantiated with 3 as its primitive_root argument:
//   167772161  =  5 * 2^25 + 1
//   469762049  =  7 * 2^26 + 1
//   1224736769 = 73 * 2^24 + 1
using NTT_1 = NTT< 167772161, 3>;
using NTT_2 = NTT< 469762049, 3>;
using NTT_3 = NTT<1224736769, 3>;

vector<ll> convolution_using_ntt(vector<ll> A, vector<ll> B, ll mod) {
    for(auto &x : A) x %= mod;
    for(auto &x : B) x %= mod;

    NTT_1 ntt_1; NTT_2 ntt_2; NTT_3 ntt_3;
    vector< vector<ll> > convo(3);
    convo[0] = ntt_1.multiply(A, B);
    convo[1] = ntt_2.multiply(A, B);
    convo[2] = ntt_3.multiply(A, B);

    int N = convo[0].size();
    vector<ll> ret(N), mods(3);
    mods[0] = ntt_1.get_mod();
    mods[1] = ntt_2.get_mod();
    mods[2] = ntt_3.get_mod();

    for(int i=0; i<N; i++) {
        vector<ll> values(3);
        for(int k=0; k<3; k++) {
            values[k] = convo[k][i];
        }
        ret[i] = garner(values, mods, mod);
    }
    return ret;
}

// Reads L, M, N, then L values and M values, then Q, and prints Q numbers.
// X counts the first list at index (value - 1) and Y counts the second list at
// the mirrored index (N - value). In the convolution of X and Y, index
// N - 1 + v therefore collects every pair (a, b) with a - b == v, which is the
// number printed for the v-th query (v = 0 .. Q-1).
int main() {
    // Only iostreams are used for I/O here, so untie them from C stdio and
    // from each other; together with '\n' below this avoids one flush per line.
    ios::sync_with_stdio(false);
    cin.tie(nullptr);

    int L, M, N; cin >> L >> M >> N;

    vector<ll> X(N), Y(N);
    for(int i=0; i<L; i++) {
        int val; cin >> val; val--;      // shift to a 0-based index
        X[val]++;
    }
    for(int i=0; i<M; i++) {
        int val; cin >> val;
        Y[N - val]++;                    // reversed so that differences line up
    }

    // NOTE(review): 1LL << 60 is presumably chosen to exceed any possible pair
    // count, so the reduced result equals the exact count -- confirm against
    // the problem constraints.
    vector<ll> ans = convolution_using_ntt(X, Y, 1LL << 60);

    int Q; cin >> Q;
    for(int i=0; i<Q; i++) {
        cout << ans[N+i-1] << '\n';
    }
    return 0;
}

yukicoder

結果

テストケース

ソースコード