#608160 (C++14) No.430 文字列検索

提出ソース
結果

問題	No.430 文字列検索
コンテスト
ユーザー	fuppy_kyopro
提出日時	2021-01-22 19:13:03
言語	C++14 (gcc 15.2.0 + boost 1.89.0) コンパイル: `g++-15 -O2 -lm -std=c++14 -Wuninitialized -DONLINE_JUDGE -o a.out _filename_` 実行: `./a.out`
結果	AC
実行時間	43 ms / 2,000 ms
コード長	13,392 bytes
記録	記録タグの例: 初AC ショートコード 純ショートコード 純主流ショートコード 最速実行時間
コンパイル時間	1,791 ms
コンパイル使用メモリ	204,752 KB
実行使用メモリ	28,436 KB
最終ジャッジ日時	2026-05-09 11:30:00
合計ジャッジ時間	3,390 ms
ジャッジサーバーID （参考情報）	judge1_1 / judge3_1
このコードへのチャレンジ
（要ログイン）
ファイルパターン	結果
sample	AC * 4
other	AC * 14
権限があれば一括ダウンロードができます
ソースコード

raw source code
/*
#pragma GCC target("avx2")
#pragma GCC optimize("O3")
#pragma GCC optimize("unroll-loops")
//*/
 
#include <bits/stdc++.h>
 
// #include <atcoder/all>
 
using namespace std;
// using namespace atcoder;
 
// Debug printers writing to cerr. Each body is wrapped in do { ... } while (0)
// so the whole macro is a single statement (safe under an unbraced `if`), and
// arguments are parenthesized. The trailing ';' is kept so existing call sites
// written without a semicolon continue to compile.
// DEBUG(x): prints "<expr>: <value>".
#define DEBUG(x) do { cerr << #x << ": " << (x) << endl; } while (0);
// DEBUG_VEC(v): prints "<expr>:" followed by every element of an indexable container.
#define DEBUG_VEC(v) do { cerr << #v << ":"; for (size_t i = 0; i < (v).size(); i++) cerr << " " << (v)[i]; cerr << endl; } while (0);
// DEBUG_MAT(v): prints "<expr>" and then one line per row of a 2-D container.
#define DEBUG_MAT(v) do { cerr << #v << endl; for (size_t i = 0; i < (v).size(); i++) { for (size_t j = 0; j < (v)[i].size(); j++) { cerr << (v)[i][j] << " "; } cerr << endl; } } while (0);
// Shorthand for the 64-bit signed integer type used throughout the file.
typedef long long ll;
// Disabled switch that would turn every `int` in the file into `ll`.
// #define int ll
 
// Textual shorthands for frequently used container and pair types.
// These are macros, not type aliases: they are substituted before parsing.
#define vi vector<int>
#define vl vector<ll>
#define vii vector< vector<int> >
#define vll vector< vector<ll> >
#define vs vector<string>
#define pii pair<int,int>
#define pis pair<int,string>
#define psi pair<string,int>
#define pll pair<ll,ll>
// Component-wise sum of two pairs.
template<class S, class T>
pair<S, T> operator+(const pair<S, T> &s, const pair<S, T> &t) {
    pair<S, T> sum(s.first + t.first, s.second + t.second);
    return sum;
}
// Component-wise difference of two pairs.
template<class S, class T>
pair<S, T> operator-(const pair<S, T> &s, const pair<S, T> &t) {
    pair<S, T> diff(s.first - t.first, s.second - t.second);
    return diff;
}
// Streams a pair as "(first, second)".
template<class S, class T>
ostream& operator<<(ostream& os, pair<S, T> p) {
    os << "(" << p.first;
    os << ", " << p.second;
    os << ")";
    return os;
}
// Shorthands for the two members of a pair.
#define X first
#define Y second
// Counting loops over int i. Every bound is parenthesized so that arbitrary
// expressions (e.g. `a | b`, `c ? x : y`) can be passed without precedence surprises.
// rep:   i = 0 .. n-1          rep1:  i = 1 .. n
// rrep:  i = n-1 .. 0          rrep1: i = n .. 1
// REP:   i = a .. b-1
#define rep(i,n) for(int i=0;i<(int)(n);i++)
#define rep1(i,n) for(int i=1;i<=(int)(n);i++)
#define rrep(i,n) for(int i=(int)(n)-1;i>=0;i--)
#define rrep1(i,n) for(int i=(int)(n);i>0;i--)
#define REP(i,a,b) for(int i=(a);i<(b);i++)
// True when a <= x < b; arguments are parenthesized and each of x is evaluated twice.
#define in(x, a, b) ((a) <= (x) && (x) < (b))
// Expands to the begin/end iterator pair of container c.
#define all(c) (c).begin(),(c).end()
// Answer printers: each writes one of two fixed words followed by endl.
// The upper/capitalized/lower variants differ only in letter case; the
// NO/No/no family prints the negative word when t is true.
void YES(bool t=true) {
    const char* word = t ? "YES" : "NO";
    cout << word << endl;
}
void Yes(bool t=true) {
    const char* word = t ? "Yes" : "No";
    cout << word << endl;
}
void yes(bool t=true) {
    const char* word = t ? "yes" : "no";
    cout << word << endl;
}
void NO(bool t=true) {
    const char* word = t ? "NO" : "YES";
    cout << word << endl;
}
void No(bool t=true) {
    const char* word = t ? "No" : "Yes";
    cout << word << endl;
}
void no(bool t=true) {
    const char* word = t ? "no" : "yes";
    cout << word << endl;
}
// Raises a to b when a < b; returns whether a was changed.
template<class T>
bool chmax(T &a, const T &b) {
    if (!(a<b)) return false;
    a = b;
    return true;
}
// Lowers a to b when a > b; returns whether a was changed.
template<class T>
bool chmin(T &a, const T &b) {
    if (!(a>b)) return false;
    a = b;
    return true;
}
// Erases the tail left by std::unique, i.e. collapses runs of equal adjacent
// elements of v in place. The macro already ends with ';'.
#define UNIQUE(v) v.erase(std::unique(v.begin(), v.end()), v.end());
// Sentinel value 10^9 + 1.
const ll inf = 1000000001;
// Sentinel value 10^18 + 1.
const ll INF = (ll)1e18 + 1;
// Pi as a long double literal.
const long double pi = 3.1415926535897932384626433832795028841971L;
// Number of set bits in t, counted on its 64-bit unsigned conversion.
int popcount(ll t) {
    const unsigned long long bits = static_cast<unsigned long long>(t);
    return __builtin_popcountll(bits);
}
// Disabled array-based variants of the offset tables (4 and 8 directions).
// int dx[4] = {1, 0, -1, 0}, dy[4] = {0, 1, 0, -1};
// int dx2[8] = { 1,1,0,-1,-1,-1,0,1 }, dy2[8] = { 0,1,1,1,0,-1,-1,-1 };
// Paired unit offsets (dx[k], dy[k]) for the four axis-aligned directions.
// Not referenced by the rest of the visible code.
vi dx = {0, 1, 0, -1}, dy = {-1, 0, 1, 0};
// Disabled vector-based 8-direction variant.
// vi dx2 = { 1,1,0,-1,-1,-1,0,1 }, dy2 = { 0,1,1,1,0,-1,-1,-1 };
// Configures the standard streams once, when the global `setup_io` object is
// constructed: C stdio synchronisation off, cin/cout untied, and cout set to
// fixed notation with 25 digits after the decimal point.
struct Setup_io {
    Setup_io() {
        ios_base::sync_with_stdio(false);
        cin.tie(nullptr);
        cout.tie(nullptr);
        cout.setf(ios_base::fixed, ios_base::floatfield);
        cout.precision(25);
    }
} setup_io;
// Disabled alternative modulus (1000000007).
// const ll MOD = 1000000007;
// Modulus constant; not referenced by the rest of the visible code.
const ll MOD = 998244353;
// Disabled shorthands: `mp` for make_pair, and `endl` replaced by '\n'.
// #define mp make_pair
//#define endl '\n'


// One node of the trie; char_size is the number of child slots per node.
template<int char_size>
struct TrieNode {
    // Add any extra per-node state you need here.

    // Index (into the owning trie's node array) of the child for each
    // character slot; -1 when no such child exists.
    int next_idx[char_size];
    // Character slots for which a child node exists.
    vector<int> exist_char;
    // Ids of the strings that end exactly at this node (the same string may
    // be added more than once, hence a list).
    vector<int> state_idx;
    // Number of added strings at or below this node (!= number of nodes).
    int sub_num = 0;

    // Starts with every child slot empty.
    TrieNode() {
        std::fill(next_idx, next_idx + char_size, -1);
    }
};

// Trie over the characters initial_char .. initial_char + char_size - 1.
// Every added string carries an integer id; strings can be added and removed,
// and several hook methods (query / dfs) are provided as templates to fill in.
template<int char_size, int initial_char>
struct Trie {
    using Node = TrieNode<char_size>;

    vector<Node> nodes; // node pool; nodes[0] is the root
    int str_num=0; // number of strings currently stored (!= number of nodes)
    vector<string> strs; // id -> string table; switch to unordered_map if ids are not (nearly) sequential
    Trie() {
        nodes.push_back(Node());
    }

    // Inserts s[str_idx..] below node_idx and records `id` at the terminal node.
    // sub_num is incremented on every node of the walked path, terminal included.
    void add(string& s, int str_idx, int node_idx, int id) {
        const int len = static_cast<int>(s.size());
        for (; str_idx < len; str_idx++) {
            const int c = s[str_idx] - initial_char;
            // a character outside the alphabet would index past next_idx
            assert(0 <= c and c < char_size);
            // no node yet for the next character of s
            if (nodes[node_idx].next_idx[c] == -1) {
                nodes[node_idx].next_idx[c] = static_cast<int>(nodes.size());
                nodes[node_idx].exist_char.push_back(c);
                nodes.push_back(Node());
            }
            nodes[node_idx].sub_num++;
            node_idx = nodes[node_idx].next_idx[c];
        }
        // node that matches s exactly
        nodes[node_idx].state_idx.push_back(id);
        nodes[node_idx].sub_num++;
    }

    // Adds s under the given id (default: a fresh id).
    // The fresh id is strs.size(), not str_num: str_num shrinks on sub(), so
    // deriving ids from it would hand out ids that are still in use.
    // The string is stored at strs[id] so the table really maps id -> string.
    void add(string& s, int id=-1) {
        if (id == -1) id = static_cast<int>(strs.size());
        assert(id >= 0);
        add(s, 0, 0, id);
        str_num++;
        if (id >= static_cast<int>(strs.size())) strs.resize(id + 1);
        strs[id] = s;
    }

    // Removes one occurrence of s[str_idx..] below node_idx and returns the id
    // that was removed. Asserts that the string is actually present.
    // Beware of memory usage (possible MLE): unlinked nodes stay in `nodes`.
    int sub(string& s, int str_idx, int node_idx) {
        // node that matches s exactly
        if (str_idx == static_cast<int>(s.size())) {
            assert(nodes[node_idx].state_idx.size() > 0);
            // arbitrarily remove the id that was pushed last
            int id = nodes[node_idx].state_idx.back();
            nodes[node_idx].state_idx.pop_back();
            nodes[node_idx].sub_num--;
            return id;
        }

        const int c = s[str_idx] - initial_char;
        int next_node_idx = nodes[node_idx].next_idx[c];
        // s is not stored in the trie if the path breaks here
        assert(next_node_idx != -1);
        int id = sub(s, str_idx+1, next_node_idx);
        nodes[node_idx].sub_num--;
        assert(nodes[node_idx].sub_num >= 0);

        // the child for the next character became empty while this node survives
        // (this node is the root or still has other strings below it): unlink the child
        if (nodes[next_node_idx].sub_num == 0 and (nodes[node_idx].sub_num > 0 or node_idx == 0)) {
            nodes[node_idx].next_idx[c] = -1;
            vector<int>& chars = nodes[node_idx].exist_char;
            auto it = std::find(chars.begin(), chars.end(), c);
            if (it != chars.end()) chars.erase(it);
        }
        return id;
    }
    
    // Removes one occurrence of s, empties its slot in strs and returns its id.
    int sub(string& s) {
        int id = sub(s, 0, 0);
        assert(0 <= id and id < static_cast<int>(strs.size()));
        str_num--;
        strs[id].clear();
        strs[id].shrink_to_fit();
        return id;
    }

    // Template hook: walks the path of s from node_idx and visits the ids stored
    // at every node on it. The bodies are intentionally empty.
    void query(string& s, int str_idx, int node_idx) {
        for (int id: nodes[node_idx].state_idx) {
            // processing for the added strings that are exactly equal to s[0, str_idx)
            (void)id;
        }
        if (str_idx == static_cast<int>(s.size())) {
            // found the node that matches s exactly
            return;
        }

        if (nodes[node_idx].next_idx[s[str_idx] - initial_char] == -1) {
            // no further node shares a prefix with s
            return;
        }
        return query(s, str_idx + 1, nodes[node_idx].next_idx[s[str_idx] - initial_char]);
    }

    // Entry point for doing something with the stored prefixes of s.
    void query(string s) {
        query(s, 0, 0);
    }
    
    // Template hook: depth-first traversal of the linked children of node_idx.
    void dfs(int node_idx, int depth) {
        // processing for the current node goes here
        // (end of processing for the current node)

        for (int c: nodes[node_idx].exist_char) {
            assert(nodes[node_idx].next_idx[c] != -1);
            dfs(nodes[node_idx].next_idx[c], depth + 1);
        }
    }

    // Depth-first traversal starting from the root.
    void dfs() {
        dfs(0, 0);
    }

    // Greedy walk for xor_min: follows the same bit as s when that child
    // exists, otherwise the flipped bit; the followed characters are appended to t.
    string xor_min(string& s, int str_idx, int node_idx, string& t) {
        if (str_idx == static_cast<int>(s.size())) {
            // reached the end of s
            return t;
        }

        if (nodes[node_idx].next_idx[s[str_idx] - initial_char] != -1) {
            t += s[str_idx];
            return xor_min(s, str_idx + 1, nodes[node_idx].next_idx[s[str_idx] - initial_char], t);
        }
        else {
            char c = '0' + ('1' - s[str_idx]);
            // neither child exists, e.g. the stored strings are shorter than s
            assert(nodes[node_idx].next_idx[c - initial_char] != -1);
            t += c;
            return xor_min(s, str_idx + 1, nodes[node_idx].next_idx[c - initial_char], t);
        }
    }

    // Searches the stored string that gives the smallest value when xor-ed with s.
    // Only valid for a binary trie over '0'/'1' that holds at least one string.
    string xor_min(string& s) {
        assert(initial_char == '0' and char_size == 2);
        assert(str_num > 0);
        string res;
        return xor_min(s, 0, 0, res);
    }
};

// Aho-Corasick automaton built on top of Trie. The underlying trie gets one
// extra child slot per node (index char_size) that stores the failure link.
// Usage: add() all dictionary strings, call build() once, then match()/frequency().
template< int char_size, int initial_char >
struct AhoCorasick : Trie< char_size + 1, initial_char > {

    // Pseudo-character slot of next_idx that holds the failure link: the node of
    // the longest common suffix present in the trie (the root if there is none).
    const int FAIL = char_size;
    // Per node: total number of dictionary strings that are suffixes of the node's string.
    vector<int> suf_cnt;
    bool is_built = false;

    // Node reached when character c is appended to the state node_idx.
    // Passing FAIL + initial_char follows the failure link.
    int move(int node_idx, int c) {
        const int slot = c - initial_char;
        // a character outside the alphabet would index past next_idx
        assert(0 <= slot and slot <= char_size);
        return this->nodes[node_idx].next_idx[slot];
    }

    // Computes failure links, fills the missing transitions of every reachable
    // node and accumulates suf_cnt, using a breadth-first traversal from the root.
    void build() {
        is_built = true;
        const int node_count = static_cast<int>(this->nodes.size());
        suf_cnt.resize(node_count);

        // strings that end exactly at the node itself
        for (int i = 0; i < node_count; i++) {
            suf_cnt[i] = static_cast<int>(this->nodes[i].state_idx.size());
        }

        // the root and the depth-1 nodes are handled separately
        queue<int> qu;
        for (int i = 0; i <= char_size; i++) {
            if (this->nodes[0].next_idx[i] == -1) {
                this->nodes[0].next_idx[i] = 0;
            }
            else {
                int next_node = this->nodes[0].next_idx[i];
                this->nodes[next_node].next_idx[FAIL] = 0;
                qu.push(next_node);
            }
        }

        while (qu.size()) {
            int node_idx = qu.front();
            qu.pop();
            int fail_idx = this->nodes[node_idx].next_idx[FAIL];
            suf_cnt[node_idx] += suf_cnt[fail_idx]; // add the matches of the suffix with the head removed

            // the failure destination of the child via c is the failure destination
            // of this node followed by c
            // https://jetbead.hatenablog.com/entry/20121027/1351317982
            for (int c = 0; c < char_size; c++) {
                if (this->nodes[node_idx].next_idx[c] == -1) {
                    this->nodes[node_idx].next_idx[c] = move(fail_idx, c + initial_char);
                    continue;
                }
                int next_node = this->nodes[node_idx].next_idx[c];
                this->nodes[next_node].next_idx[FAIL] = this->nodes[fail_idx].next_idx[c];
                qu.push(next_node);
            }
        }
    }

    // Total number of occurrences of dictionary strings among the substrings of s
    // (occurrences, not distinct strings). O(|s|). Requires build().
    ll match(const string& s) {
        assert(is_built);
        ll res = 0;
        int node_idx = 0;
        for (char ch : s) {
            node_idx = move(node_idx, ch);
            res += suf_cnt[node_idx];
        }
        return res;
    }

    // For each dictionary string id, the number of its occurrences in s.
    // The result is indexed by id and has strs.size() entries. Requires build().
    vl frequency(const string& s) {
        assert(is_built);
        // number of text positions at which each state was the current state
        vl num(this->nodes.size());
        int node_idx = 0;
        for (char ch : s) {
            node_idx = move(node_idx, ch);
            num[node_idx]++;
        }

        // breadth-first order over the real trie children (exist_char),
        // so that reversing it processes deeper nodes first
        queue<int> qu;
        qu.push(0);
        vi path;
        while (qu.size()) {
            int cur = qu.front();
            qu.pop();
            path.push_back(cur);
            for (int c: this->nodes[cur].exist_char) {
                qu.push(this->nodes[cur].next_idx[c]);
            }
        }

        vl res(this->strs.size());
        for (int i = (int)path.size() - 1; i >= 0; i--) {
            int cur = path[i];
            for (int u: this->nodes[cur].state_idx) {
                res[u] += num[cur];
            }
            // every visit of cur is also a visit of its failure destination
            num[move(cur, FAIL + initial_char)] += num[cur];
        }
        return res;
    }
};

// Random integer source backed by mt19937, seeded from steady_clock at
// construction. A global instance `rnd` is defined with the struct.
struct dice {
  mt19937 mt;
  dice() : mt(chrono::steady_clock::now().time_since_epoch().count()) {}
  // Uniform random integer in [0, x). Requires x > 0.
  ll operator()(ll x) { return this->operator()(0, x); }
  // Uniform random integer in [x, y). Requires x < y.
  ll operator()(ll x, ll y) {
    // uniform_int_distribution(a, b) needs a <= b; an empty range is undefined behaviour
    assert(x < y);
    uniform_int_distribution<ll> dist(x, y - 1);
    return dist(mt);
  }
  // n independent uniform random integers in [x, y).
  vl operator()(int n, ll x, ll y) {
    vl res(n);
    for (ll& value : res) value = this->operator()(x, y);
    return res;
  }
} rnd;

signed main() {
    string s;
    cin >> s;
    AhoCorasick<26, 'A'> aho;
    int m;
    cin >> m;
    vector<string> t(m);
    rep (i, m) {
        cin >> t[i];
        aho.add(t[i]);
    }
    rep (i, m) {
        aho.sub(t[i]);
    }
    vector<string> add;
    rep (i, 10000) {
        if (rnd(2)) {
            int u = rnd(m);
            aho.add(t[u]);
            add.push_back(t[u]);
        }
        else {
            string tt;
            rep (j, 10) {
                tt += (char)('A' + rnd(26));
            }
            aho.add(tt);
            add.push_back(tt);
        }
    }

    int kk = 4000;
    for (int i = kk; i < add.size(); i++) {
        aho.sub(add[i]);
    }
    rep (i, m) aho.add(t[i]);
    for (int i = 0; i < kk; i++) {
        aho.sub(add[i]);
    }

    aho.build();

    ll ans1 = aho.match(s);

    vl temp = aho.frequency(s);
    ll ans2 = 0;
    rep (i, temp.size()) ans2 += temp[i];
    assert(ans1 == ans2);
    cout << ans2 << endl;
}
yukicoder

結果

ソースコード