#660191 (C++17) No.430 文字列検索

提出ソース
結果

問題	No.430 文字列検索
コンテスト
ユーザー	stoq
提出日時	2021-05-24 03:21:16
言語	C++17 (gcc 15.2.0 + boost 1.89.0)
結果	AC
実行時間	7 ms / 2,000 ms
コード長	8,358 bytes
記録	記録タグの例: 初AC、ショートコード、純ショートコード、純主流ショートコード、最速実行時間
コンパイル時間	2,177 ms
コンパイル使用メモリ	210,732 KB
最終ジャッジ日時	2025-01-21 17:56:36
ジャッジサーバーID （参考情報）	judge3 / judge4
このコードへのチャレンジ
（要ログイン）
ファイルパターン	結果
sample	AC * 4
other	AC * 14
権限があれば一括ダウンロードができます
ソースコード

raw source code
// Selects the modulus used by the MOD constant defined later in this file:
// 1 -> 1e9+7, any other value -> 998244353.
#define MOD_TYPE 1

// Opens an editor folding region; it is closed by the matching `#pragma endregion`.
#pragma region Macros

// GCC-specific umbrella header that pulls in the whole standard library.
#include <bits/stdc++.h>
using namespace std;

// Disabled block (`#if 0`): Boost.Multiprecision aliases for arbitrary-precision
// integers and 100-digit decimal floats. Flip to `#if 1` to enable them.
#if 0
#include <boost/multiprecision/cpp_int.hpp>
#include <boost/multiprecision/cpp_dec_float.hpp>
using Int = boost::multiprecision::cpp_int;
using lld = boost::multiprecision::cpp_dec_float_100;
#endif
// Enabled block (`#if 1`): GCC-only code generation hints (AVX2 target, -O3,
// loop unrolling) applied to the code that follows.
#if 1
#pragma GCC target("avx2")
#pragma GCC optimize("O3")
#pragma GCC optimize("unroll-loops")
#endif
// Short names for the wide scalar types.
using ll = long long;
using ld = long double;

// Pair shorthands built on the scalar aliases above.
using pii = std::pair<int, int>;
using pll = std::pair<ll, ll>;
using pld = std::pair<ld, ld>;

// Min-heap: a priority_queue ordered by std::greater, so top() is the smallest element.
template <typename Elem>
using smaller_queue = std::priority_queue<Elem, std::vector<Elem>, std::greater<Elem>>;

// Modulus for modular arithmetic, selected at compile time by MOD_TYPE.
constexpr ll MOD = (MOD_TYPE == 1 ? (ll)(1e9 + 7) : 998244353);
// "Infinity" sentinels for int and long long computations.
constexpr int INF = (int)1e9 + 10;
constexpr ll LINF = (ll)4e18;
// Pi as a long double literal. std::acos is not a constant expression in
// standard C++17 (it only folds as a GCC extension), and acos(-1.0) would
// yield double precision only.
constexpr ld PI = 3.141592653589793238462643383279502884L;
// Tolerance for floating-point comparisons.
constexpr ld EPS = 1e-7;
// Grid offsets: indices 0-3 are the four axis-aligned neighbours, 4-7 the
// diagonals, and 8 is "stay in place" (0, 0).
constexpr int Dx[] = {0, 0, -1, 1, -1, 1, -1, 1, 0};
constexpr int Dy[] = {1, -1, 0, 0, -1, -1, 1, 1, 0};

// Loop helpers: REP/rep iterate a `ll` counter over [m, n) / [0, n);
// REPI/repi do the same with an `int` counter.
// All macro arguments are parenthesized so that arbitrary expressions
// (e.g. `all(*ptr)`, `dbg(a < b)`) expand correctly.
#define REP(i, m, n) for (ll i = (m); i < (ll)(n); ++i)
#define rep(i, n) REP(i, 0, n)
#define REPI(i, m, n) for (int i = (m); i < (int)(n); ++i)
#define repi(i, n) REPI(i, 0, n)
// Shorthands for std::make_pair / std::make_tuple.
#define MP std::make_pair
#define MT std::make_tuple
// Print one of two fixed answers, followed by a newline, depending on the condition n.
#define YES(n) std::cout << ((n) ? "YES" : "NO") << "\n"
#define Yes(n) std::cout << ((n) ? "Yes" : "No") << "\n"
#define possible(n) std::cout << ((n) ? "possible" : "impossible") << "\n"
#define Possible(n) std::cout << ((n) ? "Possible" : "Impossible") << "\n"
// Expands to the iterator pair (begin, end) of a container expression.
#define all(v) (v).begin(), (v).end()
// Advances v to its next lexicographic permutation; evaluates to false after the last one.
#define NP(v) std::next_permutation(all(v))
// Writes `expression:value` to stderr. The trailing semicolon is kept so that
// existing uses written without one still compile.
#define dbg(x) std::cerr << #x << ":" << (x) << "\n";

// Configures the standard streams once, during construction of the global
// object declared right after the struct: unties cin from cout, turns off
// synchronization with C stdio, and makes cout print floating-point values
// in fixed notation with 30 digits after the decimal point.
struct io_init
{
  io_init()
  {
    std::cin.tie(nullptr);
    std::ios_base::sync_with_stdio(false);
    std::cout.precision(30);
    std::cout.setf(std::ios_base::fixed);
  }
} io_init;
// Lowers `a` to `b` when `b` is strictly smaller.
// Returns true if `a` was changed, false otherwise.
template <typename T>
inline bool chmin(T &a, T b)
{
  if (!(a > b))
    return false;
  a = b;
  return true;
}
// Raises `a` to `b` when `b` is strictly larger.
// Returns true if `a` was changed, false otherwise.
template <typename T>
inline bool chmax(T &a, T b)
{
  if (!(a < b))
    return false;
  a = b;
  return true;
}
// Returns ceil(a / b) as an exact integer for any signs of a and b.
// Precondition: b != 0.
// The result is derived from the truncated quotient and the remainder, so it
// never forms a + b - 1; that sum overflows for large a and gives the wrong
// answer when a is negative.
inline long long CEIL(long long a, long long b)
{
  assert(b != 0);
  long long quotient = a / b; // truncated toward zero
  const long long remainder = a % b;
  // Truncation already rounded up when the exact quotient is negative. Only a
  // positive inexact quotient (remainder and divisor share a sign) needs +1.
  if (remainder != 0 && ((remainder < 0) == (b < 0)))
    ++quotient;
  return quotient;
}
// Assigns `val` to every scalar element of a built-in array of any rank.
//   array : reference to a C array; each element may itself be an array.
//   val   : value assigned to each innermost element, converted to the
//           element type by ordinary assignment.
// Descends one dimension per call instead of reinterpreting the storage as
// T*, so a value whose type differs from the element type (for example an
// int literal into a long long array) is converted, not written with the
// wrong width.
template <typename A, size_t N, typename T>
inline void Fill(A (&array)[N], const T &val)
{
  if constexpr (std::is_array_v<A>)
  {
    for (auto &row : array)
      Fill(row, val);
  }
  else
  {
    std::fill(array, array + N, val);
  }
}
// Reads a pair as two consecutive values: `first`, then `second`.
// Returns the stream so extractions can be chained.
// Not declared noexcept: a stream with exceptions() enabled may throw here,
// and that exception has to reach the caller.
template <typename T, typename U>
std::istream &operator>>(std::istream &is, std::pair<T, U> &p)
{
  is >> p.first >> p.second;
  return is;
}
// Writes a pair as `first`, a single space, then `second`.
// Returns the stream so insertions can be chained.
// The pair is taken by const reference so const objects and temporaries
// (e.g. the result of make_pair) can be printed. Not declared noexcept because
// a stream with exceptions() enabled may throw.
template <typename T, typename U>
std::ostream &operator<<(std::ostream &os, const std::pair<T, U> &p)
{
  os << p.first << " " << p.second;
  return os;
}
#pragma endregion

// --------------------------------------

// Aho-Corasick automaton over the contiguous alphabet
// [MIN_CHAR, MIN_CHAR + ALPHABET).
//
// Construct it from a dictionary of words (or call build()), then use the
// count_* queries on a text. Default construction yields an automaton with an
// empty dictionary. Dictionary words must only contain alphabet characters;
// text characters outside the alphabet send the automaton back to the root,
// since no dictionary word can span them.
template <char MIN_CHAR = 'a', int ALPHABET = 26>
struct AhoCorasick
{
  static_assert(ALPHABET > 0, "the alphabet must contain at least one character");

  struct node
  {
    // suff       : node reached by dropping the fewest leading characters so
    //              that the remainder is again a node of the trie (-1 for the root)
    // dict       : node reached by dropping the fewest leading characters (at
    //              least one) so that the remainder is a dictionary word (-1 if none)
    // depth      : depth of the node in the trie
    // word_index : index of the word ending exactly at this node (ancestors
    //              and suffixes excluded); -1 if none, the smallest index if several
    // word_count : number of words ending exactly at this node; build() then
    //              adds the counts of every node on the suffix chain
    // link       : per-character target node; trie edges first, and after
    //              build() every missing edge holds the suffix fallback
    //              (-1 while still unset)
    int suff = -1, dict = -1, depth = 0;
    int word_index = -1, word_count = 0;
    int link[ALPHABET];
    node() { std::fill(link, link + ALPHABET, -1); }
    int &operator[](char c)
    {
      const int slot = c - MIN_CHAR;
      // Used while inserting words; an out-of-alphabet character would
      // otherwise index past the link table.
      assert(0 <= slot && slot < ALPHABET);
      return link[slot];
    }
  };

  // nodes                 : all automaton nodes; index 0 is the root
  // W                     : number of words in the current dictionary
  // word_location         : for each word, the node where it ends
  // word_indices_by_depth : word indices ordered by decreasing word length
  // defer                 : for each word, the index of the first identical
  //                         word in the dictionary (its own index if unique)
  std::vector<node> nodes;
  int W = 0;
  std::vector<int> word_location;
  std::vector<int> word_indices_by_depth;
  std::vector<int> defer;

  // Single constructor that also serves as the default constructor. A
  // separate zero-argument overload would be ambiguous with the defaulted
  // argument and would leave the automaton without a root.
  AhoCorasick(const std::vector<std::string> &words = {})
  {
    build(words);
  }

  // Adjacency list of the tree whose parent relation is `suff`.
  // DP and query processing are commonly run on this tree.
  std::vector<std::vector<int>> build_suffix_adj() const
  {
    std::vector<std::vector<int>> adj(nodes.size());
    for (int i = 1; i < int(nodes.size()); i++)
      adj[nodes[i].suff].push_back(i);
    return adj;
  }

  // Returns the child of `current` along `c`, creating it if it does not exist yet.
  int get_or_add_child(int current, char c)
  {
    if (nodes[current][c] >= 0)
      return nodes[current][c];
    const int index = int(nodes.size());
    // Record the edge before growing `nodes`: emplace_back may reallocate.
    nodes[current][c] = index;
    nodes.emplace_back();
    nodes.back().depth = nodes[current].depth + 1;
    return index;
  }

  // Inserts `word` into the trie and returns the node where it ends.
  // The first word to end at a node keeps ownership of that node's word_index.
  int add_word(const std::string &word, int word_index)
  {
    assert(!nodes.empty());
    int current = 0;
    for (char c : word)
      current = get_or_add_child(current, c);
    if (nodes[current].word_index < 0)
      nodes[current].word_index = word_index;
    nodes[current].word_count++;
    return current;
  }

  // Target of edge number `slot` out of `location`. A negative location
  // (the root's suffix parent) and an unset edge both resolve to the root.
  int follow_slot(int location, int slot) const
  {
    if (location < 0)
      return 0;
    return std::max(nodes[location].link[slot], 0);
  }

  // Destination when `c` is appended at `location`. O(1)
  // A character outside the alphabet leads to the root.
  int get_suffix_link(int location, char c) const
  {
    const int slot = c - MIN_CHAR;
    if (slot < 0 || slot >= ALPHABET)
      return 0;
    return follow_slot(location, slot);
  }

  // Rebuilds the automaton from scratch for the given dictionary.
  void build(const std::vector<std::string> &words)
  {
    nodes.assign(1, node());
    W = int(words.size());
    word_location.resize(W);
    defer.resize(W);
    int max_depth = 0;

    for (int i = 0; i < W; i++)
    {
      word_location[i] = add_word(words[i], i);
      max_depth = std::max(max_depth, int(words[i].size()));
      defer[i] = nodes[word_location[i]].word_index;
    }

    // Counting sort of the word indices by decreasing length.
    word_indices_by_depth.resize(W);
    std::vector<int> depth_freq(max_depth + 1, 0);

    for (int i = 0; i < W; i++)
      depth_freq[words[i].size()]++;

    // After this pass depth_freq[d] is the number of words of length >= d.
    for (int i = max_depth - 1; i >= 0; i--)
      depth_freq[i] += depth_freq[i + 1];

    for (int i = 0; i < W; i++)
      word_indices_by_depth[--depth_freq[words[i].size()]] = i;

    // BFS in depth order to compute suffix parents. Iterating edge slots as
    // ints avoids overflowing a char counter for wide alphabets.
    std::vector<int> order = {0};

    for (std::size_t head = 0; head < order.size(); ++head)
    {
      const int current = order[head];
      const int parent_suff = nodes[current].suff;

      for (int slot = 0; slot < ALPHABET; ++slot)
      {
        int &target = nodes[current].link[slot];
        // The suffix parent is shallower, so its edges are already final.
        const int fallback = follow_slot(parent_suff, slot);

        if (target >= 0)
        {
          // Real trie edge: the child's suffix parent is the fallback state.
          node &child = nodes[target];
          child.suff = fallback;
          child.word_count += nodes[fallback].word_count;
          child.dict = nodes[fallback].word_index < 0 ? nodes[fallback].dict : fallback;
          order.push_back(target);
        }
        else
        {
          // Missing edge: shortcut straight to the fallback state.
          target = fallback;
        }
      }
    }
  }

  // Number of occurrences of each dictionary word in text.
  // O(text length + num words)
  std::vector<int> count_matches(const std::string &text) const
  {
    std::vector<int> matches(W, 0);
    int current = 0;

    for (char c : text)
    {
      current = get_suffix_link(current, c);
      // Credit only the longest word ending here; shorter ones are added below.
      const int dict_node = nodes[current].word_index < 0 ? nodes[current].dict : current;

      if (dict_node >= 0)
        matches[nodes[dict_node].word_index]++;
    }

    // Visit words by decreasing length so that each count is complete before
    // it is pushed to the next shorter word on the dictionary chain.
    for (int word_index : word_indices_by_depth)
    {
      const int location = word_location[word_index];
      const int dict_node = nodes[location].dict;

      if (dict_node >= 0)
        matches[nodes[dict_node].word_index] += matches[word_index];
    }

    // Duplicate words take the count of their first occurrence in the dictionary.
    for (int i = 0; i < W; i++)
      matches[i] = matches[defer[i]];

    return matches;
  }

  // For each position i of text, the number of dictionary words that end at
  // character i. O(text length)
  std::vector<int> count_matches_by_position(const std::string &text) const
  {
    std::vector<int> matches(text.size());
    int current = 0;

    for (int i = 0; i < int(text.size()); i++)
    {
      current = get_suffix_link(current, text[i]);
      matches[i] = nodes[current].word_count;
    }

    return matches;
  }

  // Total number of occurrences of dictionary words in text. O(text length)
  std::int64_t count_total_matches(const std::string &text) const
  {
    std::int64_t matches = 0;
    int current = 0;

    for (char c : text)
    {
      current = get_suffix_link(current, c);
      matches += nodes[current].word_count;
    }

    return matches;
  }
};

void solve()
{
  string s;
  cin >> s;
  int m;
  cin >> m;
  vector<string> v(m);
  rep(i, m) cin >> v[i];
  AhoCorasick<'A', 26> aho(v);
  cout << aho.count_total_matches(s) << "\n";
}

// Program entry point: all work is delegated to solve().
int main()
{
  solve();
  return 0;
}
yukicoder

結果

ソースコード