結果

問題 No.430 文字列検索
ユーザー aaaaaaaaaa
提出日時 2021-08-12 16:17:48
言語 PyPy3
(7.3.15)
結果
AC  
実行時間 172 ms / 2,000 ms
コード長 3,787 bytes
コンパイル時間 278 ms
コンパイル使用メモリ 82,504 KB
実行使用メモリ 111,564 KB
最終ジャッジ日時 2024-04-09 20:38:02
合計ジャッジ時間 2,948 ms
ジャッジサーバーID
(参考情報)
judge5 / judge3
このコードへのチャレンジ
(要ログイン)

テストケース

テストケース表示
入力 結果 実行時間
実行使用メモリ
testcase_00 AC 35 ms
55,300 KB
testcase_01 AC 172 ms
111,564 KB
testcase_02 AC 99 ms
88,540 KB
testcase_03 AC 99 ms
88,064 KB
testcase_04 AC 36 ms
55,568 KB
testcase_05 AC 35 ms
55,780 KB
testcase_06 AC 38 ms
55,252 KB
testcase_07 AC 35 ms
56,292 KB
testcase_08 AC 53 ms
70,828 KB
testcase_09 AC 58 ms
74,016 KB
testcase_10 AC 54 ms
70,220 KB
testcase_11 AC 157 ms
104,224 KB
testcase_12 AC 157 ms
107,580 KB
testcase_13 AC 165 ms
107,108 KB
testcase_14 AC 152 ms
101,320 KB
testcase_15 AC 146 ms
94,968 KB
testcase_16 AC 136 ms
94,772 KB
testcase_17 AC 129 ms
94,700 KB
権限があれば一括ダウンロードができます

ソースコード

diff #

from collections import defaultdict

class Trie:
    def __init__(self, base=ord("a"), char_size=26):
        self.nexts = []
        self.accepts = []
        self.cs = []
        self.commons = []
        self.base = base
        self.char_size = char_size
        self.root = 0
        self.words = defaultdict(set)
        self._construct_node(self.root)

    def _construct_node(self, n):
        for i in range(self.char_size):
            self.nexts.append(-1)
        self.accepts.append(0)
        self.cs.append(n)
        self.commons.append(0)

    def insert(self, word):
        node_id = self.root
        for i in range(len(word)):
            c = ord(word[i])-self.base
            next_id = self.nexts[self.char_size*node_id+c]
            if next_id == -1:
                next_id = len(self.cs)
                self.nexts[self.char_size*node_id+c] = next_id
                self._construct_node(c)
            self.commons[node_id] += 1
            node_id = next_id

        self.commons[node_id] += 1
        self.accepts[node_id] += 1
        return node_id

    def search(self, word, prefix=False):
        node_id = self.root
        for i in range(len(word)):
            c = ord(word[i]) - self.base
            next_id = self.nexts[self.char_size*node_id+c]
            if next_id == -1:
                return False
            node_id = next_id
        if prefix:
            return True
        return self.accepts[node_id] > 0
    
    def size(self):
        return len(self.cs)
    
    def count(self):
        return self.commons[0]

from collections import deque, defaultdict

class AhoCorasickTrie(Trie):
    def __init__(self, patterns, base=ord("a"), char_size=26):
        super().__init__(base, char_size)
        self.patterns = patterns
        self.pattern_len = [len(pat) for pat in patterns]
        self.matches = defaultdict(set)
        for i, pat in enumerate(patterns):
            node_id = self.insert(pat)
            self.matches[node_id].add(i)
        self.back_edge = self._BFS()

    def _BFS(self):
        back_edge = [0]*self.size()

        Q = deque([])

        for i in range(self.char_size):
            v1 = self.nexts[self.char_size*self.root+i]
            if v1 != -1:
                Q.append(v1)
                back_edge[v1] = self.root

        while len(Q) > 0:
            v = Q.pop()
            if self.commons[v] == 0:
                continue
            for i in range(self.char_size):
                v1 = self.nexts[self.char_size*v+i]
                if v1 != -1:
                    back = back_edge[v]
                    while self.nexts[self.char_size*back+i] == -1 and back != self.root:
                        back = back_edge[back]
                    if self.nexts[self.char_size*back+i] == -1:
                        back_edge[v1] = self.root
                    else:
                        back_edge[v1] = self.nexts[self.char_size*back+i]
                        self.matches[v1] |= self.matches[back_edge[v1]]
                    Q.appendleft(v1)
        return back_edge        
        
    def match(self, S):
        now = self.root
        match_list = defaultdict(list)
        for i in range(len(S)):
            c = ord(S[i]) - self.base
            while self.nexts[self.char_size*now+c] == -1 and now != self.root:
                now = self.back_edge[now]
            if self.nexts[self.char_size*now+c] != -1:
                now = self.nexts[self.char_size*now+c]
                for pat in self.matches[now]:
                    match_list[pat].append(i-self.pattern_len[pat]+1)
        return match_list

S = input()
M = int(input())
C = [input() for i in range(M)]

act = AhoCorasickTrie(C, base=ord("A"))

res = act.match(S)

print(sum(len(c) for _,c in res.items()))
0