結果
問題 |
No.874 正規表現間距離
|
ユーザー |
![]() |
提出日時 | 2025-05-14 13:10:53 |
言語 | PyPy3 (7.3.15) |
結果 |
TLE
|
実行時間 | - |
コード長 | 9,258 bytes |
コンパイル時間 | 158 ms |
コンパイル使用メモリ | 82,356 KB |
実行使用メモリ | 163,396 KB |
最終ジャッジ日時 | 2025-05-14 13:12:17 |
合計ジャッジ時間 | 6,286 ms |
ジャッジサーバーID (参考情報) |
judge2 / judge3 |
(要ログイン)
ファイルパターン | 結果 |
---|---|
sample | -- * 4 |
other | AC * 17 TLE * 1 -- * 20 |
ソースコード
import heapq import sys # Use sys.stdin.readline for faster input reading input = sys.stdin.readline # Define a constant for Epsilon transitions in the NFA. # Using -1 as it doesn't conflict with ASCII ordinals of lowercase letters. EPSILON = -1 # Function to parse a regular expression string and build a Non-deterministic Finite Automaton (NFA). # The NFA uses Thompson's construction principles. def build_nfa(regex): """ Builds an NFA from a given regular expression string. Args: regex (str): The regular expression string. Returns: tuple: (num_states, start_node, final_node, adj) num_states: Total number of states in the NFA. start_node: The ID of the start state. final_node: The ID of the final state. adj: Adjacency list representation of the NFA. Dictionary mapping state_id to list of tuples (label, to_state_id). 'label' is either EPSILON or the ASCII ordinal of a character. """ adj = {} # Adjacency list: state_id -> list of (label, to_state_id) num_states = 1 # Start with state 0 start_node = 0 curr_node = 0 # Track the current state during construction adj[curr_node] = [] # Initialize adjacency list for the start node i = 0 while i < len(regex): char = regex[i] # Check if the character is followed by a quantifier ('?' or '*') if i + 1 < len(regex) and regex[i+1] in ['?', '*']: quantifier = regex[i+1] if quantifier == '?': # Optional character c?: Matches c zero or one time. # Need a new state after this component. new_node = num_states num_states += 1 adj[new_node] = [] # Initialize adj list for the new state # Add transitions from the current node to the new node: if curr_node not in adj: adj[curr_node] = [] # Path for zero occurrence (skip c): Epsilon transition adj[curr_node].append((EPSILON, new_node)) # Path for one occurrence (match c): Transition on character c adj[curr_node].append((ord(char), new_node)) curr_node = new_node # Update current node i += 2 # Move index past 'c' and '?' else: # quantifier == '*' # Star character c*: Matches c zero or more times. # Need two new states: one intermediate for the loop, one after the structure. inter_node = num_states # Intermediate state for the c* loop num_states += 1 adj[inter_node] = [] new_node = num_states # State after the entire c* structure num_states += 1 adj[new_node] = [] # Connect current node to the star structure and the node after it if curr_node not in adj: adj[curr_node] = [] # Path to enter the loop structure adj[curr_node].append((EPSILON, inter_node)) # Path to skip the star structure entirely (0 occurrences) adj[curr_node].append((EPSILON, new_node)) # Set up the loop within the star structure adj[inter_node].append((ord(char), inter_node)) # Loop back on matching c adj[inter_node].append((EPSILON, new_node)) # Exit the loop curr_node = new_node # Update current node to the state after c* i += 2 # Move index past 'c' and '*' else: # Literal character (no quantifier following) # Simple transition on the character to a new state. new_node = num_states num_states += 1 adj[new_node] = [] if curr_node not in adj: adj[curr_node] = [] adj[curr_node].append((ord(char), new_node)) # Transition on the character curr_node = new_node # Update current node i += 1 # Move index past the character final_node = curr_node # The last node reached during construction is the final state of the NFA. # Ensure all state IDs up to num_states-1 have an entry in the adj dictionary. # This prevents KeyError if a state has no outgoing transitions. for s_id in range(num_states): if s_id not in adj: adj[s_id] = [] return num_states, start_node, final_node, adj # Read the two regular expressions A and B from standard input A = input().strip() B = input().strip() # Build the NFAs for A and B num_states_A, start_A, final_A, adj_A = build_nfa(A) num_states_B, start_B, final_B, adj_B = build_nfa(B) # Compute the minimum edit distance using Dijkstra's algorithm on the product automaton. # The state space of the product automaton consists of pairs (state_A, state_B). # dist dictionary stores the minimum edit distance found so far to reach each state pair. dist = {} # Priority queue stores tuples (current_distance, state_A, state_B), ordered by distance. pq = [(0, start_A, start_B)] dist[(start_A, start_B)] = 0 # The distance to the initial state pair (start_A, start_B) is 0. while pq: # Extract the state pair with the smallest distance from the priority queue. d, u_A, u_B = heapq.heappop(pq) # If we have already found a shorter path to this state pair, skip processing. current_dist = dist.get((u_A, u_B), float('inf')) if d > current_dist: continue # Explore possible transitions (corresponding to edit operations) from the current state pair (u_A, u_B). # 1. Epsilon transitions (cost 0): These correspond to internal NFA transitions that don't consume input characters. # Check for Epsilon transitions originating from state u_A in NFA A. for label_A, v_A in adj_A.get(u_A, []): if label_A == EPSILON: new_dist = d # Epsilon transitions have zero cost. # If this path is shorter than any known path to (v_A, u_B), update distance and push to PQ. if new_dist < dist.get((v_A, u_B), float('inf')): dist[(v_A, u_B)] = new_dist heapq.heappush(pq, (new_dist, v_A, u_B)) # Check for Epsilon transitions originating from state u_B in NFA B. for label_B, v_B in adj_B.get(u_B, []): if label_B == EPSILON: new_dist = d # Epsilon transitions have zero cost. # If this path is shorter than any known path to (u_A, v_B), update distance and push to PQ. if new_dist < dist.get((u_A, v_B), float('inf')): dist[(u_A, v_B)] = new_dist heapq.heappush(pq, (new_dist, u_A, v_B)) # 2. Character transitions (representing edit operations with costs 0 or 1) # Match / Replace operation: Consume characters from both A and B simultaneously. # Iterate over character transitions from u_A in NFA A. for label_A, v_A in adj_A.get(u_A, []): if label_A != EPSILON: # Iterate over character transitions from u_B in NFA B. for label_B, v_B in adj_B.get(u_B, []): if label_B != EPSILON: # Calculate cost: 0 if characters match, 1 if they differ (replacement needed). cost = 0 if label_A == label_B else 1 new_dist = d + cost # If this path is shorter to reach (v_A, v_B), update distance and push to PQ. if new_dist < dist.get((v_A, v_B), float('inf')): dist[(v_A, v_B)] = new_dist heapq.heappush(pq, (new_dist, v_A, v_B)) # Delete operation: Consume a character from A only (cost 1). # Iterate over character transitions from u_A in NFA A. for label_A, v_A in adj_A.get(u_A, []): if label_A != EPSILON: cost = 1 # Deletion cost is always 1. new_dist = d + cost # If this path is shorter to reach (v_A, u_B), update distance and push to PQ. if new_dist < dist.get((v_A, u_B), float('inf')): dist[(v_A, u_B)] = new_dist heapq.heappush(pq, (new_dist, v_A, u_B)) # Insert operation: Consume a character from B only (cost 1). # Iterate over character transitions from u_B in NFA B. for label_B, v_B in adj_B.get(u_B, []): if label_B != EPSILON: cost = 1 # Insertion cost is always 1. new_dist = d + cost # If this path is shorter to reach (u_A, v_B), update distance and push to PQ. if new_dist < dist.get((u_A, v_B), float('inf')): dist[(u_A, v_B)] = new_dist heapq.heappush(pq, (new_dist, u_A, v_B)) # The final answer is the minimum distance found to reach the final state pair (final_A, final_B). # If the final state pair is unreachable, dist.get() will return float('inf'). # The problem statement implies that a finite distance solution always exists. final_dist = dist.get((final_A, final_B), float('inf')) # Print the minimum distance as an integer. print(int(final_dist))