結果

問題 No.874 正規表現間距離
ユーザー qwewe
提出日時 2025-05-14 13:10:53
言語 PyPy3
(7.3.15)
結果
TLE  
実行時間 -
コード長 9,258 bytes
コンパイル時間 158 ms
コンパイル使用メモリ 82,356 KB
実行使用メモリ 163,396 KB
最終ジャッジ日時 2025-05-14 13:12:17
合計ジャッジ時間 6,286 ms
ジャッジサーバーID
(参考情報)
judge2 / judge3
このコードへのチャレンジ
(要ログイン)
ファイルパターン 結果
sample -- * 4
other AC * 17 TLE * 1 -- * 20
権限があれば一括ダウンロードができます

ソースコード

diff #

import heapq
import sys

# Use sys.stdin.readline for faster input reading
input = sys.stdin.readline

# Define a constant for Epsilon transitions in the NFA.
# Using -1 as it doesn't conflict with ASCII ordinals of lowercase letters.
EPSILON = -1 

# Function to parse a regular expression string and build a Non-deterministic Finite Automaton (NFA).
# The NFA uses Thompson's construction principles.
def build_nfa(regex):
    """
    Builds an NFA from a given regular expression string.
    Args:
        regex (str): The regular expression string.
    Returns:
        tuple: (num_states, start_node, final_node, adj)
               num_states: Total number of states in the NFA.
               start_node: The ID of the start state.
               final_node: The ID of the final state.
               adj: Adjacency list representation of the NFA. 
                    Dictionary mapping state_id to list of tuples (label, to_state_id).
                    'label' is either EPSILON or the ASCII ordinal of a character.
    """
    adj = {} # Adjacency list: state_id -> list of (label, to_state_id)
    num_states = 1 # Start with state 0
    start_node = 0
    curr_node = 0 # Track the current state during construction
    
    adj[curr_node] = [] # Initialize adjacency list for the start node

    i = 0
    while i < len(regex):
        char = regex[i]
        
        # Check if the character is followed by a quantifier ('?' or '*')
        if i + 1 < len(regex) and regex[i+1] in ['?', '*']:
            quantifier = regex[i+1]
            
            if quantifier == '?':
                # Optional character c?: Matches c zero or one time.
                # Need a new state after this component.
                new_node = num_states
                num_states += 1
                adj[new_node] = [] # Initialize adj list for the new state

                # Add transitions from the current node to the new node:
                if curr_node not in adj: adj[curr_node] = []
                 # Path for zero occurrence (skip c): Epsilon transition
                adj[curr_node].append((EPSILON, new_node)) 
                 # Path for one occurrence (match c): Transition on character c
                adj[curr_node].append((ord(char), new_node)) 
                
                curr_node = new_node # Update current node
                i += 2 # Move index past 'c' and '?'
            
            else: # quantifier == '*'
                # Star character c*: Matches c zero or more times.
                # Need two new states: one intermediate for the loop, one after the structure.
                inter_node = num_states # Intermediate state for the c* loop
                num_states += 1
                adj[inter_node] = []

                new_node = num_states # State after the entire c* structure
                num_states += 1
                adj[new_node] = []

                # Connect current node to the star structure and the node after it
                if curr_node not in adj: adj[curr_node] = []
                 # Path to enter the loop structure
                adj[curr_node].append((EPSILON, inter_node))
                 # Path to skip the star structure entirely (0 occurrences)
                adj[curr_node].append((EPSILON, new_node)) 

                # Set up the loop within the star structure
                adj[inter_node].append((ord(char), inter_node)) # Loop back on matching c
                adj[inter_node].append((EPSILON, new_node)) # Exit the loop
                
                curr_node = new_node # Update current node to the state after c*
                i += 2 # Move index past 'c' and '*'
            
        else:
            # Literal character (no quantifier following)
            # Simple transition on the character to a new state.
            new_node = num_states
            num_states += 1
            adj[new_node] = []
            
            if curr_node not in adj: adj[curr_node] = []
            adj[curr_node].append((ord(char), new_node)) # Transition on the character
            
            curr_node = new_node # Update current node
            i += 1 # Move index past the character
            
    final_node = curr_node # The last node reached during construction is the final state of the NFA.
    
    # Ensure all state IDs up to num_states-1 have an entry in the adj dictionary.
    # This prevents KeyError if a state has no outgoing transitions.
    for s_id in range(num_states):
        if s_id not in adj:
             adj[s_id] = []

    return num_states, start_node, final_node, adj

# Read the two regular expressions A and B from standard input
A = input().strip()
B = input().strip()

# Build the NFAs for A and B
num_states_A, start_A, final_A, adj_A = build_nfa(A)
num_states_B, start_B, final_B, adj_B = build_nfa(B)

# Compute the minimum edit distance using Dijkstra's algorithm on the product automaton.
# The state space of the product automaton consists of pairs (state_A, state_B).
# dist dictionary stores the minimum edit distance found so far to reach each state pair.
dist = {}
# Priority queue stores tuples (current_distance, state_A, state_B), ordered by distance.
pq = [(0, start_A, start_B)] 
dist[(start_A, start_B)] = 0 # The distance to the initial state pair (start_A, start_B) is 0.

while pq:
    # Extract the state pair with the smallest distance from the priority queue.
    d, u_A, u_B = heapq.heappop(pq)

    # If we have already found a shorter path to this state pair, skip processing.
    current_dist = dist.get((u_A, u_B), float('inf'))
    if d > current_dist:
        continue

    # Explore possible transitions (corresponding to edit operations) from the current state pair (u_A, u_B).

    # 1. Epsilon transitions (cost 0): These correspond to internal NFA transitions that don't consume input characters.
    
    # Check for Epsilon transitions originating from state u_A in NFA A.
    for label_A, v_A in adj_A.get(u_A, []):
        if label_A == EPSILON:
            new_dist = d # Epsilon transitions have zero cost.
            # If this path is shorter than any known path to (v_A, u_B), update distance and push to PQ.
            if new_dist < dist.get((v_A, u_B), float('inf')):
                dist[(v_A, u_B)] = new_dist
                heapq.heappush(pq, (new_dist, v_A, u_B))

    # Check for Epsilon transitions originating from state u_B in NFA B.
    for label_B, v_B in adj_B.get(u_B, []):
        if label_B == EPSILON:
             new_dist = d # Epsilon transitions have zero cost.
             # If this path is shorter than any known path to (u_A, v_B), update distance and push to PQ.
             if new_dist < dist.get((u_A, v_B), float('inf')):
                dist[(u_A, v_B)] = new_dist
                heapq.heappush(pq, (new_dist, u_A, v_B))
    
    # 2. Character transitions (representing edit operations with costs 0 or 1)

    # Match / Replace operation: Consume characters from both A and B simultaneously.
    # Iterate over character transitions from u_A in NFA A.
    for label_A, v_A in adj_A.get(u_A, []):
        if label_A != EPSILON: 
            # Iterate over character transitions from u_B in NFA B.
            for label_B, v_B in adj_B.get(u_B, []):
                 if label_B != EPSILON:
                    # Calculate cost: 0 if characters match, 1 if they differ (replacement needed).
                    cost = 0 if label_A == label_B else 1
                    new_dist = d + cost
                    # If this path is shorter to reach (v_A, v_B), update distance and push to PQ.
                    if new_dist < dist.get((v_A, v_B), float('inf')):
                        dist[(v_A, v_B)] = new_dist
                        heapq.heappush(pq, (new_dist, v_A, v_B))

    # Delete operation: Consume a character from A only (cost 1).
    # Iterate over character transitions from u_A in NFA A.
    for label_A, v_A in adj_A.get(u_A, []):
        if label_A != EPSILON:
            cost = 1 # Deletion cost is always 1.
            new_dist = d + cost
            # If this path is shorter to reach (v_A, u_B), update distance and push to PQ.
            if new_dist < dist.get((v_A, u_B), float('inf')):
                dist[(v_A, u_B)] = new_dist
                heapq.heappush(pq, (new_dist, v_A, u_B))

    # Insert operation: Consume a character from B only (cost 1).
    # Iterate over character transitions from u_B in NFA B.
    for label_B, v_B in adj_B.get(u_B, []):
         if label_B != EPSILON:
            cost = 1 # Insertion cost is always 1.
            new_dist = d + cost
            # If this path is shorter to reach (u_A, v_B), update distance and push to PQ.
            if new_dist < dist.get((u_A, v_B), float('inf')):
                dist[(u_A, v_B)] = new_dist
                heapq.heappush(pq, (new_dist, u_A, v_B))


# The final answer is the minimum distance found to reach the final state pair (final_A, final_B).
# If the final state pair is unreachable, dist.get() will return float('inf').
# The problem statement implies that a finite distance solution always exists.
final_dist = dist.get((final_A, final_B), float('inf'))

# Print the minimum distance as an integer.
print(int(final_dist))
0