結果
| 問題 |
No.874 正規表現間距離
|
| コンテスト | |
| ユーザー |
qwewe
|
| 提出日時 | 2025-05-14 13:10:53 |
| 言語 | PyPy3 (7.3.15) |
| 結果 |
TLE
|
| 実行時間 | - |
| コード長 | 9,258 bytes |
| コンパイル時間 | 158 ms |
| コンパイル使用メモリ | 82,356 KB |
| 実行使用メモリ | 163,396 KB |
| 最終ジャッジ日時 | 2025-05-14 13:12:17 |
| 合計ジャッジ時間 | 6,286 ms |
|
ジャッジサーバーID (参考情報) |
judge2 / judge3 |
(要ログイン)
| ファイルパターン | 結果 |
|---|---|
| sample | -- * 4 |
| other | AC * 17 TLE * 1 -- * 20 |
ソースコード
import heapq
import sys
# Use sys.stdin.readline for faster input reading
input = sys.stdin.readline
# Define a constant for Epsilon transitions in the NFA.
# Using -1 as it doesn't conflict with ASCII ordinals of lowercase letters.
EPSILON = -1
# Function to parse a regular expression string and build a Non-deterministic Finite Automaton (NFA).
# The NFA uses Thompson's construction principles.
def build_nfa(regex):
"""
Builds an NFA from a given regular expression string.
Args:
regex (str): The regular expression string.
Returns:
tuple: (num_states, start_node, final_node, adj)
num_states: Total number of states in the NFA.
start_node: The ID of the start state.
final_node: The ID of the final state.
adj: Adjacency list representation of the NFA.
Dictionary mapping state_id to list of tuples (label, to_state_id).
'label' is either EPSILON or the ASCII ordinal of a character.
"""
adj = {} # Adjacency list: state_id -> list of (label, to_state_id)
num_states = 1 # Start with state 0
start_node = 0
curr_node = 0 # Track the current state during construction
adj[curr_node] = [] # Initialize adjacency list for the start node
i = 0
while i < len(regex):
char = regex[i]
# Check if the character is followed by a quantifier ('?' or '*')
if i + 1 < len(regex) and regex[i+1] in ['?', '*']:
quantifier = regex[i+1]
if quantifier == '?':
# Optional character c?: Matches c zero or one time.
# Need a new state after this component.
new_node = num_states
num_states += 1
adj[new_node] = [] # Initialize adj list for the new state
# Add transitions from the current node to the new node:
if curr_node not in adj: adj[curr_node] = []
# Path for zero occurrence (skip c): Epsilon transition
adj[curr_node].append((EPSILON, new_node))
# Path for one occurrence (match c): Transition on character c
adj[curr_node].append((ord(char), new_node))
curr_node = new_node # Update current node
i += 2 # Move index past 'c' and '?'
else: # quantifier == '*'
# Star character c*: Matches c zero or more times.
# Need two new states: one intermediate for the loop, one after the structure.
inter_node = num_states # Intermediate state for the c* loop
num_states += 1
adj[inter_node] = []
new_node = num_states # State after the entire c* structure
num_states += 1
adj[new_node] = []
# Connect current node to the star structure and the node after it
if curr_node not in adj: adj[curr_node] = []
# Path to enter the loop structure
adj[curr_node].append((EPSILON, inter_node))
# Path to skip the star structure entirely (0 occurrences)
adj[curr_node].append((EPSILON, new_node))
# Set up the loop within the star structure
adj[inter_node].append((ord(char), inter_node)) # Loop back on matching c
adj[inter_node].append((EPSILON, new_node)) # Exit the loop
curr_node = new_node # Update current node to the state after c*
i += 2 # Move index past 'c' and '*'
else:
# Literal character (no quantifier following)
# Simple transition on the character to a new state.
new_node = num_states
num_states += 1
adj[new_node] = []
if curr_node not in adj: adj[curr_node] = []
adj[curr_node].append((ord(char), new_node)) # Transition on the character
curr_node = new_node # Update current node
i += 1 # Move index past the character
final_node = curr_node # The last node reached during construction is the final state of the NFA.
# Ensure all state IDs up to num_states-1 have an entry in the adj dictionary.
# This prevents KeyError if a state has no outgoing transitions.
for s_id in range(num_states):
if s_id not in adj:
adj[s_id] = []
return num_states, start_node, final_node, adj
# Read the two regular expressions A and B from standard input
A = input().strip()
B = input().strip()
# Build the NFAs for A and B
num_states_A, start_A, final_A, adj_A = build_nfa(A)
num_states_B, start_B, final_B, adj_B = build_nfa(B)
# Compute the minimum edit distance using Dijkstra's algorithm on the product automaton.
# The state space of the product automaton consists of pairs (state_A, state_B).
# dist dictionary stores the minimum edit distance found so far to reach each state pair.
dist = {}
# Priority queue stores tuples (current_distance, state_A, state_B), ordered by distance.
pq = [(0, start_A, start_B)]
dist[(start_A, start_B)] = 0 # The distance to the initial state pair (start_A, start_B) is 0.
while pq:
# Extract the state pair with the smallest distance from the priority queue.
d, u_A, u_B = heapq.heappop(pq)
# If we have already found a shorter path to this state pair, skip processing.
current_dist = dist.get((u_A, u_B), float('inf'))
if d > current_dist:
continue
# Explore possible transitions (corresponding to edit operations) from the current state pair (u_A, u_B).
# 1. Epsilon transitions (cost 0): These correspond to internal NFA transitions that don't consume input characters.
# Check for Epsilon transitions originating from state u_A in NFA A.
for label_A, v_A in adj_A.get(u_A, []):
if label_A == EPSILON:
new_dist = d # Epsilon transitions have zero cost.
# If this path is shorter than any known path to (v_A, u_B), update distance and push to PQ.
if new_dist < dist.get((v_A, u_B), float('inf')):
dist[(v_A, u_B)] = new_dist
heapq.heappush(pq, (new_dist, v_A, u_B))
# Check for Epsilon transitions originating from state u_B in NFA B.
for label_B, v_B in adj_B.get(u_B, []):
if label_B == EPSILON:
new_dist = d # Epsilon transitions have zero cost.
# If this path is shorter than any known path to (u_A, v_B), update distance and push to PQ.
if new_dist < dist.get((u_A, v_B), float('inf')):
dist[(u_A, v_B)] = new_dist
heapq.heappush(pq, (new_dist, u_A, v_B))
# 2. Character transitions (representing edit operations with costs 0 or 1)
# Match / Replace operation: Consume characters from both A and B simultaneously.
# Iterate over character transitions from u_A in NFA A.
for label_A, v_A in adj_A.get(u_A, []):
if label_A != EPSILON:
# Iterate over character transitions from u_B in NFA B.
for label_B, v_B in adj_B.get(u_B, []):
if label_B != EPSILON:
# Calculate cost: 0 if characters match, 1 if they differ (replacement needed).
cost = 0 if label_A == label_B else 1
new_dist = d + cost
# If this path is shorter to reach (v_A, v_B), update distance and push to PQ.
if new_dist < dist.get((v_A, v_B), float('inf')):
dist[(v_A, v_B)] = new_dist
heapq.heappush(pq, (new_dist, v_A, v_B))
# Delete operation: Consume a character from A only (cost 1).
# Iterate over character transitions from u_A in NFA A.
for label_A, v_A in adj_A.get(u_A, []):
if label_A != EPSILON:
cost = 1 # Deletion cost is always 1.
new_dist = d + cost
# If this path is shorter to reach (v_A, u_B), update distance and push to PQ.
if new_dist < dist.get((v_A, u_B), float('inf')):
dist[(v_A, u_B)] = new_dist
heapq.heappush(pq, (new_dist, v_A, u_B))
# Insert operation: Consume a character from B only (cost 1).
# Iterate over character transitions from u_B in NFA B.
for label_B, v_B in adj_B.get(u_B, []):
if label_B != EPSILON:
cost = 1 # Insertion cost is always 1.
new_dist = d + cost
# If this path is shorter to reach (u_A, v_B), update distance and push to PQ.
if new_dist < dist.get((u_A, v_B), float('inf')):
dist[(u_A, v_B)] = new_dist
heapq.heappush(pq, (new_dist, u_A, v_B))
# The final answer is the minimum distance found to reach the final state pair (final_A, final_B).
# If the final state pair is unreachable, dist.get() will return float('inf').
# The problem statement implies that a finite distance solution always exists.
final_dist = dist.get((final_A, final_B), float('inf'))
# Print the minimum distance as an integer.
print(int(final_dist))
qwewe