Accept is single-valued; the multi-value thing never made sense

I mean, it did when we thought we were going to weave NFA states as we
were building them, but we ended up not doing that. Instead we just use
the fancy EdgeList splitting magic when building DFAs from the NFA.
That approach has the same power and is simpler code, and it also means
that we'll *never* be asked to have multiple Terminals be accepted from
a single NFA state.
This commit is contained in:
John Doty 2024-08-27 15:40:37 -07:00
parent 208491d56e
commit 76ef85483e

View file

@ -2149,15 +2149,15 @@ class EdgeList[ET]:
class NFAState: class NFAState:
"""An NFA state. Each state can be the accept state, with one or more """An NFA state. A state can be an accept state if it has a Terminal
Terminals as the result.""" associated with it."""
accept: list[Terminal] accept: Terminal | None
epsilons: list["NFAState"] epsilons: list["NFAState"]
_edges: EdgeList["NFAState"] _edges: EdgeList["NFAState"]
def __init__(self): def __init__(self):
self.accept = [] self.accept = None
self.epsilons = [] self.epsilons = []
self._edges = EdgeList() self._edges = EdgeList()
@ -2183,7 +2183,7 @@ class NFAState:
continue continue
visited.add(state) visited.add(state)
label = ", ".join([t.value for t in state.accept if t.value is not None]) label = state.accept.value if state.accept is not None else ""
f.write(f' {id(state)} [label="{label}"];\n') f.write(f' {id(state)} [label="{label}"];\n')
for target in state.epsilons: for target in state.epsilons:
stack.append(target) stack.append(target)
@ -2460,42 +2460,42 @@ class NFASuperState:
def accept_terminal(self) -> Terminal | None: def accept_terminal(self) -> Terminal | None:
accept = None accept = None
for st in self.states:
for ac in st.accept:
if accept is None:
accept = ac
elif accept.value != ac.value:
accept_regex = isinstance(accept.pattern, Re)
ac_regex = isinstance(ac.pattern, Re)
if accept_regex and not ac_regex: for st in self.states:
accept = ac if st.accept is None:
elif ac_regex and not accept_regex: continue
pass
else: if accept is None:
raise ValueError( accept = st.accept
f"Lexer is ambiguous: cannot distinguish between {accept.value} ('{accept.pattern}') and {ac.value} ('{ac.pattern}')" elif accept.value != st.accept.value:
) if accept.regex and not st.accept.regex:
accept = st.accept
elif st.accept.regex and not accept.regex:
pass
else:
raise ValueError(
f"Lexer is ambiguous: cannot distinguish between {accept.value} ('{accept.pattern}') and {st.accept.value} ('{st.accept.pattern}')"
)
return accept return accept
def compile_terminals(terminals: typing.Iterable[Terminal]) -> LexerTable: def compile_lexer(grammar: Grammar) -> LexerTable:
# Parse the terminals all together into a big NFA rooted at `NFA`. # Parse the terminals all together into a big NFA rooted at `NFA`.
NFA = NFAState() NFA = NFAState()
for terminal in terminals: for terminal in grammar.terminals:
pattern = terminal.pattern pattern = terminal.pattern
if isinstance(pattern, Re): if isinstance(pattern, Re):
start, ends = pattern.to_nfa() start, ends = pattern.to_nfa()
for end in ends: for end in ends:
end.accept.append(terminal) end.accept = terminal
NFA.epsilons.append(start) NFA.epsilons.append(start)
else: else:
start = end = NFAState() start = end = NFAState()
for c in pattern: for c in pattern:
end = end.add_edge(Span.from_str(c), NFAState()) end = end.add_edge(Span.from_str(c), NFAState())
end.accept.append(terminal) end.accept = terminal
NFA.epsilons.append(start) NFA.epsilons.append(start)
NFA.dump_graph() NFA.dump_graph()
@ -2525,12 +2525,8 @@ def compile_terminals(terminals: typing.Iterable[Terminal]) -> LexerTable:
] ]
def compile_lexer(grammar: Grammar) -> LexerTable: def dump_lexer_table(table: LexerTable, name: str = "lexer.dot"):
return compile_terminals(grammar.terminals) with open(name, "w", encoding="utf-8") as f:
def dump_lexer_table(table: LexerTable):
with open("lexer.dot", "w", encoding="utf-8") as f:
f.write("digraph G {\n") f.write("digraph G {\n")
for index, (accept, edges) in enumerate(table): for index, (accept, edges) in enumerate(table):
label = accept.value if accept is not None else "" label = accept.value if accept is not None else ""