Accept is single-valued; the multi-value version never really made sense
I mean, it did when we thought we were going to weave NFA states together as we were building them, but we ended up not doing that; instead we just use the fancy EdgeList splitting magic when building DFAs from the NFA. That approach has the same power, is simpler code, and also means that we'll *never* be asked to accept multiple Terminals from a single NFA state.
This commit is contained in:
parent
208491d56e
commit
76ef85483e
1 changed files with 26 additions and 30 deletions
|
|
@ -2149,15 +2149,15 @@ class EdgeList[ET]:
|
||||||
|
|
||||||
|
|
||||||
class NFAState:
|
class NFAState:
|
||||||
"""An NFA state. Each state can be the accept state, with one or more
|
"""An NFA state. A state can be an accept state if it has a Terminal
|
||||||
Terminals as the result."""
|
associated with it."""
|
||||||
|
|
||||||
accept: list[Terminal]
|
accept: Terminal | None
|
||||||
epsilons: list["NFAState"]
|
epsilons: list["NFAState"]
|
||||||
_edges: EdgeList["NFAState"]
|
_edges: EdgeList["NFAState"]
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.accept = []
|
self.accept = None
|
||||||
self.epsilons = []
|
self.epsilons = []
|
||||||
self._edges = EdgeList()
|
self._edges = EdgeList()
|
||||||
|
|
||||||
|
|
@ -2183,7 +2183,7 @@ class NFAState:
|
||||||
continue
|
continue
|
||||||
visited.add(state)
|
visited.add(state)
|
||||||
|
|
||||||
label = ", ".join([t.value for t in state.accept if t.value is not None])
|
label = state.accept.value if state.accept is not None else ""
|
||||||
f.write(f' {id(state)} [label="{label}"];\n')
|
f.write(f' {id(state)} [label="{label}"];\n')
|
||||||
for target in state.epsilons:
|
for target in state.epsilons:
|
||||||
stack.append(target)
|
stack.append(target)
|
||||||
|
|
@ -2460,42 +2460,42 @@ class NFASuperState:
|
||||||
|
|
||||||
def accept_terminal(self) -> Terminal | None:
|
def accept_terminal(self) -> Terminal | None:
|
||||||
accept = None
|
accept = None
|
||||||
for st in self.states:
|
|
||||||
for ac in st.accept:
|
|
||||||
if accept is None:
|
|
||||||
accept = ac
|
|
||||||
elif accept.value != ac.value:
|
|
||||||
accept_regex = isinstance(accept.pattern, Re)
|
|
||||||
ac_regex = isinstance(ac.pattern, Re)
|
|
||||||
|
|
||||||
if accept_regex and not ac_regex:
|
for st in self.states:
|
||||||
accept = ac
|
if st.accept is None:
|
||||||
elif ac_regex and not accept_regex:
|
continue
|
||||||
pass
|
|
||||||
else:
|
if accept is None:
|
||||||
raise ValueError(
|
accept = st.accept
|
||||||
f"Lexer is ambiguous: cannot distinguish between {accept.value} ('{accept.pattern}') and {ac.value} ('{ac.pattern}')"
|
elif accept.value != st.accept.value:
|
||||||
)
|
if accept.regex and not st.accept.regex:
|
||||||
|
accept = st.accept
|
||||||
|
elif st.accept.regex and not accept.regex:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
raise ValueError(
|
||||||
|
f"Lexer is ambiguous: cannot distinguish between {accept.value} ('{accept.pattern}') and {st.accept.value} ('{st.accept.pattern}')"
|
||||||
|
)
|
||||||
|
|
||||||
return accept
|
return accept
|
||||||
|
|
||||||
|
|
||||||
def compile_terminals(terminals: typing.Iterable[Terminal]) -> LexerTable:
|
def compile_lexer(grammar: Grammar) -> LexerTable:
|
||||||
# Parse the terminals all together into a big NFA rooted at `NFA`.
|
# Parse the terminals all together into a big NFA rooted at `NFA`.
|
||||||
NFA = NFAState()
|
NFA = NFAState()
|
||||||
for terminal in terminals:
|
for terminal in grammar.terminals:
|
||||||
pattern = terminal.pattern
|
pattern = terminal.pattern
|
||||||
if isinstance(pattern, Re):
|
if isinstance(pattern, Re):
|
||||||
start, ends = pattern.to_nfa()
|
start, ends = pattern.to_nfa()
|
||||||
for end in ends:
|
for end in ends:
|
||||||
end.accept.append(terminal)
|
end.accept = terminal
|
||||||
NFA.epsilons.append(start)
|
NFA.epsilons.append(start)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
start = end = NFAState()
|
start = end = NFAState()
|
||||||
for c in pattern:
|
for c in pattern:
|
||||||
end = end.add_edge(Span.from_str(c), NFAState())
|
end = end.add_edge(Span.from_str(c), NFAState())
|
||||||
end.accept.append(terminal)
|
end.accept = terminal
|
||||||
NFA.epsilons.append(start)
|
NFA.epsilons.append(start)
|
||||||
|
|
||||||
NFA.dump_graph()
|
NFA.dump_graph()
|
||||||
|
|
@ -2525,12 +2525,8 @@ def compile_terminals(terminals: typing.Iterable[Terminal]) -> LexerTable:
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
def compile_lexer(grammar: Grammar) -> LexerTable:
|
def dump_lexer_table(table: LexerTable, name: str = "lexer.dot"):
|
||||||
return compile_terminals(grammar.terminals)
|
with open(name, "w", encoding="utf-8") as f:
|
||||||
|
|
||||||
|
|
||||||
def dump_lexer_table(table: LexerTable):
|
|
||||||
with open("lexer.dot", "w", encoding="utf-8") as f:
|
|
||||||
f.write("digraph G {\n")
|
f.write("digraph G {\n")
|
||||||
for index, (accept, edges) in enumerate(table):
|
for index, (accept, edges) in enumerate(table):
|
||||||
label = accept.value if accept is not None else ""
|
label = accept.value if accept is not None else ""
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue