Fix LALR. Small, but very very slow.

This commit is contained in:
John Doty 2024-05-27 22:31:33 -07:00
parent 0fc04cf11e
commit 8d58c64040
2 changed files with 45 additions and 19 deletions

View file

@ -2,10 +2,22 @@ import bisect
import typing
import grammar
from parser import Token, Grammar, rule, seq
import parser
# from parser import Token, Grammar, rule, seq
def parse(table, tokens, trace=False):
def trace_state(stack, input, input_index, action):
    """Print a one-line snapshot of the parser, for tracing a parse.

    The line has three left-aligned, space-padded columns separated by a
    single space:

    * the first element of every entry on `stack` (minimum width 20),
    * the next four items of `input` starting at `input_index`
      (minimum width 50),
    * the `action` about to be taken (minimum width 5).

    Every column is rendered with ``repr()``; a column wider than its
    minimum width is printed in full, not truncated.

    Args:
        stack: Sequence of indexable entries; only element 0 of each entry
            is shown (presumably the state number -- confirm against the
            caller).
        input: Sliceable sequence of the tokens being parsed.
        input_index: Index into `input` of the current token.
        action: The action selected for the current state and token.
    """
    # NOTE: `input` shadows the builtin, but the parameter name is kept so
    # that any caller passing it by keyword keeps working.
    stack_column = repr([entry[0] for entry in stack])
    input_column = repr(input[input_index : input_index + 4])
    print(f"{stack_column: <20} {input_column: <50} {action!r: <5}")
def parse(table, tokens, trace=None):
"""Parse the input with the generated parsing table and return the
concrete syntax tree.
@ -35,13 +47,7 @@ def parse(table, tokens, trace=False):
action = table[current_state].get(current_token, ("error",))
if trace:
print(
"{stack: <20} {input: <50} {action: <5}".format(
stack=repr([s[0] for s in stack]),
input=repr(input[input_index : input_index + 4]),
action=repr(action),
)
)
trace(stack, input, input_index, action)
if action[0] == "accept":
return (stack[-1][1], [])
@ -83,7 +89,9 @@ def parse(table, tokens, trace=False):
def harness(lexer_func, grammar_func, start_rule, source_path):
table = grammar_func().build_table(start=start_rule)
# generator = parser.GenerateLR1
generator = parser.GenerateLALR
table = grammar_func().build_table(start=start_rule, generator=generator)
print(f"{len(table)} states")
average_entries = sum(len(row) for row in table) / len(table)
@ -96,7 +104,7 @@ def harness(lexer_func, grammar_func, start_rule, source_path):
tokens = lexer_func(src)
# print(f"{tokens.lines}")
# tokens.dump(end=5)
(_, errors) = parse(table, tokens, trace=True)
(_, errors) = parse(table, tokens)
if len(errors) > 0:
print(f"{len(errors)} errors:")
for error in errors:

View file

@ -257,6 +257,14 @@ class Configuration:
lookahead=(),
)
def replace_lookahead(self, lookahead: typing.Tuple[int, ...]):
    """Return a new Configuration equal to this one except for lookahead.

    The name, symbols and position are carried over from `self`; the
    lookahead of the result is the `lookahead` tuple passed in. `self` is
    not modified.
    """
    # Everything except the lookahead is copied over unchanged.
    carried_over = {
        "name": self.name,
        "symbols": self.symbols,
        "position": self.position,
    }
    return Configuration(**carried_over, lookahead=lookahead)
@property
def rest(self):
return self.symbols[(self.position + 1) :]
@ -1382,7 +1390,11 @@ class GenerateLALR(GenerateLR1):
use a bunch of improvement, probably.)
"""
def merge_sets(self, config_set_a, config_set_b):
def merge_sets(
self,
config_set_a: typing.Tuple[Configuration, ...],
config_set_b: typing.Tuple[Configuration, ...],
):
"""Merge the two config sets, by keeping the item cores but merging
the lookahead sets for each item.
"""
@ -1394,7 +1406,7 @@ class GenerateLALR(GenerateLR1):
new_lookahead = a.lookahead + b.lookahead
new_lookahead = tuple(sorted(set(new_lookahead)))
merged.append(a.clear_lookahead())
merged.append(a.replace_lookahead(new_lookahead))
return tuple(merged)
@ -1403,7 +1415,7 @@ class GenerateLALR(GenerateLR1):
b_no_la = tuple(s.clear_lookahead() for s in b)
return a_no_la == b_no_la
def gen_sets(self, config_set) -> ConfigurationSetInfo:
def gen_sets(self, config_set: typing.Tuple[Configuration, ...]) -> ConfigurationSetInfo:
"""Recursively generate all configuration sets starting from the
provided set, and merge them with the provided set 'F'.
@ -1414,10 +1426,15 @@ class GenerateLALR(GenerateLR1):
and replace the set in F, returning the modified set.
"""
F = {}
seen = set()
successors = []
pending = [config_set]
while len(pending) > 0:
config_set = pending.pop()
if config_set in seen:
continue
seen.add(config_set)
config_set_no_la = tuple(s.clear_lookahead() for s in config_set)
existing = F.get(config_set_no_la)
@ -1425,10 +1442,11 @@ class GenerateLALR(GenerateLR1):
F[config_set_no_la] = self.merge_sets(config_set, existing)
else:
F[config_set_no_la] = config_set
for symbol, successor in self.gen_all_successors(config_set):
successor_no_la = tuple(s.clear_lookahead() for s in successor)
successors.append((config_set_no_la, symbol, successor_no_la))
pending.append(successor)
for symbol, successor in self.gen_all_successors(config_set):
successor_no_la = tuple(s.clear_lookahead() for s in successor)
successors.append((config_set_no_la, symbol, successor_no_la))
pending.append(successor)
# Register all the actually merged, final config sets.
result = ConfigurationSetInfo()
@ -1723,7 +1741,7 @@ class Grammar:
return grammar
def build_table(self, start: str, generator=GenerateLR1):
def build_table(self, start: str, generator=GenerateLALR):
"""Construct a parse table for this grammar, starting at the named
nonterminal rule.
"""