Fix LALR. A small change, but very, very slow.

This commit is contained in:
John Doty 2024-05-27 22:31:33 -07:00
parent 0fc04cf11e
commit 8d58c64040
2 changed files with 45 additions and 19 deletions

View file

@ -2,10 +2,22 @@ import bisect
import typing import typing
import grammar import grammar
from parser import Token, Grammar, rule, seq import parser
# from parser import Token, Grammar, rule, seq
def trace_state(stack, input, input_index, action):
    """Print one parser step for debugging.

    Shows the current state stack, a four-token window of the remaining
    input, and the action the parser is about to take, in fixed-width
    columns so successive trace lines align.

    NOTE(review): reconstructed from a rendered diff; `input` shadows the
    builtin but is kept to match the call site in parse().
    """
    stack_states = repr([frame[0] for frame in stack])
    input_window = repr(input[input_index : input_index + 4])
    line = "{stack: <20} {input: <50} {action: <5}".format(
        stack=stack_states,
        input=input_window,
        action=repr(action),
    )
    print(line)
def parse(table, tokens, trace=None):
"""Parse the input with the generated parsing table and return the """Parse the input with the generated parsing table and return the
concrete syntax tree. concrete syntax tree.
@ -35,13 +47,7 @@ def parse(table, tokens, trace=False):
action = table[current_state].get(current_token, ("error",)) action = table[current_state].get(current_token, ("error",))
if trace: if trace:
print( trace(stack, input, input_index, action)
"{stack: <20} {input: <50} {action: <5}".format(
stack=repr([s[0] for s in stack]),
input=repr(input[input_index : input_index + 4]),
action=repr(action),
)
)
if action[0] == "accept": if action[0] == "accept":
return (stack[-1][1], []) return (stack[-1][1], [])
@ -83,7 +89,9 @@ def parse(table, tokens, trace=False):
def harness(lexer_func, grammar_func, start_rule, source_path): def harness(lexer_func, grammar_func, start_rule, source_path):
table = grammar_func().build_table(start=start_rule) # generator = parser.GenerateLR1
generator = parser.GenerateLALR
table = grammar_func().build_table(start=start_rule, generator=generator)
print(f"{len(table)} states") print(f"{len(table)} states")
average_entries = sum(len(row) for row in table) / len(table) average_entries = sum(len(row) for row in table) / len(table)
@ -96,7 +104,7 @@ def harness(lexer_func, grammar_func, start_rule, source_path):
tokens = lexer_func(src) tokens = lexer_func(src)
# print(f"{tokens.lines}") # print(f"{tokens.lines}")
# tokens.dump(end=5) # tokens.dump(end=5)
(_, errors) = parse(table, tokens, trace=True) (_, errors) = parse(table, tokens)
if len(errors) > 0: if len(errors) > 0:
print(f"{len(errors)} errors:") print(f"{len(errors)} errors:")
for error in errors: for error in errors:

View file

@ -257,6 +257,14 @@ class Configuration:
lookahead=(), lookahead=(),
) )
def replace_lookahead(self, lookahead: typing.Tuple[int, ...]):
    """Return a copy of this Configuration with its lookahead replaced.

    The item core (name, symbols, position) is carried over unchanged;
    only the lookahead tuple differs. Used by the LALR generator when
    merging the lookahead sets of configurations that share a core.

    :param lookahead: the new lookahead tuple for the copy.
    :return: a new Configuration; `self` is not mutated.
    """
    return Configuration(
        name=self.name,
        symbols=self.symbols,
        position=self.position,
        lookahead=lookahead,
    )
@property @property
def rest(self): def rest(self):
return self.symbols[(self.position + 1) :] return self.symbols[(self.position + 1) :]
@ -1382,7 +1390,11 @@ class GenerateLALR(GenerateLR1):
use a bunch of improvement, probably.) use a bunch of improvement, probably.)
""" """
def merge_sets(self, config_set_a, config_set_b): def merge_sets(
self,
config_set_a: typing.Tuple[Configuration, ...],
config_set_b: typing.Tuple[Configuration, ...],
):
"""Merge the two config sets, by keeping the item cores but merging """Merge the two config sets, by keeping the item cores but merging
the lookahead sets for each item. the lookahead sets for each item.
""" """
@ -1394,7 +1406,7 @@ class GenerateLALR(GenerateLR1):
new_lookahead = a.lookahead + b.lookahead new_lookahead = a.lookahead + b.lookahead
new_lookahead = tuple(sorted(set(new_lookahead))) new_lookahead = tuple(sorted(set(new_lookahead)))
merged.append(a.clear_lookahead()) merged.append(a.replace_lookahead(new_lookahead))
return tuple(merged) return tuple(merged)
@ -1403,7 +1415,7 @@ class GenerateLALR(GenerateLR1):
b_no_la = tuple(s.clear_lookahead() for s in b) b_no_la = tuple(s.clear_lookahead() for s in b)
return a_no_la == b_no_la return a_no_la == b_no_la
def gen_sets(self, config_set) -> ConfigurationSetInfo: def gen_sets(self, config_set: typing.Tuple[Configuration, ...]) -> ConfigurationSetInfo:
"""Recursively generate all configuration sets starting from the """Recursively generate all configuration sets starting from the
provided set, and merge them with the provided set 'F'. provided set, and merge them with the provided set 'F'.
@ -1414,10 +1426,15 @@ class GenerateLALR(GenerateLR1):
and replace the set in F, returning the modified set. and replace the set in F, returning the modified set.
""" """
F = {} F = {}
seen = set()
successors = [] successors = []
pending = [config_set] pending = [config_set]
while len(pending) > 0: while len(pending) > 0:
config_set = pending.pop() config_set = pending.pop()
if config_set in seen:
continue
seen.add(config_set)
config_set_no_la = tuple(s.clear_lookahead() for s in config_set) config_set_no_la = tuple(s.clear_lookahead() for s in config_set)
existing = F.get(config_set_no_la) existing = F.get(config_set_no_la)
@ -1425,6 +1442,7 @@ class GenerateLALR(GenerateLR1):
F[config_set_no_la] = self.merge_sets(config_set, existing) F[config_set_no_la] = self.merge_sets(config_set, existing)
else: else:
F[config_set_no_la] = config_set F[config_set_no_la] = config_set
for symbol, successor in self.gen_all_successors(config_set): for symbol, successor in self.gen_all_successors(config_set):
successor_no_la = tuple(s.clear_lookahead() for s in successor) successor_no_la = tuple(s.clear_lookahead() for s in successor)
successors.append((config_set_no_la, symbol, successor_no_la)) successors.append((config_set_no_la, symbol, successor_no_la))
@ -1723,7 +1741,7 @@ class Grammar:
return grammar return grammar
def build_table(self, start: str, generator=GenerateLR1): def build_table(self, start: str, generator=GenerateLALR):
"""Construct a parse table for this grammar, starting at the named """Construct a parse table for this grammar, starting at the named
nonterminal rule. nonterminal rule.
""" """