[parser] Error recovery tests

Based on the blog post "Resilient LL Parsing Tutorial" by Alex Kladov (https://matklad.github.io/2023/05/21/resilient-ll-parsing-tutorial.html). Because I was trying to be "simple" in my grammar definition, I found a bug in the grammar class, whoops! :)
2024-09-22 08:46:54 -07:00 · 2024-09-22 08:46:54 -07:00 · bb52ab8da5
commit bb52ab8da5
parent 071cd29d8f
2 changed files with 24 additions and 10 deletions
--- a/parser/parser.py
+++ b/parser/parser.py
@ -706,7 +706,12 @@ class TableBuilder(object):
        assert self.goto_row[symbol] is None  # ?
        self.goto_row[symbol] = index

-    def _action_precedence(self, symbol: int, action: Action, config: Configuration):
+    def _action_precedence(
+        self,
+        symbol: int,
+        action: Action,
+        config: Configuration,
+    ) -> tuple[Assoc, int]:
        if isinstance(action, Shift):
            return self.precedence[symbol]
        else:
@ -2761,7 +2766,7 @@ class TriviaMode(enum.Enum):
 # Finally, the base class for grammars
 ###############################################################################

-PrecedenceList = list[typing.Tuple[Assoc, list[Rule]]]
+PrecedenceList = list[typing.Tuple[Assoc, list[Rule | str]]]


 class Grammar:
@ -2799,7 +2804,7 @@ class Grammar:

    def __init__(
        self,
-        start: str | None = None,
+        start: str | NonTerminal | None = None,
        precedence: PrecedenceList | None = None,
        generator: type[GenerateLR0] | None = None,
        trivia: list[str | Terminal] | None = None,
@ -2812,6 +2817,8 @@ class Grammar:
                "The default start rule must either be specified in the constructor or as an "
                "attribute in the class."
            )
+        if isinstance(start, NonTerminal):
+            start = start.name

        if precedence is None:
            precedence = getattr(self, "precedence", [])
@ -2856,8 +2863,10 @@ class Grammar:
                if resolved is None:
                    raise ValueError(f"The trivia '{t}' is not a terminal name")
                resolved_trivia.append(resolved)
-            else:
+            elif isinstance(t, Terminal):
                resolved_trivia.append(t)
+            else:
+                raise ValueError(f"{t} must be either a terminal name or literally a terminal")

        # Fix up the precedence table.
        precedence_table = {}
@ -2871,9 +2880,8 @@ class Grammar:
                elif isinstance(symbol, NonTerminal):
                    key = symbol.name
                elif isinstance(symbol, str):
-                    key = terminals.get(symbol)
-                    if key is None:
-                        key = nonterminals.get(symbol)
+                    if symbol in terminals or symbol in nonterminals:
+                        key = symbol

                if key is None:
                    raise ValueError(
--- a/parser/runtime.py
+++ b/parser/runtime.py
@ -24,17 +24,23 @@ class Tree:
    end: int
    children: typing.Tuple["Tree | TokenValue", ...]

-    def format_lines(self, source: str | None = None) -> list[str]:
+    def format_lines(self, source: str | None = None, *, ignore_error: bool = False) -> list[str]:
        lines = []

        def format_node(node: Tree | TokenValue, indent: int):
            match node:
                case Tree(name=name, start=start, end=end, children=children):
+                    if ignore_error and start == end:
+                        return
+
                    lines.append((" " * indent) + f"{name or '???'} [{start}, {end})")
                    for child in children:
                        format_node(child, indent + 2)

                case TokenValue(kind=kind, start=start, end=end):
+                    if ignore_error and start == end:
+                        return
+
                    if source is not None:
                        value = f":'{source[start:end]}'"
                    else:
@ -44,8 +50,8 @@ class Tree:
        format_node(self, 0)
        return lines

-    def format(self, source: str | None = None) -> str:
-        return "\n".join(self.format_lines(source))
+    def format(self, source: str | None = None, *, ignore_error: bool = False) -> str:
+        return "\n".join(self.format_lines(source, ignore_error=ignore_error))


@dataclass