Compare commits

..

No commits in common. "1d28c82007db626887cfcd14729772b4c28a5c79" and "a2c6390c237f90f639136f9c086e84ea794ed6b1" have entirely different histories.

3 changed files with 174 additions and 330 deletions

View file

@ -2815,12 +2815,10 @@ class Grammar:
def get_precedence(self, name: str) -> None | tuple[Assoc, int]:
    """Return the (associativity, level) pair declared for *name*, or None."""
    entry = self._precedence.get(name)
    return entry
# TODO: The flattened form should retain NonTerminal, not just str.
def generate_nonterminal_dict(
self, start: str | None = None
) -> typing.Tuple[dict[str, list[list[str | Terminal]]], set[str]]:
"""Convert the rules into a dictionary of productions, and a set of
the names of transparent nonterminals.
"""Convert the rules into a dictionary of productions.
Our table generators work on a very flat set of productions. This is the
first step in flattening the productions from the members: walk the rules
@ -2840,8 +2838,6 @@ class Grammar:
rule = nonterminals.get(start)
if rule is None:
raise ValueError(f"Cannot find a rule named '{start}'")
if rule.transparent:
raise ValueError("The start rule cannot be transparent")
queue = [rule]
while len(queue) > 0:
rule = queue.pop()

View file

@ -22,29 +22,6 @@ class Tree:
end: int
children: typing.Tuple["Tree | TokenValue", ...]
def format_lines(self, source: str | None = None) -> list[str]:
    """Render this tree as a list of indented text lines.

    Each Tree node prints its name and [start, end) span; each token prints
    its kind (plus the source slice when *source* is given) and its span.
    Children are indented two columns deeper than their parent.
    """
    out: list[str] = []

    def walk(node: "Tree | TokenValue", depth: int) -> None:
        pad = " " * depth
        if isinstance(node, Tree):
            out.append(pad + f"{node.name or '???'} [{node.start}, {node.end})")
            for child in node.children:
                walk(child, depth + 2)
        elif isinstance(node, TokenValue):
            if source is not None:
                value = f":'{source[node.start:node.end]}'"
            else:
                value = ""
            out.append(pad + f"{node.kind}{value} [{node.start}, {node.end})")

    walk(self, 0)
    return out
def format(self, source: str | None = None) -> str:
    """Return the tree rendered as a single newline-joined string."""
    lines = self.format_lines(source)
    return "\n".join(lines)
@dataclass
class ParseError:
@ -301,15 +278,13 @@ class TokenStream(typing.Protocol):
...
# TODO: Improve this runtime API; the TokenStream protocol is useful, but there
# should also be a function that takes a string and produces a
# tree directly, with caching intermediates for codegen and whatnot.
class Parser:
# Our stack is a stack of tuples, where the first entry is the state
# number and the second entry is the 'value' that was generated when the
# state was pushed.
table: parser.ParseTable
def __init__(self, table: parser.ParseTable):
def __init__(self, table: parser.ParseTable):
    # The parse table that drives this parser's shift/reduce decisions
    # (matches the `table: parser.ParseTable` class attribute declaration).
    self.table = table
def parse(self, tokens: TokenStream) -> typing.Tuple[Tree | None, list[str]]:
@ -326,9 +301,6 @@ class Parser:
input = input + [TokenValue(kind="$", start=eof, end=eof)]
input_index = 0
# Our stack is a stack of tuples, where the first entry is the state
# number and the second entry is the 'value' that was generated when
# the state was pushed.
stack: ParseStack = [(0, None)]
result: Tree | None = None
errors: list[ParseError] = []

View file

@ -1,7 +1,5 @@
# A prettier printer.
import abc
import dataclasses
import math
import typing
from . import parser
@ -14,13 +12,6 @@ class Cons:
right: "Document"
def cons(left: "Document", right: "Document") -> "Document":
    """Concatenate two documents.

    A falsy document (None, empty) acts as the identity: the other side is
    returned unchanged, and only two real documents allocate a Cons pair.
    """
    if not left:
        return right
    if not right:
        return left
    return Cons(left, right)
@dataclasses.dataclass(frozen=True)
class NewLine:
    """Document node representing a line break."""
    pass
@ -56,230 +47,144 @@ class Lazy:
# A pretty-printer document is any of these node shapes; None acts as the
# empty document (see `cons`, which treats falsy sides as identity).
Document = None | Text | NewLine | Cons | Indent | Group | Lazy
def resolve_document(doc: Document) -> Document:
match doc:
case Cons(left, right):
lr = resolve_document(left)
rr = resolve_document(right)
if lr is not left or rr is not right:
return cons(lr, rr)
else:
return doc
case Lazy(_):
return doc.resolve()
case _:
return doc
def layout_document(doc: Document) -> typing.Generator[str, None, None]:
    """Lay out *doc* as a stream of rendered text chunks. Not implemented yet."""
    del doc  # Parameter is unused until an implementation lands.
    raise NotImplementedError()
@dataclasses.dataclass(frozen=True)
class MatchTerminal:
    """Instruction: match one input token whose kind equals *name*."""
    name: str
@dataclasses.dataclass
class Match:
    """Result of a successful match: the document produced, plus the input
    items that were left unconsumed."""
    doc: Document
    remaining: list[runtime.Tree | runtime.TokenValue]
@dataclasses.dataclass(frozen=True)
class MatchNonTerminal:
    """Instruction: match one input subtree whose rule name equals *name*."""
    name: str
@dataclasses.dataclass(frozen=True)
class Accept:
    """Instruction: succeed if every input item has been consumed."""
    pass
@dataclasses.dataclass(frozen=True)
class StartGroup:
    """Instruction: push a group marker onto the results stack."""
    pass
@dataclasses.dataclass(frozen=True)
class EndGroup:
    """Instruction: pop results back to the matching group marker and wrap
    them in a Group document."""
    pass
@dataclasses.dataclass(frozen=True)
class StartIndent:
    """Instruction: push an indent marker onto the results stack."""
    pass
@dataclasses.dataclass(frozen=True)
class EndIndent:
    """Instruction: pop results back to the matching indent marker and wrap
    them in an Indent document."""
    # Number of columns to indent by.
    amount: int
@dataclasses.dataclass(frozen=True)
class Split:
    """Instruction: fork execution — the current thread continues at *left*
    while a copy is queued to resume at *right*."""
    left: int
    right: int
@dataclasses.dataclass(frozen=True)
class Jump:
    """Instruction: unconditionally continue execution at *next*."""
    next: int
# Union of every opcode the matcher VM dispatches on (see Matcher's
# `self.code[thread.pc]` interpreter loop).
MatchInstruction = (
    MatchTerminal
    | MatchNonTerminal
    | Accept
    | StartGroup
    | EndGroup
    | NewLine
    | StartIndent
    | EndIndent
    | Split
    | Jump
)
### NOTE: This FSM-based approach does not work in general.
###
### Rules with transparent children cannot be matched by a finite state
### machine: the language is context-free, not regular, so matching requires
### running the real parser. That means generating parse tables again from
### sub-grammars for the productions, generating new ones for the meta
### productions, and running semantic actions as well.
###
class Matcher:
code: list[MatchInstruction]
def match(self, items: list[runtime.Tree | runtime.TokenValue]) -> Match | None:
    """Attempt to match *items*; base implementation is abstract-by-raise
    and must be overridden by subclasses."""
    raise NotImplementedError()
def __init__(self):
    # Instruction buffer for the matcher VM; populated by compile_rule.
    self.code = []
@dataclasses.dataclass
class ThreadState:
    """Execution state of one backtracking thread in the matcher VM."""
    # Program counter: index into the Matcher's instruction list.
    pc: int
    # Index of the next input item to examine.
    position: int
    # Number of steps executed; used as a runaway-loop guard.
    count: int
    # Stack of emitted documents interleaved with open group/indent markers.
    results: list[Document | StartGroup | StartIndent]
class NonTerminalMatcher(Matcher):
name: str
printer: "Printer"
def match(self, printer: "Printer", items: list[runtime.Tree | runtime.TokenValue]) -> Document:
threads: list[Matcher.ThreadState] = [
Matcher.ThreadState(pc=0, position=0, results=[], count=0)
]
def __init__(self, name: str, printer: "Printer"):
self.name = name
self.printer = printer
while len(threads) > 0:
thread = threads.pop()
results = thread.results
while True:
thread.count += 1
if thread.count > 1000:
raise Exception("Too many steps!")
def match(self, items: list[runtime.Tree | runtime.TokenValue]) -> Match | None:
if len(items) == 0:
return None
inst = self.code[thread.pc]
print(f"THREAD: {thread.pc}: {inst} ({thread.position})")
match inst:
case MatchTerminal(name):
if thread.position >= len(items):
break
item = items[thread.position]
if not isinstance(item, runtime.TokenValue):
break
if item.kind != name:
break
results.append(Text(item.start, item.end))
thread.pc += 1
thread.position += 1
case MatchNonTerminal(name):
if thread.position >= len(items):
break
item = items[thread.position]
if not isinstance(item, runtime.Tree):
break
if item.name != name:
break
def thunk(capture: runtime.Tree):
return lambda: printer.convert_tree_to_document(capture)
results.append(Lazy(thunk(item)))
thread.pc += 1
thread.position += 1
case Accept():
if thread.position != len(items):
break
result = None
for r in thread.results:
assert not isinstance(r, (StartGroup, StartIndent))
result = cons(result, r)
return result
case StartGroup():
results.append(inst)
thread.pc += 1
case EndGroup():
group_items = None
while not isinstance(results[-1], StartGroup):
item = typing.cast(Document, results.pop())
group_items = cons(item, group_items)
results.pop()
results.append(Group(group_items))
thread.pc += 1
case NewLine():
results.append(NewLine())
thread.pc += 1
case StartIndent():
results.append(inst)
thread.pc += 1
case EndIndent(amount):
indent_items = None
while not isinstance(results[-1], StartIndent):
item = typing.cast(Document, results.pop())
indent_items = cons(item, indent_items)
results.pop()
results.append(Indent(amount, indent_items))
thread.pc += 1
case Split(left, right):
new_thread = Matcher.ThreadState(
pc=right,
position=thread.position,
results=list(thread.results),
count=0,
item = items[0]
if isinstance(item, runtime.Tree) and item.name == self.name:
return Match(
doc=Lazy(value=lambda: self.printer.convert_tree_to_document(item)),
remaining=items[1:],
)
threads.append(new_thread)
thread.pc = left
case Jump(where):
thread.pc = where
threads.append(thread)
case _:
typing.assert_never(inst)
return None
def format(self) -> str:
    """Return the instruction listing as a single newline-joined string."""
    lines = self.format_lines()
    return "\n".join(lines)
def format_lines(self) -> list[str]:
    """Render the instruction listing, one line per opcode, with each index
    right-aligned to the width of the largest index.

    Returns an empty list when there are no instructions (the original
    computed math.log10(0), which raises ValueError on an empty program).
    """
    lines: list[str] = []
    # Column width of the largest index; guard the empty case since
    # log10(0) is a domain error.
    code_len = int(math.log10(len(self.code))) + 1 if self.code else 1
    for i, inst in enumerate(self.code):
        lines.append(f"{i: >{code_len}} {inst}")
    return lines
class TerminalMatcher(Matcher):
name: str
@abc.abstractmethod
# NOTE(review): this declaration appears interleaved from another revision of
# the diff and looks like it belongs on the Matcher base class — confirm
# placement before relying on it.
def format_into(self, lines: list[str], visited: dict["Matcher", int], indent: int = 0): ...
def __init__(self, name: str):
    # The terminal (token kind) this matcher accepts.
    self.name = name
def match(self, items: list[runtime.Tree | runtime.TokenValue]) -> Match | None:
    """Match a single leading token of this matcher's terminal kind.

    Consumes exactly one item and produces a Text document spanning it;
    returns None when the input is empty or the first item is not a token
    of the expected kind.
    """
    if not items:
        return None
    first = items[0]
    if not isinstance(first, runtime.TokenValue) or first.kind != self.name:
        return None
    return Match(
        doc=Text(start=first.start, end=first.end),
        remaining=items[1:],
    )
class IndentMatcher(Matcher):
    """Delegates to a child matcher and indents whatever document it yields."""

    # Number of columns to indent the child's document by.
    amount: int
    # The wrapped matcher that does the actual matching.
    child: Matcher

    def __init__(self, amount: int, child: Matcher):
        self.amount = amount
        self.child = child

    def match(self, items: list[runtime.Tree | runtime.TokenValue]) -> Match | None:
        """Match via the child; on success, wrap its document in an Indent."""
        m = self.child.match(items)
        if m is None:
            return None
        m.doc = Indent(amount=self.amount, doc=m.doc)
        return m
class NewLineMatcher(Matcher):
    """Zero-width matcher: always succeeds without consuming any input,
    emitting a NewLine document."""

    def match(self, items: list[runtime.Tree | runtime.TokenValue]) -> Match | None:
        # Nothing is consumed; the newline is purely an output artifact.
        return Match(doc=NewLine(), remaining=items)
class GroupMatcher(Matcher):
    """Delegates to a child matcher and wraps its document in a Group."""

    # The wrapped matcher that does the actual matching.
    child: Matcher

    def __init__(self, child: Matcher):
        self.child = child

    def match(self, items: list[runtime.Tree | runtime.TokenValue]) -> Match | None:
        """Match via the child; on success, wrap its document in a Group."""
        m = self.child.match(items)
        if m is None:
            return None
        m.doc = Group(m.doc)
        return m
class CompleteMatcher(Matcher):
    """Succeeds only at end of input; anchors a sequence so a match must
    consume every item."""

    def match(self, items: list[runtime.Tree | runtime.TokenValue]) -> Match | None:
        if items:
            return None
        # Empty input: succeed with the empty document.
        return Match(doc=None, remaining=[])
class AlternativeMatcher(Matcher):
    """Ordered choice: tries each child in turn and returns the first
    successful match."""

    # Alternatives, tried in order.
    children: list[Matcher]

    def __init__(self, children: list[Matcher] | None = None):
        # `children or []` also replaces an explicitly-passed empty list
        # with a fresh one, matching the original aliasing behavior.
        self.children = children or []

    def match(self, items: list[runtime.Tree | runtime.TokenValue]) -> Match | None:
        for alternative in self.children:
            outcome = alternative.match(items)
            if outcome is not None:
                return outcome
        return None
class SequenceMatcher(Matcher):
    """Matches its children in order, threading the unconsumed items from
    each child into the next and concatenating their documents."""

    # Sub-matchers applied left to right.
    children: list[Matcher]

    def __init__(self, children: list[Matcher] | None = None):
        # `children or []` also replaces an explicitly-passed empty list
        # with a fresh one, matching the original aliasing behavior.
        self.children = children or []

    def match(self, items: list[runtime.Tree | runtime.TokenValue]) -> Match | None:
        accumulated: Document = None
        remaining = items
        for child in self.children:
            result = child.match(remaining)
            if result is None:
                # One failed child fails the whole sequence.
                return None
            remaining = result.remaining
            # Note: builds Cons directly (not `cons`), so the first pair is
            # Cons(None, doc) — preserved exactly from the original.
            accumulated = Cons(accumulated, result.doc)
        return Match(
            doc=accumulated,
            remaining=remaining,
        )
class PrettyMeta(parser.SyntaxMeta):
@ -290,92 +195,68 @@ class PrettyMeta(parser.SyntaxMeta):
class Printer:
grammar: parser.Grammar
_matchers: dict[str, Matcher]
_nonterminals: dict[str, parser.NonTerminal]
matchers: dict[str, Matcher]
def __init__(self, grammar: parser.Grammar):
self.grammar = grammar
self._nonterminals = {nt.name: nt for nt in grammar.non_terminals()}
self._matchers = {}
def lookup_nonterminal(self, name: str) -> parser.NonTerminal:
return self._nonterminals[name]
raise NotImplementedError()
def compile_rule(self, rule: parser.NonTerminal) -> Matcher:
matcher = Matcher()
code = matcher.code
patcher: dict[str, int] = {}
def compile_nonterminal(rule: parser.NonTerminal):
sub_start = patcher.get(rule.name)
if sub_start is not None:
code.append(Jump(sub_start))
else:
sub_start = len(code)
patcher[rule.name] = sub_start
tails = []
subs = list(rule.fn(self.grammar).flatten(with_metadata=True))
for sub in subs[:-1]:
split_pos = len(code)
code.append(Split(0, 0))
compile_production(sub)
tails.append(len(code))
code.append(Jump(0))
code[split_pos] = Split(sub_start + 1, len(code))
sub_start = len(code)
compile_production(subs[-1])
for tail in tails:
code[tail] = Jump(len(code))
def compile_production(production: parser.FlattenedWithMetadata):
def production_to_matcher(self, production: parser.FlattenedWithMetadata) -> Matcher:
results = []
for item in production:
if isinstance(item, str):
rule = self.lookup_nonterminal(item)
if rule.transparent:
# If it's transparent then we need to inline the pattern here.
compile_nonterminal(rule)
# If it's transparent then we don't actually match a
# nonterminal here, we need to match against the contents
# of the rule, so we recurse.
results.append(self.rule_to_matcher(rule))
else:
code.append(MatchNonTerminal(item))
results.append(NonTerminalMatcher(item, self))
elif isinstance(item, parser.Terminal):
name = item.name
assert name is not None
code.append(MatchTerminal(name))
results.append(TerminalMatcher(name))
else:
meta, children = item
child = self.production_to_matcher(children)
prettier = meta.get("prettier")
if isinstance(prettier, PrettyMeta):
if prettier.indent:
code.append(StartIndent())
if prettier.group:
code.append(StartGroup())
child = IndentMatcher(prettier.indent, child)
compile_production(children)
if isinstance(prettier, PrettyMeta):
if prettier.group:
code.append(EndGroup())
if prettier.indent:
code.append(EndIndent(prettier.indent))
child = GroupMatcher(child)
results.append(child)
if prettier.newline:
code.append(NewLine())
results.append(NewLineMatcher())
compile_nonterminal(rule)
code.append(Accept())
return matcher
else:
results.append(child)
return SequenceMatcher(results)
def rule_to_matcher(self, rule: parser.NonTerminal) -> Matcher:
result = self._matchers.get(rule.name)
result = self.matchers.get(rule.name)
if result is None:
result = self.compile_rule(rule)
self._matchers[rule.name] = result
# Create the empty alternative, be sure to set up the
alts = AlternativeMatcher()
if rule.transparent:
result = alts
else:
result = SequenceMatcher(children=[alts, CompleteMatcher()])
self.matchers[rule.name] = result
for production in rule.fn(self.grammar).flatten(with_metadata=True):
alts.children.append(self.production_to_matcher(production))
return result
@ -385,16 +266,11 @@ class Printer:
rule = self.lookup_nonterminal(name)
matcher = self.rule_to_matcher(rule)
print(f"--------")
print(f"Matching with:\n{matcher.format()}")
m = matcher.match(self, list(tree.children))
print(f"--------")
if m is None:
raise ValueError(
f"Could not match a valid tree for {tree.name} with {len(tree.children)} children:\n{tree.format()}\nMatcher:\n{matcher.format()}"
)
# return m
return resolve_document(m)
m = matcher.match(list(tree.children))
assert m is not None, "Could not match a valid tree" # TODO: Exception rather I think
return m.doc
def format_tree(self, tree: runtime.Tree) -> str:
doc = self.convert_tree_to_document(tree)