3 changed files with 174 additions and 330 deletions
--- a/parser/parser.py
+++ b/parser/parser.py
@ -2815,12 +2815,10 @@ class Grammar:
    def get_precedence(self, name: str) -> None | tuple[Assoc, int]:
        """Look up the precedence entry stored under ``name``.

        Returns the value held in ``self._precedence`` for ``name`` (an
        ``(Assoc, int)`` pair according to the annotation), or ``None`` when
        there is no entry for that name.
        """
        entry = self._precedence.get(name)
        return entry
    # TODO: The flattened form should retain NonTerminal, not just str.
    def generate_nonterminal_dict(
        self, start: str | None = None
    ) -> typing.Tuple[dict[str, list[list[str | Terminal]]], set[str]]:
-        """Convert the rules into a dictionary of productions, and a set of
+        """Convert the rules into a dictionary of productions.
        the names of transparent nonterminals.
        Our table generators work on a very flat set of productions. This is the
        first step in flattening the productions from the members: walk the rules
@ -2840,8 +2838,6 @@ class Grammar:
        rule = nonterminals.get(start)
        if rule is None:
            raise ValueError(f"Cannot find a rule named '{start}'")
        if rule.transparent:
            raise ValueError("The start rule cannot be transparent")
        queue = [rule]
        while len(queue) > 0:
            rule = queue.pop()
--- a/parser/runtime.py
+++ b/parser/runtime.py
@ -22,29 +22,6 @@ class Tree:
    end: int
    children: typing.Tuple["Tree | TokenValue", ...]
    def format_lines(self, source: str | None = None) -> list[str]:
        lines = []
        def format_node(node: Tree | TokenValue, indent: int):
            match node:
                case Tree(name=name, start=start, end=end, children=children):
                    lines.append((" " * indent) + f"{name or '???'} [{start}, {end})")
                    for child in children:
                        format_node(child, indent + 2)
                case TokenValue(kind=kind, start=start, end=end):
                    if source is not None:
                        value = f":'{source[start:end]}'"
                    else:
                        value = ""
                    lines.append((" " * indent) + f"{kind}{value} [{start}, {end})")
        format_node(self, 0)
        return lines
    def format(self, source: str | None = None) -> str:
        return "\n".join(self.format_lines(source))
@dataclass
 class ParseError:
@ -301,15 +278,13 @@ class TokenStream(typing.Protocol):
        ...
 # TODO: This runtime API sucks; the TokenStream is nice and all but I should
 #       also be able to have a function that takes a string and produces a
 #       tree directly, with caching intermediates for codegen and whatnot.
 class Parser:
    # Our stack is a stack of tuples, where the first entry is the state
    # number and the second entry is the 'value' that was generated when the
    # state was pushed.
    table: parser.ParseTable
-    def __init__(self, table: parser.ParseTable):
+    def __init__(self, table):
        self.table = table
    def parse(self, tokens: TokenStream) -> typing.Tuple[Tree | None, list[str]]:
@ -326,9 +301,6 @@ class Parser:
        input = input + [TokenValue(kind="$", start=eof, end=eof)]
        input_index = 0
        # Our stack is a stack of tuples, where the first entry is the state
        # number and the second entry is the 'value' that was generated when
        # the state was pushed.
        stack: ParseStack = [(0, None)]
        result: Tree | None = None
        errors: list[ParseError] = []
--- a/parser/wadler.py
+++ b/parser/wadler.py
@ -1,7 +1,5 @@
 # A prettier printer.
 import abc
 import dataclasses
 import math
 import typing
 from . import parser
@ -14,13 +12,6 @@ class Cons:
    right: "Document"
 def cons(left: "Document", right: "Document") -> "Document":
    """Join two documents, skipping the ``Cons`` node when a side is empty.

    When both sides are truthy the result is ``Cons(left, right)``. Otherwise
    whichever side is truthy is returned unchanged, and ``right`` is returned
    when neither is.
    """
    if not left or not right:
        # At most one side carries content, so no pair node is needed.
        return left or right
    return Cons(left, right)
# Field-less, frozen document node. It is one of the members of the `Document`
# union, and `NewLineMatcher` produces one without consuming any input.
@dataclasses.dataclass(frozen=True)
 class NewLine:
    pass
@ -56,230 +47,144 @@ class Lazy:
 Document = None | Text | NewLine | Cons | Indent | Group | Lazy
 def resolve_document(doc: Document) -> Document:
    match doc:
        case Cons(left, right):
            lr = resolve_document(left)
            rr = resolve_document(right)
            if lr is not left or rr is not right:
                return cons(lr, rr)
            else:
                return doc
        case Lazy(_):
            return doc.resolve()
        case _:
            return doc
 def layout_document(doc: Document) -> typing.Generator[str, None, None]:
    """Placeholder for the layout pass; always raises ``NotImplementedError``.

    The body contains no ``yield``, so despite the annotated return type the
    exception is raised as soon as the function is called.
    """
    # Discard the argument explicitly; nothing is laid out yet.
    del doc
    raise NotImplementedError
-@dataclasses.dataclass(frozen=True)
+@dataclasses.dataclass
-class MatchTerminal:
+class Match:
-    name: str
+    doc: Document
    remaining: list[runtime.Tree | runtime.TokenValue]
@dataclasses.dataclass(frozen=True)
 class MatchNonTerminal:
    name: str
@dataclasses.dataclass(frozen=True)
 class Accept:
    pass
@dataclasses.dataclass(frozen=True)
 class StartGroup:
    pass
@dataclasses.dataclass(frozen=True)
 class EndGroup:
    pass
@dataclasses.dataclass(frozen=True)
 class StartIndent:
    pass
@dataclasses.dataclass(frozen=True)
 class EndIndent:
    amount: int
@dataclasses.dataclass(frozen=True)
 class Split:
    left: int
    right: int
@dataclasses.dataclass(frozen=True)
 class Jump:
    next: int
 MatchInstruction = (
    MatchTerminal
    | MatchNonTerminal
    | Accept
    | StartGroup
    | EndGroup
    | NewLine
    | StartIndent
    | EndIndent
    | Split
    | Jump
 )
 ### THIS DOESN'T WORK
 ###
 ### YOU CANNOT MATCH RULES WITH TRANSPARENT CHILDREN WITH A FSM, THIS IS NOT
 ### A REGULAR LANGUAGE IT IS CONTEXT FREE SO WE NEED TO RUN OUR REAL PARSER
 ### WHICH MEANS YES WE NEED TO GENERATE TABLES AGAIN OUT OF SUB-GRAMMARS FOR
 ### PRODUCTIONS BUT ALSO GENERATE NEW ONES FOR META AND ALSO RUN ACTIONS
 ###
 ### CHRIST.
 ###
 class Matcher:
-    code: list[MatchInstruction]
+    def match(self, items: list[runtime.Tree | runtime.TokenValue]) -> Match | None:
        raise NotImplementedError()
    def __init__(self):
        self.code = []
-    @dataclasses.dataclass
+class NonTerminalMatcher(Matcher):
-    class ThreadState:
+    name: str
-        pc: int
+    printer: "Printer"
        position: int
        count: int
        results: list[Document | StartGroup | StartIndent]
-    def match(self, printer: "Printer", items: list[runtime.Tree | runtime.TokenValue]) -> Document:
+    def __init__(self, name: str, printer: "Printer"):
-        threads: list[Matcher.ThreadState] = [
+        self.name = name
-            Matcher.ThreadState(pc=0, position=0, results=[], count=0)
+        self.printer = printer
        ]
-        while len(threads) > 0:
+    def match(self, items: list[runtime.Tree | runtime.TokenValue]) -> Match | None:
-            thread = threads.pop()
+        if len(items) == 0:
-            results = thread.results
+            return None
            while True:
                thread.count += 1
                if thread.count > 1000:
                    raise Exception("Too many steps!")
-                inst = self.code[thread.pc]
+        item = items[0]
-                print(f"THREAD: {thread.pc}: {inst} ({thread.position})")
+        if isinstance(item, runtime.Tree) and item.name == self.name:
-                match inst:
+            return Match(
-                    case MatchTerminal(name):
+                doc=Lazy(value=lambda: self.printer.convert_tree_to_document(item)),
-                        if thread.position >= len(items):
+                remaining=items[1:],
-                            break
+            )
                        item = items[thread.position]
                        if not isinstance(item, runtime.TokenValue):
                            break
                        if item.kind != name:
                            break
                        results.append(Text(item.start, item.end))
                        thread.pc += 1
                        thread.position += 1
                    case MatchNonTerminal(name):
                        if thread.position >= len(items):
                            break
                        item = items[thread.position]
                        if not isinstance(item, runtime.Tree):
                            break
                        if item.name != name:
                            break
                        def thunk(capture: runtime.Tree):
                            return lambda: printer.convert_tree_to_document(capture)
                        results.append(Lazy(thunk(item)))
                        thread.pc += 1
                        thread.position += 1
                    case Accept():
                        if thread.position != len(items):
                            break
                        result = None
                        for r in thread.results:
                            assert not isinstance(r, (StartGroup, StartIndent))
                            result = cons(result, r)
                        return result
                    case StartGroup():
                        results.append(inst)
                        thread.pc += 1
                    case EndGroup():
                        group_items = None
                        while not isinstance(results[-1], StartGroup):
                            item = typing.cast(Document, results.pop())
                            group_items = cons(item, group_items)
                        results.pop()
                        results.append(Group(group_items))
                        thread.pc += 1
                    case NewLine():
                        results.append(NewLine())
                        thread.pc += 1
                    case StartIndent():
                        results.append(inst)
                        thread.pc += 1
                    case EndIndent(amount):
                        indent_items = None
                        while not isinstance(results[-1], StartIndent):
                            item = typing.cast(Document, results.pop())
                            indent_items = cons(item, indent_items)
                        results.pop()
                        results.append(Indent(amount, indent_items))
                        thread.pc += 1
                    case Split(left, right):
                        new_thread = Matcher.ThreadState(
                            pc=right,
                            position=thread.position,
                            results=list(thread.results),
                            count=0,
                        )
                        threads.append(new_thread)
                        thread.pc = left
                    case Jump(where):
                        thread.pc = where
                        threads.append(thread)
                    case _:
                        typing.assert_never(inst)
        return None
    def format(self) -> str:
        return "\n".join(self.format_lines())
-    def format_lines(self) -> list[str]:
+class TerminalMatcher(Matcher):
-        lines = []
+    name: str
        code_len = int(math.log10(len(self.code))) + 1
        for i, inst in enumerate(self.code):
            lines.append(f"{i: >{code_len}} {inst}")
        return lines
-    @abc.abstractmethod
+    def __init__(self, name: str):
-    def format_into(self, lines: list[str], visited: dict["Matcher", int], indent: int = 0): ...
+        self.name = name
    def match(self, items: list[runtime.Tree | runtime.TokenValue]) -> Match | None:
        """Match one token of kind ``self.name`` at the front of ``items``.

        Returns a ``Match`` whose document is a ``Text`` built from the
        token's ``start`` and ``end`` and whose ``remaining`` is everything
        after that token. Returns ``None`` when ``items`` is empty or its
        first element is not a ``TokenValue`` of the expected kind.
        """
        if not items:
            return None

        head = items[0]
        if not isinstance(head, runtime.TokenValue) or head.kind != self.name:
            return None

        text = Text(start=head.start, end=head.end)
        return Match(doc=text, remaining=items[1:])
 class IndentMatcher(Matcher):
    """Wrap whatever ``child`` matches in an ``Indent`` of ``amount``."""

    amount: int
    child: Matcher

    def __init__(self, amount: int, child: Matcher):
        self.child = child
        self.amount = amount

    def match(self, items: list[runtime.Tree | runtime.TokenValue]) -> Match | None:
        """Delegate to ``child``; on success indent the matched document.

        Returns ``None`` when the child does not match. Otherwise the child's
        own ``Match`` object is returned with its ``doc`` replaced in place.
        """
        inner = self.child.match(items)
        if inner is None:
            return None
        # The child's Match is reused rather than copied.
        inner.doc = Indent(amount=self.amount, doc=inner.doc)
        return inner
 class NewLineMatcher(Matcher):
    """Matcher that always succeeds with a ``NewLine`` and consumes nothing."""

    def match(self, items: list[runtime.Tree | runtime.TokenValue]) -> Match | None:
        """Return a ``Match`` holding a ``NewLine``; ``items`` is passed through as ``remaining``."""
        line_break = NewLine()
        return Match(doc=line_break, remaining=items)
 class GroupMatcher(Matcher):
    """Wrap whatever ``child`` matches in a ``Group``."""

    child: Matcher

    def __init__(self, child: Matcher):
        self.child = child

    def match(self, items: list[runtime.Tree | runtime.TokenValue]) -> Match | None:
        """Delegate to ``child``; on success group the matched document.

        Returns ``None`` when the child does not match. Otherwise the child's
        own ``Match`` object is returned with its ``doc`` replaced in place.
        """
        grouped = self.child.match(items)
        if grouped is None:
            return None
        # The child's Match is reused rather than copied.
        grouped.doc = Group(grouped.doc)
        return grouped
 class CompleteMatcher(Matcher):
    """Matcher that succeeds only when there is no input left."""

    def match(self, items: list[runtime.Tree | runtime.TokenValue]) -> Match | None:
        """Return an empty ``Match`` when ``items`` is empty, else ``None``."""
        if items:
            # Leftover input means the enclosing match is not complete.
            return None
        return Match(doc=None, remaining=[])
 class AlternativeMatcher(Matcher):
    """Try each child in order and return the first successful match."""

    children: list[Matcher]

    def __init__(self, children: list[Matcher] | None = None):
        # A missing (or empty) argument gives this instance its own fresh list.
        self.children = children if children else []

    def match(self, items: list[runtime.Tree | runtime.TokenValue]) -> Match | None:
        """Return the first non-``None`` result among the children, or ``None``.

        Every child is offered the same ``items``; children after the first
        successful one are not consulted.
        """
        for option in self.children:
            if (hit := option.match(items)) is not None:
                return hit
        return None
 class SequenceMatcher(Matcher):
    """Match ``children`` one after another, each on what the previous one left."""

    children: list[Matcher]

    def __init__(self, children: list[Matcher] | None = None):
        # A missing (or empty) argument gives this instance its own fresh list.
        self.children = children or []

    def match(self, items: list[runtime.Tree | runtime.TokenValue]) -> Match | None:
        """Run every child in order, threading the remaining items through.

        Returns ``None`` as soon as any child fails to match. Otherwise
        returns a ``Match`` whose document is the children's documents joined
        left to right and whose ``remaining`` is what the last child left.
        With no children the result is an empty document and ``items``
        unchanged.
        """
        doc: Document = None
        for child in self.children:
            m = child.match(items)
            if m is None:
                return None
            items = m.remaining
            # Join with the `cons` helper rather than the raw `Cons`
            # constructor, so empty pieces (the initial `None` accumulator,
            # the `None` document of a CompleteMatcher) do not leave
            # `Cons(None, ...)` wrappers in the result.
            doc = cons(doc, m.doc)
        return Match(
            doc=doc,
            remaining=items,
        )
 class PrettyMeta(parser.SyntaxMeta):
@ -290,92 +195,68 @@ class PrettyMeta(parser.SyntaxMeta):
 class Printer:
    grammar: parser.Grammar
-    _matchers: dict[str, Matcher]
+    matchers: dict[str, Matcher]
    _nonterminals: dict[str, parser.NonTerminal]
    def __init__(self, grammar: parser.Grammar):
        """Set up a printer for ``grammar``.

        Indexes the grammar's nonterminals by name and starts with an empty
        matcher cache.
        """
        self.grammar = grammar
        self._nonterminals = {nt.name: nt for nt in grammar.non_terminals()}
        # `rule_to_matcher` reads and writes the cache as `self.matchers`, so
        # the attribute has to exist under that name or the first lookup
        # raises AttributeError. `_matchers` stays as an alias of the same
        # dict for any code still using the old private name.
        self.matchers = {}
        self._matchers = self.matchers
    def lookup_nonterminal(self, name: str) -> parser.NonTerminal:
-        return self._nonterminals[name]
+        raise NotImplementedError()
-    def compile_rule(self, rule: parser.NonTerminal) -> Matcher:
+    def production_to_matcher(self, production: parser.FlattenedWithMetadata) -> Matcher:
-        matcher = Matcher()
+        results = []
-        code = matcher.code
+        for item in production:
-        patcher: dict[str, int] = {}
+            if isinstance(item, str):
                rule = self.lookup_nonterminal(item)
                if rule.transparent:
                    # If it's transparent then we don't actually match a
                    # nonterminal here, we need to match against the contents
                    # of the rule, so we recurse.
                    results.append(self.rule_to_matcher(rule))
                else:
                    results.append(NonTerminalMatcher(item, self))
            elif isinstance(item, parser.Terminal):
                name = item.name
                assert name is not None
                results.append(TerminalMatcher(name))
        def compile_nonterminal(rule: parser.NonTerminal):
            sub_start = patcher.get(rule.name)
            if sub_start is not None:
                code.append(Jump(sub_start))
            else:
-                sub_start = len(code)
+                meta, children = item
                patcher[rule.name] = sub_start
                tails = []
                subs = list(rule.fn(self.grammar).flatten(with_metadata=True))
                for sub in subs[:-1]:
                    split_pos = len(code)
                    code.append(Split(0, 0))
-                    compile_production(sub)
+                child = self.production_to_matcher(children)
-                    tails.append(len(code))
+                prettier = meta.get("prettier")
-                    code.append(Jump(0))
+                if isinstance(prettier, PrettyMeta):
                    if prettier.indent:
                        child = IndentMatcher(prettier.indent, child)
-                    code[split_pos] = Split(sub_start + 1, len(code))
+                    if prettier.group:
-                    sub_start = len(code)
+                        child = GroupMatcher(child)
-                compile_production(subs[-1])
+                    results.append(child)
-                for tail in tails:
+                    if prettier.newline:
-                    code[tail] = Jump(len(code))
+                        results.append(NewLineMatcher())
        def compile_production(production: parser.FlattenedWithMetadata):
            for item in production:
                if isinstance(item, str):
                    rule = self.lookup_nonterminal(item)
                    if rule.transparent:
                        # If it's transparent then we need to inline the pattern here.
                        compile_nonterminal(rule)
                    else:
                        code.append(MatchNonTerminal(item))
                elif isinstance(item, parser.Terminal):
                    name = item.name
                    assert name is not None
                    code.append(MatchTerminal(name))
                else:
-                    meta, children = item
+                    results.append(child)
-                    prettier = meta.get("prettier")
+        return SequenceMatcher(results)
                    if isinstance(prettier, PrettyMeta):
                        if prettier.indent:
                            code.append(StartIndent())
                        if prettier.group:
                            code.append(StartGroup())
                    compile_production(children)
                    if isinstance(prettier, PrettyMeta):
                        if prettier.group:
                            code.append(EndGroup())
                        if prettier.indent:
                            code.append(EndIndent(prettier.indent))
                        if prettier.newline:
                            code.append(NewLine())
        compile_nonterminal(rule)
        code.append(Accept())
        return matcher
    def rule_to_matcher(self, rule: parser.NonTerminal) -> Matcher:
-        result = self._matchers.get(rule.name)
+        result = self.matchers.get(rule.name)
        if result is None:
-            result = self.compile_rule(rule)
+            # Create the empty alternative, be sure to set up the
-            self._matchers[rule.name] = result
+            alts = AlternativeMatcher()
            if rule.transparent:
                result = alts
            else:
                result = SequenceMatcher(children=[alts, CompleteMatcher()])
            self.matchers[rule.name] = result
            for production in rule.fn(self.grammar).flatten(with_metadata=True):
                alts.children.append(self.production_to_matcher(production))
        return result
@ -385,16 +266,11 @@ class Printer:
        rule = self.lookup_nonterminal(name)
        matcher = self.rule_to_matcher(rule)
-        print(f"--------")
+
-        print(f"Matching with:\n{matcher.format()}")
+        m = matcher.match(list(tree.children))
-        m = matcher.match(self, list(tree.children))
+        assert m is not None, "Could not match a valid tree"  # TODO: Exception rather I think
-        print(f"--------")
+
-        if m is None:
+        return m.doc
            raise ValueError(
                f"Could not match a valid tree for {tree.name} with {len(tree.children)} children:\n{tree.format()}\nMatcher:\n{matcher.format()}"
            )
        # return m
        return resolve_document(m)
    def format_tree(self, tree: runtime.Tree) -> str:
        doc = self.convert_tree_to_document(tree)