[wadler] Re-factor into multiple modules

Hard split between builder and runtime, as is proper.
This commit is contained in:
John Doty 2024-09-21 07:42:52 -07:00
parent 1f84752538
commit 1a3ce02d48
4 changed files with 370 additions and 267 deletions

View file

@ -0,0 +1,5 @@
# A prettier printer.
from . import builder
from . import runtime
from .builder import *

316
parser/wadler/builder.py Normal file
View file

@ -0,0 +1,316 @@
"""Data structures to support pretty-printing.
Just like the parse tables, these tables could be written out in a different
format and used to drive a pretty-printer written in another programming
language, probably paired with a parser runtime written in that same language.
"""
import dataclasses
import typing
from .. import parser
@dataclasses.dataclass
class MatcherTable:
    """Information necessary to create a document from a single node of a
    concrete parse tree as generated by the parser.

    A "document" in this case is a wadler-style document. See the
    documentation of the module for what kinds of document nodes we expect
    to generate.

    The grammar contains extra metadata about how to add line-breaks and
    whatnot, but that information was discarded during the parse. (We don't
    need it!) That means we need to recover it after the fact. It would be
    easy, except transparent rules mean that the series of tree children
    form a context-free language instead of a regular language, and so we
    actually need a full parser again to recover the structure.

    The data to drive that parse is in `table`, which is an LR parse table of
    the usual form produced by this parser generator. To build the document,
    use the actions in the parse table to drive an LR parse, maintaining a
    stack of documents as you go.

    When matching terminals, interpret symbol names as follows:

    - `token_[NAME]` symbols are token children in the tree node we're parsing.
      (The token will have the name [NAME].) These should get shifted onto the
      stack as plain-text document nodes.
    - `tree_[KIND]` symbols are tree node children in the tree node we're
      parsing. (The tree kind will be [KIND].) These should get shifted onto
      the stack as document nodes, but recursively (by matching *their* children
      with the same strategy.)

    When reducing nonterminals, first concatenate all of the documents you remove
    from the stack into a single document, then use the first character to
    determine what (if any) additional work to do to the document:

    - `i...` symbols are productions used to generate "indent" documents. The
      `indent_amounts` dict indicates how far to indent each production. The
      concatenated documents become the child of the indent.
    - `g...` symbols are productions used to generate "group" documents. The
      concatenated documents become the child of the group.
    - `n...` symbols are productions that generate newlines. A newline document
      should be created and appended to the concatenated documents. The
      `newline_replace` dict indicates what the replacement text for the newline
      document should be.
    - `p...` symbols are just like `n...` symbols, except the newline symbol
      is prepended instead of appended.
    - `f...` symbols are like `n...` symbols, except that a force-break document
      is appended instead of a newline document.
    - `d...` symbols are like `f...` symbols, except that the force-break
      document is prepended instead of appended.
    - Any other prefix should be ignored.
    """

    # Parse table to recover the node into a document
    table: parser.ParseTable
    # Mapping from the name of i_ rules to indent counts
    indent_amounts: dict[str, int]
    # Mapping from the names of n_ rules to the text they flatten to
    newline_replace: dict[str, str]
def _compile_nonterminal_matcher(
    grammar: parser.Grammar,
    nonterminals: dict[str, parser.NonTerminal],
    rule: parser.NonTerminal,
) -> MatcherTable:
    """Generate a matcher table for a single nonterminal.

    See the docs for [MatcherTable] to understand the result.

    Works by synthesizing a small grammar whose terminals are the names of a
    tree node's children (`tree_*`/`token_*`) and whose synthetic productions
    (`g_*`, `i_*`, `n_*`, `p_*`, `f_*`, `d_*`) carry the formatting metadata,
    then running the normal table generator over it.
    """
    # Accumulates (nonterminal name, production symbols) pairs for the
    # synthesized grammar.
    generated_grammar: list[typing.Tuple[str, list[str]]] = []
    # Names of nonterminals we have already compiled, to stop recursion.
    visited: set[str] = set()
    # In order to generate groups, indents, and newlines we need to
    # synthesize new productions. And it happens sometimes that we get
    # duplicates, repeated synthetic productions. It's important to
    # de-duplicate productions, otherwise we'll wind up with ambiguities in
    # the parser.
    #
    # These dictionaries track the synthetic rules: the keys are production
    # and also the parameter (if any), and the values are the names of the
    # productions that produce the effect.
    #
    groups: dict[tuple[str, ...], str] = {}
    indents: dict[tuple[tuple[str, ...], int], str] = {}
    newlines: dict[tuple[tuple[str, ...], str], str] = {}
    # Counter used to mint unique names for prefix (p_/d_) and postfix (f_)
    # rules, which are not de-duplicated.
    prefix_count: int = 0
    # Mapping from p_ rule names to their newline replacement text; n_ rules
    # are merged into this at the end.
    final_newlines: dict[str, str] = {}

    def compile_nonterminal(name: str, rule: parser.NonTerminal):
        # Compile each flattened production of `rule` under the synthetic
        # name `name`, once.
        if name not in visited:
            visited.add(name)
            for production in rule.fn(grammar).flatten(with_metadata=True):
                trans_prod = compile_production(production)
                generated_grammar.append((name, trans_prod))

    def compile_production(production: parser.FlattenedWithMetadata) -> list[str]:
        # Translate one production into a list of synthetic-grammar symbols,
        # emitting helper rules for any formatting metadata encountered.
        nonlocal groups
        nonlocal indents
        nonlocal newlines
        nonlocal prefix_count
        nonlocal final_newlines

        # Prefix rules (p_/d_) that must wrap the *entire* translated
        # production; applied at the end, innermost-first.
        prefix_stack: list[str] = []
        result = []
        for item in production:
            if isinstance(item, str):
                nt = nonterminals[item]
                if nt.transparent:
                    # If it's transparent then we make a new set of
                    # productions that covers the contents of the
                    # transparent nonterminal.
                    name = "xxx_" + nt.name
                    compile_nonterminal(name, nt)
                    result.append(name)
                else:
                    # Otherwise it's a "token" in our input, named
                    # "tree_{whatever}".
                    result.append(f"tree_{item}")
            elif isinstance(item, parser.Terminal):
                # If it's a terminal it will appear in our input as
                # "token_{whatever}".
                result.append(f"token_{item.name}")
            else:
                meta, children = item
                tx_children = compile_production(children)

                pretty = meta.get("format")
                if isinstance(pretty, parser.FormatMeta):
                    if pretty.group:
                        # Generate a group rule.
                        child_key = tuple(tx_children)
                        rule_name = groups.get(child_key)
                        if rule_name is None:
                            rule_name = f"g_{len(groups)}"
                            groups[child_key] = rule_name
                            generated_grammar.append((rule_name, tx_children))
                        tx_children = [rule_name]

                    if pretty.indent:
                        # Generate an indent rule.
                        child_key = (tuple(tx_children), pretty.indent)
                        rule_name = indents.get(child_key)
                        if rule_name is None:
                            rule_name = f"i_{len(indents)}"
                            indents[child_key] = rule_name
                            generated_grammar.append((rule_name, tx_children))
                        tx_children = [rule_name]

                    if pretty.newline is not None:
                        # Generate a newline rule.
                        #
                        # Newline rules are complicated because we need to avoid
                        # having a production that has zero children. Zero-child
                        # productions generate unpredictable parse trees, even
                        # when "unambiguous".
                        #
                        # Our first hedge is: if we don't have any children for
                        # this production but we *have* already converted some
                        # stuff, then take the stuff we've already converted as
                        # our child and wrap it in a newline production. (This
                        # works when the newline is not the first element in the
                        # production.)
                        #
                        if len(tx_children) == 0:
                            tx_children = result
                            result = []

                        if len(tx_children) > 0:
                            # n == postfix newline.
                            child_key = (tuple(tx_children), pretty.newline)
                            rule_name = newlines.get(child_key)
                            if rule_name is None:
                                rule_name = f"n_{len(newlines)}"
                                newlines[child_key] = rule_name
                                generated_grammar.append((rule_name, tx_children))
                            tx_children = [rule_name]
                        else:
                            # If we still have no tx_children then the newline must
                            # be the first thing in the production. Ugh. We will
                            # remember it for later, and apply it after we've
                            # finished handling everything else.
                            #
                            # p == prefix newline
                            rule_name = f"p_{prefix_count}"
                            prefix_count += 1
                            final_newlines[rule_name] = pretty.newline
                            prefix_stack.append(rule_name)

                    if pretty.forced_break:
                        # Generate a force-break rule.
                        #
                        # This follows the same strategies as newlines with
                        # respect to empty productions.
                        if len(tx_children) == 0:
                            tx_children = result
                            result = []

                        if len(tx_children) > 0:
                            # f == postfix forced break
                            rule_name = f"f_{prefix_count}"
                            prefix_count += 1
                            generated_grammar.append((rule_name, tx_children))
                            tx_children = [rule_name]
                        else:
                            # d == prefix forced break (so-named because 'd' is
                            # to the right of 'f' on my keyboard)
                            rule_name = f"d_{prefix_count}"
                            prefix_count += 1
                            prefix_stack.append(rule_name)

                # If it turned out to have formatting meta then we will have
                # replaced or augmented the translated children appropriately.
                # Otherwise, if it's highlighting meta or something else, we
                # will have ignored it and the translated children should just
                # be inserted inline.
                result.extend(tx_children)

        # Now is the time to handle any prefix rules, by wrapping the results in
        # a new production for the prefix and replacing the results with that
        # one.
        while len(prefix_stack) > 0:
            rule_name = prefix_stack.pop()
            generated_grammar.append((rule_name, result))
            result = [rule_name]

        return result

    start_name = f"yyy_{rule.name}"
    compile_nonterminal(start_name, rule)
    gen = grammar._generator(start_name, generated_grammar)
    parse_table = gen.gen_table()

    # Fold the de-duplicated n_ rules into the p_ rules gathered above; both
    # kinds are looked up through MatcherTable.newline_replace at match time.
    for (_, replacement), rule_name in newlines.items():
        final_newlines[rule_name] = replacement
    indent_amounts = {rule_name: amount for ((_, amount), rule_name) in indents.items()}

    return MatcherTable(
        parse_table,
        indent_amounts,
        final_newlines,
    )
@dataclasses.dataclass
class PrettyTable:
    """Information necessary to convert a parsed tree into a wadler-style
    pretty document, where it can then be formatted.

    This is basically a bunch of "MatcherTables", one for each kind of tree,
    that tell us how to recover document structure from the tree node. We also
    record:

    - The indentation string to use.
    - The trivia modes of any terminals, for use in reconstructing trivia.
    """

    # The string emitted once per indentation level.
    indent: str
    # Terminal name -> how its trivia should be treated when re-emitting.
    trivia_modes: dict[str, parser.TriviaMode]
    # Tree kind -> the table that recovers document structure for that kind.
    matchers: dict[str, MatcherTable]
def compile_pretty_table(grammar: parser.Grammar, indent: str | None = None) -> PrettyTable:
    """Generate a [PrettyTable] to drive a pretty-printer from a grammar.

    If `indent` is not given, the grammar's `pretty_indent` attribute is
    consulted, falling back to a default indent string.
    """
    if indent is None:
        indent = getattr(grammar, "pretty_indent", None)
    if indent is None:
        indent = " "

    # Record the trivia mode of every named terminal that declares one.
    modes: dict[str, parser.TriviaMode] = {}
    for terminal in grammar.terminals():
        declared = terminal.meta.get("trivia_mode")
        if terminal.name is not None and isinstance(declared, parser.TriviaMode):
            modes[terminal.name] = declared

    # One matcher table per nonterminal, keyed by name.
    by_name = {nt.name: nt for nt in grammar.non_terminals()}
    matchers = {
        name: _compile_nonterminal_matcher(grammar, by_name, rule)
        for name, rule in by_name.items()
    }

    return PrettyTable(
        indent,
        modes,
        matchers,
    )

604
parser/wadler/runtime.py Normal file
View file

@ -0,0 +1,604 @@
import dataclasses
import typing
from . import builder
from .. import parser
from .. import runtime
############################################################################
# Documents
############################################################################
@dataclasses.dataclass(frozen=True)
class Cons:
    """Concatenation: the child documents laid out one after another."""

    # Child documents, in output order.
    docs: list["Document"]
@dataclasses.dataclass(frozen=True)
class NewLine:
    """A soft line break: a newline when broken, `replace` text when flat."""

    # Text substituted for the break when the enclosing group is flattened.
    replace: str
@dataclasses.dataclass(frozen=True)
class ForceBreak:
    """An unconditional line break; the enclosing group can never flatten."""

    # When True, no newline is actually emitted, but the break still
    # prevents the enclosing group from fitting flat (see layout_document).
    silent: bool
@dataclasses.dataclass(frozen=True)
class Indent:
    """Renders `doc` with the indentation level increased by `amount`."""

    amount: int
    doc: "Document"
@dataclasses.dataclass(frozen=True)
class Literal:
    """Verbatim text, always rendered exactly as given."""

    text: str
@dataclasses.dataclass(frozen=True)
class Group:
    """A region rendered flat if it fits within the width, broken otherwise."""

    child: "Document"
@dataclasses.dataclass(frozen=True)
class Marker:
    """Attaches arbitrary metadata to a document; layout ignores the metadata
    and just renders the child."""

    child: "Document"
    meta: dict
@dataclasses.dataclass(frozen=True)
class Trivia:
    """Marks its child as trivia (comments/blank lines); `group` hoists
    trivia off the edges of groups so it doesn't affect fitting."""

    child: "Document"
@dataclasses.dataclass
class Lazy:
    """A document computed on demand, memoized after the first use."""

    # Either a thunk producing the document, or the already-resolved document.
    value: typing.Callable[[], "Document"] | "Document"

    def resolve(self) -> "Document":
        """Force the thunk (at most once) and return the document."""
        current = self.value
        if callable(current):
            current = current()
            self.value = current
        return current

    @classmethod
    def from_tree(cls, tree: runtime.Tree, src: str, printer: "Printer") -> "Lazy":
        """Defer converting `tree` into a document until it is needed."""
        return Lazy(lambda: printer.convert_tree_to_document(tree, src))
# The closed set of document node types; None is the empty document.
Document = None | Literal | NewLine | ForceBreak | Cons | Indent | Group | Trivia | Marker | Lazy
def cons(*documents: Document) -> Document:
    """Concatenate documents, flattening nested Cons nodes and dropping Nones.

    Returns None for an empty result and the lone child for a singleton, so
    no needless Cons wrappers are created.
    """
    # TODO: Merge adjacent trivia together?
    flattened: list[Document] = []
    for doc in documents:
        if isinstance(doc, Cons):
            flattened.extend(doc.docs)
        elif doc is not None:
            flattened.append(doc)
    if not flattened:
        return None
    return flattened[0] if len(flattened) == 1 else Cons(flattened)
def group(document: Document) -> Document:
    """Wrap `document` in a Group, hoisting edge trivia out of the group.

    Trivia at the edges shouldn't affect fitting decisions inside the group,
    so leading and trailing Trivia nodes are moved outside the Group wrapper.
    A group is only created when more than one non-trivia child remains.
    """
    if document is None:
        return None
    if isinstance(document, Cons):
        children = list(document.docs)
    else:
        children = [document]

    # Split the trivia off the left and right of the incoming group: trivia
    # at the edges shouldn't affect the inside of the group.
    right_trivia: list[Document] = []
    while len(children) > 0 and isinstance(children[-1], Trivia):
        right_trivia.append(children.pop())
    # BUGFIX: the pops above collect trailing trivia right-to-left; restore
    # source order so multiple trailing trivia docs are not emitted reversed.
    right_trivia.reverse()

    children.reverse()
    left_trivia: list[Document] = []
    while len(children) > 0 and isinstance(children[-1], Trivia):
        left_trivia.append(children.pop())
    children.reverse()

    # IF we still have more than one child, *then* we can actually make a
    # group. (A group with one child is a waste. A group with no children
    # doubly so.)
    if len(children) > 1:
        children = [Group(cons(*children))]

    results = left_trivia + children + right_trivia
    return cons(*results)
def trivia(document: Document) -> Document:
    """Wrap `document` in a Trivia marker, unless it already is one (or empty)."""
    if document is None or isinstance(document, Trivia):
        return document
    return Trivia(document)
############################################################################
# Layouts
############################################################################
class DocumentLayout:
"""A structure that is trivially convertable to a string; the result of
layout out a document."""
segments: list[str | tuple[int, int]]
def __init__(self, segments):
self.segments = segments
def apply_to_source(self, original: str) -> str:
"""Convert this layout to a string by copying chunks of the source
text into the right place.
"""
result = ""
for segment in self.segments:
if isinstance(segment, str):
result += segment
else:
start, end = segment
result += original[start:end]
return result
def layout_document(doc: Document, width: int, indent: str) -> DocumentLayout:
"""Lay out a document to fit within the given width.
The result of this function is a DocumentLayout which can trivially be
converted into a string given the original document.
"""
@dataclasses.dataclass
class Chunk:
doc: Document
indent: int
flat: bool
def with_document(self, doc: Document, and_indent: int = 0) -> "Chunk":
return Chunk(doc=doc, indent=self.indent + and_indent, flat=self.flat)
column = 0
chunks: list[Chunk] = [
Chunk(
doc=doc,
indent=0,
flat=False, # NOTE: Assume flat until we know how to break.
)
]
def fits(chunk: Chunk) -> bool:
remaining = width - column
if remaining <= 0:
return False
stack = list(chunks)
stack.append(chunk)
while len(stack) > 0:
chunk = stack.pop()
match chunk.doc:
case None:
pass
case Literal(text):
remaining -= len(text)
case NewLine(replace):
if chunk.flat:
remaining -= len(replace)
else:
# These are newlines that are real, so it must have
# all fit.
return True
case ForceBreak():
# If we're in a flattened chunk then force it to break by
# returning false here, otherwise we're at the end of the
# line and yes, whatever you were asking about has fit.
return not chunk.flat
case Cons(docs):
stack.extend(chunk.with_document(doc) for doc in reversed(docs))
case Lazy():
stack.append(chunk.with_document(chunk.doc.resolve()))
case Indent(amount, child):
stack.append(chunk.with_document(child, and_indent=amount))
case Group(child):
# The difference between this approach and Justin's twist
# is that we consider the flat variable in Newline(),
# above, rather than here in Group. This makes us more
# like Wadler's original formulation, I guess. The
# grouping is an implicit transform over alternatives
# represented by newline. (If we have other kinds of
# alternatives we'll have to work those out elsewhere as
# well.)
stack.append(chunk.with_document(child))
case Marker():
stack.append(chunk.with_document(chunk.doc.child))
case Trivia(child):
stack.append(chunk.with_document(child))
case _:
typing.assert_never(chunk.doc)
if remaining < 0:
return False
return True # Everything must fit, so great!
output: list[str | tuple[int, int]] = []
while len(chunks) > 0:
chunk = chunks.pop()
match chunk.doc:
case None:
pass
case Literal(text):
output.append(text)
column += len(text)
case NewLine(replace):
if chunk.flat:
output.append(replace)
column += len(replace)
else:
# TODO: Custom newline expansion, custom indent segments.
output.append("\n" + (chunk.indent * indent))
column = chunk.indent * len(indent)
case ForceBreak(silent):
# TODO: Custom newline expansion, custom indent segments.
if not silent:
output.append("\n" + (chunk.indent * indent))
column = chunk.indent * len(indent)
case Cons(docs):
chunks.extend(chunk.with_document(doc) for doc in reversed(docs))
case Indent(amount, doc):
chunks.append(chunk.with_document(doc, and_indent=amount))
case Lazy():
chunks.append(chunk.with_document(chunk.doc.resolve()))
case Group(child):
candidate = Chunk(doc=child, indent=chunk.indent, flat=True)
if chunk.flat or fits(candidate):
chunks.append(candidate)
else:
chunks.append(Chunk(doc=child, indent=chunk.indent, flat=False))
case Marker():
chunks.append(chunk.with_document(chunk.doc.child))
case Trivia(child):
chunks.append(chunk.with_document(child))
case _:
typing.assert_never(chunk)
return DocumentLayout(output)
def resolve_document(doc: Document) -> Document:
match doc:
case Cons(docs):
docs = [resolve_document(d) for d in docs]
return cons(*docs)
case Lazy(_):
return resolve_document(doc.resolve())
case Group(doc):
return group(resolve_document(doc))
case Marker(child, meta):
return Marker(resolve_document(child), meta)
case Trivia(child):
return Trivia(resolve_document(child))
case Literal() | NewLine() | ForceBreak() | Indent() | None:
return doc
case _:
typing.assert_never(doc)
def child_to_name(child: runtime.Tree | runtime.TokenValue) -> str:
    """Map a tree-node child to the terminal name used in the matcher tables
    (`tree_*` for subtrees, `token_*` for tokens)."""
    return (
        f"tree_{child.name}"
        if isinstance(child, runtime.Tree)
        else f"token_{child.kind}"
    )
def slice_pre_post_trivia(
    trivia_mode: dict[str, parser.TriviaMode],
    trivia_tokens: list[runtime.TokenValue],
) -> tuple[
    list[tuple[parser.TriviaMode, runtime.TokenValue]],
    list[tuple[parser.TriviaMode, runtime.TokenValue]],
]:
    """Split trivia tokens into (pre-trivia, post-trivia) at the first newline.

    Trivia before the first newline belongs to the token on its left (post);
    the newline and everything after belong to the token on its right (pre).
    Each token is tagged with its TriviaMode (defaulting to Blank).
    """
    tagged = [
        (trivia_mode.get(tok.kind, parser.TriviaMode.Blank), tok) for tok in trivia_tokens
    ]
    for position, (mode, tok) in enumerate(tagged):
        if tok.start == 0:
            # Everything is pre-trivia if we're at the start of the file.
            return (tagged, [])
        if mode == parser.TriviaMode.NewLine:
            # This is the first newline; it belongs with the pre-trivia.
            return (tagged[position:], tagged[:position])
    # If we never found a new line then it's all post-trivia.
    return ([], tagged)
############################################################################
# The Actual Pretty Printer
############################################################################
class Matcher:
    """Recovers document structure from one kind of tree node.

    Runs the LR parse described by a builder.MatcherTable over a node's
    children, building a document on the parse stack as it goes (see the
    MatcherTable docs for the symbol-name conventions).
    """

    # The matcher table for this tree kind.
    table: builder.MatcherTable
    # Terminal name -> trivia mode, shared across all matchers.
    trivia_mode: dict[str, parser.TriviaMode]

    def __init__(self, table: builder.MatcherTable, trivia_mode: dict[str, parser.TriviaMode]):
        self.table = table
        self.trivia_mode = trivia_mode

    def match(
        self,
        printer: "Printer",
        items: list[runtime.Tree | runtime.TokenValue],
        src: str,
    ) -> Document:
        """Drive an LR parse over `items`, returning the recovered document.

        Token children become Literal documents (with their trivia attached);
        tree children become Lazy documents resolved via `printer` later.
        """
        # Parse stack of (state, document-so-far) pairs.
        stack: list[tuple[int, Document]] = [(0, None)]
        table = self.table.table

        # eof_trivia = []
        # if len(items) > 0:
        #     item = items[-1]
        #     if isinstance(item, runtime.TokenValue):
        #         eof_trivia = item.post_trivia

        # The input is the children, each tagged with its grammar symbol
        # name, followed by an end-of-input marker.
        input = [(child_to_name(i), i) for i in items] + [
            (
                "$",
                runtime.TokenValue(
                    kind="$",
                    start=0,
                    end=0,
                    pre_trivia=[],
                    post_trivia=[],
                ),
            )
        ]
        input_index = 0
        while True:
            current_token = input[input_index]
            current_state = stack[-1][0]
            action = table.actions[current_state].get(current_token[0], parser.Error())
            match action:
                case parser.Accept():
                    result = stack[-1][1]
                    # result = cons(result, self.apply_trivia(eof_trivia))
                    return result
                case parser.Reduce(name=name, count=size):
                    # Concatenate the popped documents into one child...
                    child: Document = None
                    if size > 0:
                        for _, c in stack[-size:]:
                            if c is None:
                                continue
                            child = cons(child, c)
                        del stack[-size:]
                    # ...then apply the effect encoded in the rule name's
                    # first character (see MatcherTable).
                    if name[0] == "g":
                        child = group(child)
                    elif name[0] == "i":
                        amount = self.table.indent_amounts[name]
                        child = Indent(amount, child)
                    elif name[0] == "n":
                        replace = self.table.newline_replace[name]
                        child = cons(child, NewLine(replace))
                    elif name[0] == "p":
                        replace = self.table.newline_replace[name]
                        child = cons(NewLine(replace), child)
                    elif name[0] == "f":
                        child = cons(child, ForceBreak(False))
                    elif name[0] == "d":
                        child = cons(ForceBreak(False), child)
                    else:
                        pass  # Reducing a transparent rule probably.
                    goto = table.gotos[stack[-1][0]].get(name)
                    assert goto is not None
                    stack.append((goto, child))
                case parser.Shift():
                    value = current_token[1]
                    if isinstance(value, runtime.Tree):
                        # Subtrees are converted lazily, on demand.
                        child = Lazy.from_tree(value, src, printer)
                    else:
                        # Tokens become literals with their surrounding
                        # trivia (comments/blank lines) attached.
                        child = cons(
                            trivia(self.apply_pre_trivia(value.pre_trivia, src)),
                            Literal(src[value.start : value.end]),
                            trivia(self.apply_post_trivia(value.post_trivia, src)),
                        )
                    stack.append((action.state, child))
                    input_index += 1
                case parser.Error():
                    # The matcher grammar was generated from the same grammar
                    # that produced the tree, so this should be impossible.
                    raise Exception("How did I get a parse error here??")

    def apply_pre_trivia(self, trivia_tokens: list[runtime.TokenValue], src: str) -> Document:
        """Build the document for trivia that precedes a token.

        Line comments are kept (followed by a forced break); at most one
        blank line is preserved, and never at the start of the file.
        """
        pre_trivia, _ = slice_pre_post_trivia(self.trivia_mode, trivia_tokens)
        if len(pre_trivia) == 0:
            return None

        at_start_of_file = pre_trivia[0][1].start == 0

        trivia_doc = None
        new_line_count = 0
        for mode, token in pre_trivia:
            match mode:
                case parser.TriviaMode.LineComment:
                    trivia_doc = cons(
                        trivia_doc,
                        Literal(src[token.start : token.end]),
                        ForceBreak(False),
                    )
                    new_line_count = 0  # There will be a newline after this.
                    at_start_of_file = False
                case parser.TriviaMode.Blank:
                    pass
                case parser.TriviaMode.NewLine:
                    new_line_count += 1
                    if new_line_count == 2 and not at_start_of_file:
                        # Preserve a single blank line between items.
                        trivia_doc = cons(
                            trivia_doc,
                            ForceBreak(False),
                        )
                case _:
                    typing.assert_never(mode)

        return trivia_doc

    def apply_post_trivia(self, trivia_tokens: list[runtime.TokenValue], src: str) -> Document:
        """Build the document for trivia that follows a token on its line.

        Only a trailing line comment is kept; anything past the first newline
        is left for the next token's pre-trivia.
        """
        if len(trivia_tokens) > 0 and trivia_tokens[-1].end == len(src):
            return self.apply_eof_trivia(trivia_tokens, src)

        _, post_trivia = slice_pre_post_trivia(self.trivia_mode, trivia_tokens)
        trivia_doc = None
        for mode, token in post_trivia:
            match mode:
                case parser.TriviaMode.Blank:
                    pass
                case parser.TriviaMode.NewLine:
                    # Anything after a line break is not processed as post
                    # trivia.
                    break
                case parser.TriviaMode.LineComment:
                    # Because this is post-trivia, we know there's something
                    # to our left, and we can force the space.
                    trivia_doc = cons(
                        Literal(" "),
                        Literal(src[token.start : token.end]),
                        ForceBreak(True),  # And the line needs to end.
                    )
                    break
                case _:
                    typing.assert_never(mode)
        return trivia_doc

    def apply_eof_trivia(self, trivia_tokens: list[runtime.TokenValue], src: str) -> Document:
        """Build the document for trivia at the very end of the file.

        Keeps all line comments and up to two newlines at a time, rather than
        splitting at the first newline like pre/post trivia.
        """
        # EOF trivia has weird rules, namely, it's like pre and post joined together but.
        tokens = [
            (self.trivia_mode.get(token.kind, parser.TriviaMode.Blank), token)
            for token in trivia_tokens
        ]

        at_start = True
        newline_count = 0
        trivia_doc = None
        for mode, token in tokens:
            match mode:
                case parser.TriviaMode.Blank:
                    pass
                case parser.TriviaMode.NewLine:
                    at_start = False
                    newline_count += 1
                    if newline_count <= 2:
                        trivia_doc = cons(trivia_doc, ForceBreak(False))
                case parser.TriviaMode.LineComment:
                    # Because this is post-trivia, we know there's something
                    # to our left, and we can force the space.
                    trivia_doc = cons(
                        trivia_doc,
                        Literal(" ") if at_start else None,
                        Literal(src[token.start : token.end]),
                    )
                    newline_count = 0
                    at_start = False
                case _:
                    typing.assert_never(mode)

        return trivia_doc
class Printer:
    """Converts parsed trees into wadler documents and lays them out.

    Holds one Matcher per tree kind, built from the compiled PrettyTable.
    """

    table: builder.PrettyTable
    matchers: dict[str, Matcher]

    def __init__(self, table: builder.PrettyTable):
        self.table = table
        modes = table.trivia_modes
        self.matchers = {
            kind: Matcher(matcher_table, modes)
            for kind, matcher_table in table.matchers.items()
        }

    def indent(self) -> str:
        """The indentation string configured for this printer."""
        return self.table.indent

    def convert_tree_to_document(self, tree: runtime.Tree, src: str) -> Document:
        """Recover the (fully resolved) document for `tree`."""
        name = tree.name
        assert name is not None, "Cannot format a tree if it still has transparent nodes inside"
        document = self.matchers[name].match(self, list(tree.children), src)
        if document is None:
            raise ValueError(
                f"Could not match a valid tree for {tree.name} with {len(tree.children)} children:\n{tree.format()}"
            )
        return resolve_document(document)

    def format_tree(self, tree: runtime.Tree, src: str, width: int) -> DocumentLayout:
        """Convert `tree` to a document and lay it out within `width` columns."""
        document = self.convert_tree_to_document(tree, src)
        return layout_document(document, width, self.table.indent)