Actual pretty-printing!

Now we're cooking with gas, ALTHOUGH now we have to deal with the fact that we're gluing everything together where there *should* be spaces. Many more improvements to come.
2024-09-11 11:08:02 -07:00 · 2024-09-11 11:08:02 -07:00 · d6dd54f4df
commit d6dd54f4df
parent 5d88b459b9
3 changed files with 201 additions and 21 deletions
--- a/parser/parser.py
+++ b/parser/parser.py
@ -2673,29 +2673,48 @@ highlight = _Highlight()
 ###############################################################################
-# Pretty-printing metadata support
+# Formatting (pretty-printing) metadata support
 ###############################################################################
@dataclasses.dataclass
 class FormatMeta(SyntaxMeta):
-    newline: bool = False
+    newline: str | None = None
    indent: int | None = None
    group: bool = False
 def group(*rules: Rule) -> Rule:
    """Mark a sequence of rules as a single pretty-printing group.

    The rules are combined into one sequence and tagged with formatting
    metadata whose ``group`` flag is set, asking the pretty-printer to keep
    the text on a single line if possible. Parsing is not affected.
    """
    grouped_sequence = seq(*rules)
    group_meta = FormatMeta(group=True)
    return mark(grouped_sequence, format=group_meta)
 def indent(*rules: Rule, amount: int | None = None) -> Rule:
    """Indicates a new level of indentation during pretty-printing. The provided
    rules are otherwise treated as if they were in a sequence. This rule has
    no effect on parsing otherwise.
    The specified amount is the number of "indentation" values to indent the
    lines with. It defaults to 1.
    """
    if amount is None:
-        amount = 4
+        amount = 1
    return mark(seq(*rules), format=FormatMeta(indent=amount))
-def newline() -> Rule:
+def newline(text: str | None = None) -> Rule:
-    return mark(Nothing, format=FormatMeta(newline=True))
+    """Indicate that, during pretty-printing, the line can be broken here. Has
    no effect on parsing.
    If text is provided, the text will be inserted before the line break. This
    allows for e.g. trailing commas in lists and whatnot to make things look
    prettier, when supported.
    """
    if text is None:
        text = ""
    return mark(Nothing, format=FormatMeta(newline=text))
 ###############################################################################
--- a/parser/wadler.py
+++ b/parser/wadler.py
@ -58,6 +58,145 @@ class Lazy:
 Document = None | Text | NewLine | Cons | Indent | Group | Lazy
 class DocumentLayout:
    segments: list[str | tuple[int, int]]
    def __init__(self, segments):
        self.segments = segments
    def apply_to_source(self, original: str) -> str:
        result = ""
        for segment in self.segments:
            if isinstance(segment, str):
                result += segment
            else:
                start, end = segment
                result += original[start:end]
        return result
 def layout_document(doc: Document, width: int) -> DocumentLayout:
    """Lay out a document to fit within the given width.
    The result of this function is a layout which can trivially be converted
    into a string given the original document.
    """
    @dataclasses.dataclass
    class Chunk:
        doc: Document
        indent: int
        flat: bool
        def with_document(self, doc: Document, and_indent: int = 0) -> "Chunk":
            return Chunk(doc=doc, indent=self.indent + and_indent, flat=self.flat)
    column = 0
    chunks: list[Chunk] = [Chunk(doc=doc, indent=0, flat=False)]
    def fits(chunk: Chunk) -> bool:
        remaining = width - column
        if remaining <= 0:
            return False
        stack = list(chunks)
        stack.append(chunk)
        while len(stack) > 0:
            chunk = stack.pop()
            match chunk.doc:
                case None:
                    pass
                case Text(start, end):
                    remaining -= end - start
                case NewLine():
                    if chunk.flat:
                        # These are newlines that have been rendered flat,
                        # they are spaces I guess? TODO: Consider alternate
                        # forms, something that "goes here instead of
                        # newline", like maybe the empty string or... what?
                        remaining -= 1
                    else:
                        # These are newlines that are real, so it must have
                        # all fit.
                        return True
                case Cons(left, right):
                    stack.append(chunk.with_document(right))
                    stack.append(chunk.with_document(left))
                case Lazy():
                    stack.append(chunk.with_document(chunk.doc.resolve()))
                case Indent(amount, child):
                    stack.append(chunk.with_document(child, and_indent=amount))
                case Group(child):
                    # The difference between this approach and Justin's twist
                    # is that we consider the flat variable in Newline(),
                    # above, rather than here in Group. This makes us more
                    # like Wadler's original formulation, I guess. The
                    # grouping is an implicit transform over alternatives
                    # represented by newline. (If we have other kinds of
                    # alternatives we'll have to work those out elsewhere as
                    # well.)
                    stack.append(chunk.with_document(child))
                case _:
                    typing.assert_never(chunk.doc)
            if remaining < 0:
                return False
        return True  # Everything must fit, so great!
    output: list[str | tuple[int, int]] = []
    while len(chunks) > 0:
        chunk = chunks.pop()
        match chunk.doc:
            case None:
                pass
            case Text(start, end):
                output.append((start, end))
                column += end - start
            case NewLine():
                if chunk.flat:
                    # TODO: Custom newline flat mode. See also the
                    # corresponding comment in the "fits" function.
                    output.append(" ")
                    column += 1
                else:
                    # TODO: Custom newline expansion, custom indent segments.
                    output.append("\n" + (chunk.indent * " "))
                    column = chunk.indent
            case Cons(left, right):
                chunks.append(chunk.with_document(right))
                chunks.append(chunk.with_document(left))
            case Indent(amount, doc):
                chunks.append(chunk.with_document(doc, and_indent=amount))
            case Lazy():
                chunks.append(chunk.with_document(chunk.doc.resolve()))
            case Group(child):
                candidate = Chunk(doc=child, indent=chunk.indent, flat=True)
                if chunk.flat or fits(candidate):
                    chunks.append(candidate)
                else:
                    chunks.append(Chunk(doc=child, indent=chunk.indent, flat=False))
            case _:
                typing.assert_never(chunk)
    return DocumentLayout(output)
 def resolve_document(doc: Document) -> Document:
    match doc:
        case Cons(left, right):
@ -75,12 +214,9 @@ def resolve_document(doc: Document) -> Document:
            return doc
 def layout_document(doc: Document) -> typing.Generator[str, None, None]:
    """Unimplemented placeholder: discards `doc` and raises NotImplementedError.

    NOTE(review): this shares its name with the two-argument
    layout_document(doc, width) shown earlier in this view; presumably this
    is the old stub that the newer function replaces -- confirm.
    """
    del doc
    raise NotImplementedError()
 def child_to_name(child: runtime.Tree | runtime.TokenValue) -> str:
    # TODO: RECONSIDER THE EXISTENCE OF THIS FUNCTION
    #       The naming condition is important but
    if isinstance(child, runtime.Tree):
        return f"tree_{child.name}"
    else:
@ -230,7 +366,7 @@ class Printer:
                            generated_grammar.append((rule_name, tx_children))
                            tx_children = [rule_name]
-                        if pretty.newline:
+                        if pretty.newline is not None:
                            if not done_newline:
                                generated_grammar.append(("newline", []))
                                done_newline = True
@ -272,6 +408,6 @@ class Printer:
            )
        return resolve_document(m)
-    def format_tree(self, tree: runtime.Tree) -> str:
+    def format_tree(self, tree: runtime.Tree, width: int) -> DocumentLayout:
        doc = self.convert_tree_to_document(tree)
-        return next(layout_document(doc))
+        return layout_document(doc, width)
--- a/tests/test_wadler.py
+++ b/tests/test_wadler.py
@ -1,6 +1,6 @@
 import typing
-from parser.parser import Grammar, Re, Terminal, rule, opt, group, newline, alt
+from parser.parser import Grammar, Re, Terminal, rule, opt, group, newline, alt, indent
 import parser.runtime as runtime
 import parser.wadler as wadler
@ -29,13 +29,13 @@ class JsonGrammar(Grammar):
    @rule
    def object(self):
-        return group(self.LCURLY + opt(self._object_pairs) + self.RCURLY)
+        return group(self.LCURLY + opt(indent(self._object_pairs)) + newline() + self.RCURLY)
    @rule
    def _object_pairs(self):
        return alt(
-            self.object_pair + newline(),
+            newline() + self.object_pair,
-            self.object_pair + self.COMMA + newline() + self._object_pairs,
+            newline() + self.object_pair + self.COMMA + self._object_pairs,
        )
    @rule
@ -44,13 +44,13 @@ class JsonGrammar(Grammar):
    @rule
    def array(self):
-        return group(self.LSQUARE + opt(self._array_items) + self.RSQUARE)
+        return group(self.LSQUARE + opt(indent(self._array_items)) + newline() + self.RSQUARE)
    @rule
    def _array_items(self):
        return alt(
-            self.value + newline(),
+            newline() + self.value,
-            self.value + self.COMMA + newline() + self._array_items,
+            newline() + self.value + self.COMMA + self._array_items,
        )
    BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus())
@ -112,7 +112,7 @@ def flatten_document(doc: wadler.Document, src: str) -> list:
            typing.assert_never(doc)
-def test_basic_printer():
+def test_convert_tree_to_document():
    text = '{"a": true, "b":[1,2,3]}'
    tokens = runtime.GenericTokenStream(text, JSON_LEXER)
    tree, errors = JSON_PARSER.parse(tokens)
@ -148,3 +148,28 @@ def test_basic_printer():
            "}",
        ]
    ]
 def test_layout_basic():
    """Parse a small JSON text, lay it out at width 10, and check the result."""
    text = '{"a": true, "b":[1,2,3]}'
    tokens = runtime.GenericTokenStream(text, JSON_LEXER)
    tree, errors = JSON_PARSER.parse(tokens)
    # The input must parse cleanly before formatting is attempted.
    assert [] == errors
    assert tree is not None
    printer = wadler.Printer(JSON)
    # format_tree returns a layout; apply_to_source turns it into a string
    # using the original text.
    result = printer.format_tree(tree, 10).apply_to_source(text)
    # The expected text is written as a multi-line literal and stripped of
    # its surrounding whitespace before comparison.
    assert (
        result
        == """
 {
 "a":true,
 "b":[
  1,
  2,
  3
 ]
 }
 """.strip()
    )