Actual pretty-printing!

Now we're cooking with gas, ALTHOUGH now we have to deal with the fact that we're gluing everything together where there *should* be spaces. Many more improvements to come.
2024-09-11 11:08:02 -07:00 · 2024-09-11 11:08:02 -07:00 · d6dd54f4df
commit d6dd54f4df
parent 5d88b459b9
3 changed files with 201 additions and 21 deletions
--- a/parser/parser.py
+++ b/parser/parser.py
@ -2673,29 +2673,48 @@ highlight = _Highlight()


 ###############################################################################
-# Pretty-printing metadata support
+# Formatting (pretty-printing) metadata support
 ###############################################################################


@dataclasses.dataclass
 class FormatMeta(SyntaxMeta):
+    # Pretty-printing hints attached to a rule through mark(..., format=...).
+    #
+    # newline: None (the default) means the rule is not a line-break point;
+    #          a string (possibly empty) marks a place where the line may be
+    #          broken and is the text to insert before the break.
+    # indent:  the amount passed to indent(), or None when unset.
+    # group:   True when the rule was wrapped with group().
-    newline: bool = False
+    newline: str | None = None
    indent: int | None = None
    group: bool = False


 def group(*rules: Rule) -> Rule:
+    """Indicates that the text should be put on a single line if possible
+    during pretty-printing. The provided rules are otherwise treated as if
+    they were in a sequence. Has no effect on parsing.
+    """
    return mark(seq(*rules), format=FormatMeta(group=True))


 def indent(*rules: Rule, amount: int | None = None) -> Rule:
+    """Indicates a new level of indentation during pretty-printing. The
+    provided rules are otherwise treated as if they were in a sequence. This
+    rule has no other effect on parsing.
+
+    The specified amount is the number of "indentation" values to indent the
+    lines with. It defaults to 1.
+    """
    if amount is None:
-        amount = 4
+        amount = 1
    return mark(seq(*rules), format=FormatMeta(indent=amount))


-def newline() -> Rule:
-    return mark(Nothing, format=FormatMeta(newline=True))
+def newline(text: str | None = None) -> Rule:
+    """Indicate that, during pretty-printing, the line can be broken here. Has
+    no effect on parsing.
+
+    If text is provided, the text will be inserted before the line break. This
+    allows for e.g. trailing commas in lists and whatnot to make things look
+    prettier, when supported. If text is omitted, the empty string is used.
+    """
+    if text is None:
+        text = ""
+    return mark(Nothing, format=FormatMeta(newline=text))


 ###############################################################################
--- a/parser/wadler.py
+++ b/parser/wadler.py
@ -58,6 +58,145 @@ class Lazy:
 Document = None | Text | NewLine | Cons | Indent | Group | Lazy


+class DocumentLayout:
+    """The result of laying out a document: an ordered list of segments.
+
+    Each segment is either a literal string to emit as-is, or a
+    `(start, end)` pair identifying a slice of the original source text.
+    """
+
+    segments: list[str | tuple[int, int]]
+
+    def __init__(self, segments: list[str | tuple[int, int]]):
+        self.segments = segments
+
+    def apply_to_source(self, original: str) -> str:
+        """Render the layout as a string.
+
+        String segments are emitted verbatim; `(start, end)` segments are
+        replaced by `original[start:end]`.
+        """
+        # Collect the pieces and join once rather than growing a string with
+        # repeated `+=`.
+        parts: list[str] = []
+        for segment in self.segments:
+            if isinstance(segment, str):
+                parts.append(segment)
+            else:
+                start, end = segment
+                parts.append(original[start:end])
+
+        return "".join(parts)
+
+
+def layout_document(doc: Document, width: int) -> DocumentLayout:
+    """Lay out a document to fit within the given width.
+
+    A group is rendered flat (its newlines become single spaces) when the
+    text up to the next real line break fits in the space remaining on the
+    current line; otherwise its newlines become line breaks followed by the
+    current indentation.
+
+    The result of this function is a layout which can trivially be converted
+    into a string given the original source text.
+    """

+    @dataclasses.dataclass
+    class Chunk:
+        # A unit of pending work: a document plus the context to render it in.
+        doc: Document
+        indent: int  # Number of spaces emitted after a real line break.
+        flat: bool  # True if newlines in `doc` are rendered as spaces.
+
+        def with_document(self, doc: Document, and_indent: int = 0) -> "Chunk":
+            # Same flat mode, new document, optionally deeper indentation.
+            return Chunk(doc=doc, indent=self.indent + and_indent, flat=self.flat)
+
+    column = 0
+    # Work stack; the chunk to render next is at the end of the list.
+    chunks: list[Chunk] = [Chunk(doc=doc, indent=0, flat=False)]
+
+    def fits(chunk: Chunk) -> bool:
+        # Decide whether `chunk`, followed by the rest of the pending work,
+        # fits on the current line up to the next real line break.
+        remaining = width - column
+        if remaining <= 0:
+            return False
+
+        # Work on a copy so the real work stack is left untouched.
+        stack = list(chunks)
+        stack.append(chunk)
+        while len(stack) > 0:
+            current = stack.pop()
+            match current.doc:
+                case None:
+                    pass
+
+                case Text(start, end):
+                    remaining -= end - start
+
+                case NewLine():
+                    if current.flat:
+                        # These are newlines that have been rendered flat,
+                        # they are spaces I guess? TODO: Consider alternate
+                        # forms, something that "goes here instead of
+                        # newline", like maybe the empty string or... what?
+                        remaining -= 1
+                    else:
+                        # These are newlines that are real, so it must have
+                        # all fit.
+                        return True
+
+                case Cons(left, right):
+                    # Push right first so that left is examined first.
+                    stack.append(current.with_document(right))
+                    stack.append(current.with_document(left))
+
+                case Lazy():
+                    stack.append(current.with_document(current.doc.resolve()))
+
+                case Indent(amount, child):
+                    stack.append(current.with_document(child, and_indent=amount))
+
+                case Group(child):
+                    # The difference between this approach and Justin's twist
+                    # is that we consider the flat variable in Newline(),
+                    # above, rather than here in Group. This makes us more
+                    # like Wadler's original formulation, I guess. The
+                    # grouping is an implicit transform over alternatives
+                    # represented by newline. (If we have other kinds of
+                    # alternatives we'll have to work those out elsewhere as
+                    # well.)
+                    stack.append(current.with_document(child))
+
+                case _:
+                    typing.assert_never(current.doc)
+
+            if remaining < 0:
+                return False
+
+        return True  # Everything must fit, so great!
+
+    output: list[str | tuple[int, int]] = []
+    while len(chunks) > 0:
+        chunk = chunks.pop()
+        match chunk.doc:
+            case None:
+                pass
+
+            case Text(start, end):
+                # Text is emitted as a span to be filled in later, not as a
+                # string.
+                output.append((start, end))
+                column += end - start
+
+            case NewLine():
+                if chunk.flat:
+                    # TODO: Custom newline flat mode. See also the
+                    # corresponding comment in the "fits" function.
+                    output.append(" ")
+                    column += 1
+                else:
+                    # TODO: Custom newline expansion, custom indent segments.
+                    output.append("\n" + (chunk.indent * " "))
+                    column = chunk.indent
+
+            case Cons(left, right):
+                # Push right first so that left is rendered first.
+                chunks.append(chunk.with_document(right))
+                chunks.append(chunk.with_document(left))
+
+            case Indent(amount, child):
+                chunks.append(chunk.with_document(child, and_indent=amount))
+
+            case Lazy():
+                chunks.append(chunk.with_document(chunk.doc.resolve()))
+
+            case Group(child):
+                # A group nested in an already-flat chunk stays flat;
+                # otherwise try it flat and fall back to broken mode.
+                candidate = Chunk(doc=child, indent=chunk.indent, flat=True)
+                if chunk.flat or fits(candidate):
+                    chunks.append(candidate)
+                else:
+                    chunks.append(Chunk(doc=child, indent=chunk.indent, flat=False))
+
+            case _:
+                # Check the document, not the chunk: the match narrows
+                # chunk.doc, so this is what proves every case is handled.
+                typing.assert_never(chunk.doc)
+
+    return DocumentLayout(output)
+
+
 def resolve_document(doc: Document) -> Document:
    match doc:
        case Cons(left, right):
@ -75,12 +214,9 @@ def resolve_document(doc: Document) -> Document:
            return doc


-def layout_document(doc: Document) -> typing.Generator[str, None, None]:
-    del doc
-    raise NotImplementedError()
-
-
 def child_to_name(child: runtime.Tree | runtime.TokenValue) -> str:
+    # TODO: RECONSIDER THE EXISTENCE OF THIS FUNCTION
+    #       The naming condition is important but
    if isinstance(child, runtime.Tree):
        return f"tree_{child.name}"
    else:
@ -230,7 +366,7 @@ class Printer:
                            generated_grammar.append((rule_name, tx_children))
                            tx_children = [rule_name]

-                        if pretty.newline:
+                        if pretty.newline is not None:
                            if not done_newline:
                                generated_grammar.append(("newline", []))
                                done_newline = True
@ -272,6 +408,6 @@ class Printer:
            )
        return resolve_document(m)

-    def format_tree(self, tree: runtime.Tree) -> str:
+    def format_tree(self, tree: runtime.Tree, width: int) -> DocumentLayout:
+        """Convert `tree` to a document and lay it out to fit within `width`,
+        returning the resulting layout.
+        """
        doc = self.convert_tree_to_document(tree)
-        return next(layout_document(doc))
+        return layout_document(doc, width)
--- a/tests/test_wadler.py
+++ b/tests/test_wadler.py
@ -1,6 +1,6 @@
 import typing

-from parser.parser import Grammar, Re, Terminal, rule, opt, group, newline, alt
+from parser.parser import Grammar, Re, Terminal, rule, opt, group, newline, alt, indent

 import parser.runtime as runtime
 import parser.wadler as wadler
@ -29,13 +29,13 @@ class JsonGrammar(Grammar):

    @rule
    def object(self):
-        return group(self.LCURLY + opt(self._object_pairs) + self.RCURLY)
+        return group(self.LCURLY + opt(indent(self._object_pairs)) + newline() + self.RCURLY)

    @rule
    def _object_pairs(self):
        return alt(
-            self.object_pair + newline(),
-            self.object_pair + self.COMMA + newline() + self._object_pairs,
+            newline() + self.object_pair,
+            newline() + self.object_pair + self.COMMA + self._object_pairs,
        )

    @rule
@ -44,13 +44,13 @@ class JsonGrammar(Grammar):

    @rule
    def array(self):
-        return group(self.LSQUARE + opt(self._array_items) + self.RSQUARE)
+        return group(self.LSQUARE + opt(indent(self._array_items)) + newline() + self.RSQUARE)

    @rule
    def _array_items(self):
        return alt(
-            self.value + newline(),
-            self.value + self.COMMA + newline() + self._array_items,
+            newline() + self.value,
+            newline() + self.value + self.COMMA + self._array_items,
        )

    BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus())
@ -112,7 +112,7 @@ def flatten_document(doc: wadler.Document, src: str) -> list:
            typing.assert_never(doc)


-def test_basic_printer():
+def test_convert_tree_to_document():
    text = '{"a": true, "b":[1,2,3]}'
    tokens = runtime.GenericTokenStream(text, JSON_LEXER)
    tree, errors = JSON_PARSER.parse(tokens)
@ -148,3 +148,28 @@ def test_basic_printer():
            "}",
        ]
    ]
+
+
+def test_layout_basic():
+    """Lay out a small JSON document at width 10 and check the exact text."""
+    source = '{"a": true, "b":[1,2,3]}'
+    tree, errors = JSON_PARSER.parse(runtime.GenericTokenStream(source, JSON_LEXER))
+    assert [] == errors
+    assert tree is not None
+
+    layout = wadler.Printer(JSON).format_tree(tree, 10)
+    result = layout.apply_to_source(source)
+
+    expected = """
+{
+ "a":true,
+ "b":[
+  1,
+  2,
+  3
+ ]
+}
+""".strip()
+    assert result == expected