Support newline replacements

This allows more complicated spacing: each newline now carries the text that replaces it when it is rendered flat. How to handle identifier/punctuation spacing is still unclear.
2024-09-12 11:09:14 -07:00 · 2024-09-12 11:09:14 -07:00 · 938f0e5c69
commit 938f0e5c69
parent b3b2102864
2 changed files with 101 additions and 56 deletions
--- a/parser/wadler.py
+++ b/parser/wadler.py
@ -21,7 +21,7 @@ def cons(left: "Document", right: "Document") -> "Document":
@dataclasses.dataclass(frozen=True)
 class NewLine:
-    pass
+    replace: str
@dataclasses.dataclass(frozen=True)
@ -119,13 +119,9 @@ def layout_document(doc: Document, width: int) -> DocumentLayout:
                case Literal(text):
                    remaining -= len(text)
-                case NewLine():
+                case NewLine(replace):
                    if chunk.flat:
-                        # These are newlines that have been rendered flat,
+                        remaining -= len(replace)
                        # they are spaces I guess? TODO: Consider alternate
                        # forms, something that "goes here instead of
                        # newline", like maybe the empty string or... what?
                        remaining -= 1
                    else:
                        # These are newlines that are real, so it must have
                        # all fit.
@ -175,12 +171,10 @@ def layout_document(doc: Document, width: int) -> DocumentLayout:
                output.append(text)
                column += len(text)
-            case NewLine():
+            case NewLine(replace):
                if chunk.flat:
-                    # TODO: Custom newline flat mode. See also the
+                    output.append(replace)
-                    # corresponding comment in the "fits" function.
+                    column += len(replace)
                    output.append(" ")
                    column += 1
                else:
                    # TODO: Custom newline expansion, custom indent segments.
                    output.append("\n" + (chunk.indent * " "))
@ -220,7 +214,7 @@ def resolve_document(doc: Document) -> Document:
                return doc
        case Lazy(_):
-            return doc.resolve()
+            return resolve_document(doc.resolve())
        case _:
            return doc
@ -239,16 +233,19 @@ class Matcher:
    table: parser.ParseTable
    indent_amounts: dict[str, int]
    text_follow: dict[str, str]
    newline_replace: dict[str, str]
    def __init__(
        self,
        table: parser.ParseTable,
        indent_amounts: dict[str, int],
        text_follow: dict[str, str],
        newline_replace: dict[str, str],
    ):
        self.table = table
        self.indent_amounts = indent_amounts
        self.text_follow = text_follow
        self.newline_replace = newline_replace
    def match(self, printer: "Printer", items: list[runtime.Tree | runtime.TokenValue]) -> Document:
        stack: list[tuple[int, Document]] = [(0, None)]
@ -294,15 +291,17 @@ class Matcher:
                        child = Indent(amount, child)
                    elif name[0] == "n":
-                        child = cons(child, NewLine())
+                        replace = self.newline_replace[name]
                        print(f"!!!! {name} -> {repr(replace)}")
                        child = cons(child, NewLine(replace))
                    elif name[0] == "p":
-                        child = cons(NewLine(), child)
+                        child = cons(NewLine(""), child)
                    else:
                        pass  # Reducing a transparent rule probably.
-                    goto = self.table.gotos[stack[-1][0]].get(name)
+                    goto = table.gotos[stack[-1][0]].get(name)
                    assert goto is not None
                    stack.append((goto, child))
@ -315,7 +314,27 @@ class Matcher:
                        if value.name:
                            follow = self.text_follow.get(value.name)
                    else:
                        # Here is where we consider ephemera. We can say: if
                        # the trailing ephemera includes a blank, then we
                        # insert a blank here. We do not want to double-count
                        # blanks, maybe we can have some kind of a notion of
                        # what is a blank.
                        #
                        # A weird digression: one thing that's weird is that
                        # blank spaces are always kinda culturally assumed?
                        # But the computer always has to be taught. In hand-
                        # printers, the spaces are added by a person and the
                        # person doesn't think twice. We are in the unique
                        # position of "generalizing" the blank space for
                        # formatting purposes.
                        child = Text(value.start, value.end)
                        for trivia in value.pre_trivia:
                            pass
                        for trivia in value.post_trivia:
                            pass
                        follow = self.text_follow.get(value.kind)
                    if follow is not None:
@ -357,6 +376,7 @@ class Printer:
        group_count = 0
        indent_amounts: dict[str, int] = {}
        done_newline = False
        newline_map: dict[str, str] = {}
        def compile_nonterminal(name: str, rule: parser.NonTerminal):
            if name not in visited:
@ -411,10 +431,13 @@ class Printer:
                            tx_children = [rule_name]
                        if pretty.newline is not None:
-                            if not done_newline:
+                            newline_rule_name = newline_map.get(pretty.newline)
-                                generated_grammar.append(("newline", []))
+                            if newline_rule_name is None:
-                                done_newline = True
+                                newline_rule_name = f"n{len(newline_map)}"
-                            tx_children.append("newline")
+                                newline_map[pretty.newline] = newline_rule_name
                                generated_grammar.append((newline_rule_name, []))
                            tx_children.append(newline_rule_name)
                    # If it turned out to have formatting meta then we will
                    # have replaced or augmented the translated children
@ -429,7 +452,13 @@ class Printer:
        gen = self.grammar._generator(rule.name, generated_grammar)
        parse_table = gen.gen_table()
-        return Matcher(parse_table, indent_amounts, self._text_follow)
+        newline_replace = {v: k for k, v in newline_map.items()}
        return Matcher(
            parse_table,
            indent_amounts,
            self._text_follow,
            newline_replace,
        )
    def rule_to_matcher(self, rule: parser.NonTerminal) -> Matcher:
        result = self._matchers.get(rule.name)
--- a/tests/test_wadler.py
+++ b/tests/test_wadler.py
@ -29,35 +29,42 @@ class JsonGrammar(Grammar):
    @rule
    def object(self):
-        return group(self.LCURLY + opt(indent(self._object_pairs)) + newline() + self.RCURLY)
+        return group(
            self.LCURLY + opt(indent(newline() + self._object_pairs)) + newline() + self.RCURLY
        )
    @rule
    def _object_pairs(self):
        return alt(
-            newline() + self.object_pair,
+            self.object_pair,
-            newline() + self.object_pair + self.COMMA + self._object_pairs,
+            self.object_pair + self.COMMA + newline(" ") + self._object_pairs,
        )
    @rule
    def object_pair(self):
-        return group(self.STRING + self.COLON + self.value)
+        return group(self.STRING + self.COLON + indent(newline(" ") + self.value))
    @rule
    def array(self):
-        return group(self.LSQUARE + opt(indent(self._array_items)) + newline() + self.RSQUARE)
+        return group(
            self.LSQUARE + opt(indent(newline() + self._array_items)) + newline() + self.RSQUARE
        )
    @rule
    def _array_items(self):
        return alt(
-            newline() + self.value,
+            self.value,
-            newline() + self.value + self.COMMA + self._array_items,
+            self.value + self.COMMA + newline(" ") + self._array_items,
        )
-    BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus())
+    BLANKS = Terminal(
        Re.set(" ", "\t", "\r", "\n").plus(),
        is_format_blank=True,
    )
    LCURLY = Terminal("{")
    RCURLY = Terminal("}")
    COMMA = Terminal(",")
-    COLON = Terminal(":", format_follow=" ")
+    COLON = Terminal(":")
    LSQUARE = Terminal("[")
    RSQUARE = Terminal("]")
    TRUE = Terminal("true")
@ -94,8 +101,8 @@ JSON_PARSER = runtime.Parser(JSON_TABLE)
 def flatten_document(doc: wadler.Document, src: str) -> list:
    match doc:
-        case wadler.NewLine():
+        case wadler.NewLine(replace):
-            return ["<newline>"]
+            return [f"<newline {repr(replace)}>"]
        case wadler.Indent():
            return [[f"<indent {doc.amount}>", flatten_document(doc.doc, src)]]
        case wadler.Text(start, end):
@ -130,61 +137,70 @@ def test_convert_tree_to_document():
            [
                "<indent 1>",
                [
-                    "<newline>",
+                    "<newline ''>",
-                    ['"a"', ":", " ", "true"],
+                    [
                        '"a"',
                        ":",
                        [
                            "<indent 1>",
                            ["<newline ' '>", "true"],
                        ],
                    ],
                    ",",
-                    "<newline>",
+                    "<newline ' '>",
                    [
                        '"b"',
                        ":",
                        " ",
                        [
-                            "[",
+                            "<indent 1>",
                            [
-                                "<indent 1>",
+                                "<newline ' '>",
                                [
-                                    "<newline>",
+                                    "[",
-                                    "1",
+                                    [
-                                    ",",
+                                        "<indent 1>",
-                                    "<newline>",
+                                        [
-                                    "2",
+                                            "<newline ''>",
-                                    ",",
+                                            "1",
-                                    "<newline>",
+                                            ",",
-                                    "3",
+                                            "<newline ' '>",
                                            "2",
                                            ",",
                                            "<newline ' '>",
                                            "3",
                                        ],
                                    ],
                                    "<newline ''>",
                                    "]",
                                ],
                            ],
                            "<newline>",
                            "]",
                        ],
                    ],
                ],
            ],
-            "<newline>",
+            "<newline ''>",
            "}",
        ]
    ]
 def test_layout_basic():
-    text = '{"a": true, "b":[1,2,3]}'
+    text = '{"a": true, "b":[1,2,3], "c":[1,2,3,4,5,6,7]}'
    tokens = runtime.GenericTokenStream(text, JSON_LEXER)
    tree, errors = JSON_PARSER.parse(tokens)
    assert [] == errors
    assert tree is not None
    printer = wadler.Printer(JSON)
-    result = printer.format_tree(tree, 10).apply_to_source(text)
+    result = printer.format_tree(tree, 50).apply_to_source(text)
    assert (
        result
        == """
 {
 "a": true,
- "b": [
+ "b": [1, 2, 3],
-  1,
+ "c": [1, 2, 3, 4, 5, 6, 7]
  2,
  3
 ]
 }
 """.strip()
    )