Support newline replacements

Each newline now carries the string that replaces it when the newline is rendered flat, which allows more complicated spacing. How to handle identifier/punctuation spacing is still unclear.
2024-09-12 11:09:14 -07:00 · 2024-09-12 11:09:14 -07:00 · 938f0e5c69
commit 938f0e5c69
parent b3b2102864
2 changed files with 101 additions and 56 deletions
--- a/parser/wadler.py
+++ b/parser/wadler.py
@ -21,7 +21,7 @@ def cons(left: "Document", right: "Document") -> "Document":

@dataclasses.dataclass(frozen=True)
 class NewLine:
-    pass
+    replace: str


@dataclasses.dataclass(frozen=True)
@ -119,13 +119,9 @@ def layout_document(doc: Document, width: int) -> DocumentLayout:
                case Literal(text):
                    remaining -= len(text)

-                case NewLine():
+                case NewLine(replace):
                    if chunk.flat:
-                        # These are newlines that have been rendered flat,
-                        # they are spaces I guess? TODO: Consider alternate
-                        # forms, something that "goes here instead of
-                        # newline", like maybe the empty string or... what?
-                        remaining -= 1
+                        remaining -= len(replace)
                    else:
                        # These are newlines that are real, so it must have
                        # all fit.
@ -175,12 +171,10 @@ def layout_document(doc: Document, width: int) -> DocumentLayout:
                output.append(text)
                column += len(text)

-            case NewLine():
+            case NewLine(replace):
                if chunk.flat:
-                    # TODO: Custom newline flat mode. See also the
-                    # corresponding comment in the "fits" function.
-                    output.append(" ")
-                    column += 1
+                    output.append(replace)
+                    column += len(replace)
                else:
                    # TODO: Custom newline expansion, custom indent segments.
                    output.append("\n" + (chunk.indent * " "))
@ -220,7 +214,7 @@ def resolve_document(doc: Document) -> Document:
                return doc

        case Lazy(_):
-            return doc.resolve()
+            return resolve_document(doc.resolve())

        case _:
            return doc
@ -239,16 +233,19 @@ class Matcher:
    table: parser.ParseTable
    indent_amounts: dict[str, int]
    text_follow: dict[str, str]
+    newline_replace: dict[str, str]

    def __init__(
        self,
        table: parser.ParseTable,
        indent_amounts: dict[str, int],
        text_follow: dict[str, str],
+        newline_replace: dict[str, str],
    ):
        self.table = table
        self.indent_amounts = indent_amounts
        self.text_follow = text_follow
+        self.newline_replace = newline_replace

    def match(self, printer: "Printer", items: list[runtime.Tree | runtime.TokenValue]) -> Document:
        stack: list[tuple[int, Document]] = [(0, None)]
@ -294,15 +291,17 @@ class Matcher:
                        child = Indent(amount, child)

                    elif name[0] == "n":
-                        child = cons(child, NewLine())
+                        replace = self.newline_replace[name]
+                        print(f"!!!! {name} -> {repr(replace)}")
+                        child = cons(child, NewLine(replace))

                    elif name[0] == "p":
-                        child = cons(NewLine(), child)
+                        child = cons(NewLine(""), child)

                    else:
                        pass  # Reducing a transparent rule probably.

-                    goto = self.table.gotos[stack[-1][0]].get(name)
+                    goto = table.gotos[stack[-1][0]].get(name)
                    assert goto is not None
                    stack.append((goto, child))

@ -315,7 +314,27 @@ class Matcher:
                        if value.name:
                            follow = self.text_follow.get(value.name)
                    else:
+                        # Here is where we consider ephemera. We can say: if
+                        # the trailing ephemera includes a blank, then we
+                        # insert a blank here. We do not want to double-count
+                        # blanks, maybe we can have some kind of a notion of
+                        # what is a blank.
+                        #
+                        # A weird digression: one thing that's weird is that
+                        # blank spaces are always kinda culturally assumed?
+                        # But the computer always has to be taught. In hand-
+                        # printers, the spaces are added by a person and the
+                        # person doesn't think twice. We are in the unique
+                        # position of "generalizing" the blank space for
+                        # formatting purposes.
                        child = Text(value.start, value.end)
+
+                        for trivia in value.pre_trivia:
+                            pass
+
+                        for trivia in value.post_trivia:
+                            pass
+
                        follow = self.text_follow.get(value.kind)

                    if follow is not None:
@ -357,6 +376,7 @@ class Printer:
        group_count = 0
        indent_amounts: dict[str, int] = {}
        done_newline = False
+        newline_map: dict[str, str] = {}

        def compile_nonterminal(name: str, rule: parser.NonTerminal):
            if name not in visited:
@ -411,10 +431,13 @@ class Printer:
                            tx_children = [rule_name]

                        if pretty.newline is not None:
-                            if not done_newline:
-                                generated_grammar.append(("newline", []))
-                                done_newline = True
-                            tx_children.append("newline")
+                            newline_rule_name = newline_map.get(pretty.newline)
+                            if newline_rule_name is None:
+                                newline_rule_name = f"n{len(newline_map)}"
+                                newline_map[pretty.newline] = newline_rule_name
+                                generated_grammar.append((newline_rule_name, []))
+
+                            tx_children.append(newline_rule_name)

                    # If it turned out to have formatting meta then we will
                    # have replaced or augmented the translated children
@ -429,7 +452,13 @@ class Printer:
        gen = self.grammar._generator(rule.name, generated_grammar)
        parse_table = gen.gen_table()

-        return Matcher(parse_table, indent_amounts, self._text_follow)
+        newline_replace = {v: k for k, v in newline_map.items()}
+        return Matcher(
+            parse_table,
+            indent_amounts,
+            self._text_follow,
+            newline_replace,
+        )

    def rule_to_matcher(self, rule: parser.NonTerminal) -> Matcher:
        result = self._matchers.get(rule.name)
--- a/tests/test_wadler.py
+++ b/tests/test_wadler.py
@ -29,35 +29,42 @@ class JsonGrammar(Grammar):

    @rule
    def object(self):
-        return group(self.LCURLY + opt(indent(self._object_pairs)) + newline() + self.RCURLY)
+        return group(
+            self.LCURLY + opt(indent(newline() + self._object_pairs)) + newline() + self.RCURLY
+        )

    @rule
    def _object_pairs(self):
        return alt(
-            newline() + self.object_pair,
-            newline() + self.object_pair + self.COMMA + self._object_pairs,
+            self.object_pair,
+            self.object_pair + self.COMMA + newline(" ") + self._object_pairs,
        )

    @rule
    def object_pair(self):
-        return group(self.STRING + self.COLON + self.value)
+        return group(self.STRING + self.COLON + indent(newline(" ") + self.value))

    @rule
    def array(self):
-        return group(self.LSQUARE + opt(indent(self._array_items)) + newline() + self.RSQUARE)
+        return group(
+            self.LSQUARE + opt(indent(newline() + self._array_items)) + newline() + self.RSQUARE
+        )

    @rule
    def _array_items(self):
        return alt(
-            newline() + self.value,
-            newline() + self.value + self.COMMA + self._array_items,
+            self.value,
+            self.value + self.COMMA + newline(" ") + self._array_items,
        )

-    BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus())
+    BLANKS = Terminal(
+        Re.set(" ", "\t", "\r", "\n").plus(),
+        is_format_blank=True,
+    )
    LCURLY = Terminal("{")
    RCURLY = Terminal("}")
    COMMA = Terminal(",")
-    COLON = Terminal(":", format_follow=" ")
+    COLON = Terminal(":")
    LSQUARE = Terminal("[")
    RSQUARE = Terminal("]")
    TRUE = Terminal("true")
@ -94,8 +101,8 @@ JSON_PARSER = runtime.Parser(JSON_TABLE)

 def flatten_document(doc: wadler.Document, src: str) -> list:
    match doc:
-        case wadler.NewLine():
-            return ["<newline>"]
+        case wadler.NewLine(replace):
+            return [f"<newline {repr(replace)}>"]
        case wadler.Indent():
            return [[f"<indent {doc.amount}>", flatten_document(doc.doc, src)]]
        case wadler.Text(start, end):
@ -130,61 +137,70 @@ def test_convert_tree_to_document():
            [
                "<indent 1>",
                [
-                    "<newline>",
-                    ['"a"', ":", " ", "true"],
+                    "<newline ''>",
+                    [
+                        '"a"',
+                        ":",
+                        [
+                            "<indent 1>",
+                            ["<newline ' '>", "true"],
+                        ],
+                    ],
                    ",",
-                    "<newline>",
+                    "<newline ' '>",
                    [
                        '"b"',
                        ":",
-                        " ",
                        [
-                            "[",
+                            "<indent 1>",
                            [
-                                "<indent 1>",
+                                "<newline ' '>",
                                [
-                                    "<newline>",
-                                    "1",
-                                    ",",
-                                    "<newline>",
-                                    "2",
-                                    ",",
-                                    "<newline>",
-                                    "3",
+                                    "[",
+                                    [
+                                        "<indent 1>",
+                                        [
+                                            "<newline ''>",
+                                            "1",
+                                            ",",
+                                            "<newline ' '>",
+                                            "2",
+                                            ",",
+                                            "<newline ' '>",
+                                            "3",
+                                        ],
+                                    ],
+                                    "<newline ''>",
+                                    "]",
                                ],
                            ],
-                            "<newline>",
-                            "]",
                        ],
                    ],
                ],
            ],
-            "<newline>",
+            "<newline ''>",
            "}",
        ]
    ]


 def test_layout_basic():
-    text = '{"a": true, "b":[1,2,3]}'
+    text = '{"a": true, "b":[1,2,3], "c":[1,2,3,4,5,6,7]}'
    tokens = runtime.GenericTokenStream(text, JSON_LEXER)
    tree, errors = JSON_PARSER.parse(tokens)
    assert [] == errors
    assert tree is not None

    printer = wadler.Printer(JSON)
-    result = printer.format_tree(tree, 10).apply_to_source(text)
+    result = printer.format_tree(tree, 50).apply_to_source(text)

    assert (
        result
        == """
 {
 "a": true,
- "b": [
-  1,
-  2,
-  3
- ]
+ "b": [1, 2, 3],
+ "c": [1, 2, 3, 4, 5, 6, 7]
 }
 """.strip()
    )