Really messing around with trivia; it's not good yet

It's really not clear yet how to track trivia or how to compose it with groups. This is really very difficult.
2024-09-14 17:14:07 -07:00 · 2024-09-14 17:14:07 -07:00 · d5ccd5b147
commit d5ccd5b147
parent 71b59302fa
4 changed files with 131 additions and 30 deletions
--- a/grammar.py
+++ b/grammar.py
@ -5,6 +5,7 @@ from parser import (
    Re,
    Rule,
    Terminal,
+    TriviaMode,
    alt,
    br,
    group,
@ -23,7 +24,7 @@ class FineGrammar(Grammar):
    # generator = parser.GenerateLR1
    start = "File"

-    trivia = ["BLANKS", "COMMENT"]
+    trivia = ["BLANKS", "LINE_BREAKS", "COMMENT"]

    pretty_indent = "  "

@ -76,12 +77,10 @@ class FineGrammar(Grammar):
    def class_declaration(self) -> Rule:
        return seq(
            group(
-                group(
-                    self.CLASS,
-                    sp,
-                    mark(self.IDENTIFIER, field="name", highlight=highlight.entity.name.type),
-                    sp,
-                ),
+                self.CLASS,
+                sp,
+                mark(self.IDENTIFIER, field="name", highlight=highlight.entity.name.type),
+                sp,
                self.LCURLY,
            ),
            indent(nl, mark(opt(self.class_body), field="body")),
@ -193,7 +192,7 @@ class FineGrammar(Grammar):
    def block(self) -> Rule:
        return alt(
            group(self.LCURLY, nl, self.RCURLY),
-            seq(self.LCURLY, indent(nl, self.block_body), nl, self.RCURLY),
+            group(self.LCURLY, indent(sp, self.block_body), sp, self.RCURLY),
        )

    @rule("BlockBody")
@ -201,7 +200,7 @@ class FineGrammar(Grammar):
        return alt(
            self.expression,
            self._statement_list,
-            seq(self._statement_list, nl, self.expression),
+            seq(self._statement_list, br, self.expression),
        )

    @rule
@ -420,10 +419,12 @@ class FineGrammar(Grammar):
    def field_value(self) -> Rule:
        return self.IDENTIFIER | group(self.IDENTIFIER, self.COLON, indent(sp, self.expression))

-    BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus())
+    BLANKS = Terminal(Re.set(" ", "\t").plus())
+    LINE_BREAKS = Terminal(Re.set("\r", "\n").plus(), trivia_mode=TriviaMode.NewLine)
    COMMENT = Terminal(
        Re.seq(Re.literal("//"), Re.set("\n").invert().star()),
        highlight=highlight.comment.line,
+        trivia_mode=TriviaMode.LineComment,
    )

    ARROW = Terminal("->", highlight=highlight.keyword.operator)
--- a/harness.py
+++ b/harness.py
@ -543,12 +543,12 @@ class Harness:

            case wadler.Text(start, end):
                if self.source is not None:
-                    append(f"< {self.source[start:end]}")
+                    append(f"< {repr(self.source[start:end])}")
                else:
                    append(f"< ??? {start}:{end}")

            case wadler.Literal(text):
-                append(f"' {text}")
+                append(f"literal {repr(text)}")

            case wadler.Group():
                append("group")
--- a/parser/parser.py
+++ b/parser/parser.py
@ -2724,11 +2724,35 @@ sp = newline(" ")


 def forced_break() -> Rule:
+    """Indicate that the line MUST break right here, for whatever reason."""
    return mark(Nothing, format=FormatMeta(forced_break=True))


 br = forced_break()

+
+class TriviaMode(enum.Enum):
+    """Indicate how a particular bit of trivia is to be handled during
+    pretty-printing. Attach this to a "trivia_mode" property on a Terminal
+    definition.
+
+    - Ignore means that the trivia should be ignored. (This is the default.)
+
+    - NewLine means that the trivia is a line break. This is important for
+      other modes, specifically...
+
+    - LineComment means that the trivia is a line comment. If a line comment
+      is alone on a line, then a forced break is inserted so that it remains
+      alone on its line after formatting, otherwise it is attached to whatever
+      is to its left by a single space. A LineComment is *always* followed by
+      a forced break.
+    """
+
+    Ignore = 0
+    NewLine = 1
+    LineComment = 2
+
+
 ###############################################################################
 # Finally, the base class for grammars
 ###############################################################################
--- a/parser/wadler.py
+++ b/parser/wadler.py
@ -17,13 +17,6 @@ class Cons:
    right: "Document"


-def cons(left: "Document", right: "Document") -> "Document":
-    if left and right:
-        return Cons(left, right)
-    else:
-        return left or right
-
-
@dataclasses.dataclass(frozen=True)
 class NewLine:
    replace: str
@ -31,7 +24,7 @@ class NewLine:

@dataclasses.dataclass(frozen=True)
 class ForceBreak:
-    pass
+    silent: bool


@dataclasses.dataclass(frozen=True)
@ -79,6 +72,21 @@ class Lazy:
 Document = None | Text | Literal | NewLine | ForceBreak | Cons | Indent | Group | Marker | Lazy


+def cons(*documents: Document) -> Document:
+    result = None
+    for doc in documents:
+        if result is None:
+            result = doc
+        elif doc is not None:
+            result = Cons(result, doc)
+
+    return result
+
+
+def group(document: Document) -> Document:
+    return Group(document)
+
+
 ############################################################################
 # Layouts
 ############################################################################
@ -125,7 +133,13 @@ def layout_document(doc: Document, width: int, indent: str) -> DocumentLayout:
            return Chunk(doc=doc, indent=self.indent + and_indent, flat=self.flat)

    column = 0
-    chunks: list[Chunk] = [Chunk(doc=doc, indent=0, flat=False)]
+    chunks: list[Chunk] = [
+        Chunk(
+            doc=doc,
+            indent=0,
+            flat=False,  # NOTE: Assume flat until we know how to break.
+        )
+    ]

    def fits(chunk: Chunk) -> bool:
        remaining = width - column
@ -216,10 +230,11 @@ def layout_document(doc: Document, width: int, indent: str) -> DocumentLayout:
                    output.append("\n" + (chunk.indent * indent))
                    column = chunk.indent * len(indent)

-            case ForceBreak():
+            case ForceBreak(silent):
                # TODO: Custom newline expansion, custom indent segments.
-                output.append("\n" + (chunk.indent * indent))
-                column = chunk.indent * len(indent)
+                if not silent:
+                    output.append("\n" + (chunk.indent * indent))
+                    column = chunk.indent * len(indent)

            case Cons(left, right):
                chunks.append(chunk.with_document(right))
@ -276,11 +291,18 @@ class Matcher:
    table: parser.ParseTable
    indent_amounts: dict[str, int]
    newline_replace: dict[str, str]
+    trivia_mode: dict[str, parser.TriviaMode]

    def match(self, printer: "Printer", items: list[runtime.Tree | runtime.TokenValue]) -> Document:
        stack: list[tuple[int, Document]] = [(0, None)]
        table = self.table

+        # eof_trivia = []
+        # if len(items) > 0:
+        #     item = items[-1]
+        #     if isinstance(item, runtime.TokenValue):
+        #         eof_trivia = item.post_trivia
+
        input = [(child_to_name(i), i) for i in items] + [
            (
                "$",
@ -302,7 +324,9 @@ class Matcher:

            match action:
                case parser.Accept():
-                    return stack[-1][1]
+                    result = stack[-1][1]
+                    # result = cons(result, self.apply_trivia(eof_trivia))
+                    return result

                case parser.Reduce(name=name, count=size):
                    child: Document = None
@ -314,7 +338,7 @@ class Matcher:
                        del stack[-size:]

                    if name[0] == "g":
-                        child = Group(child)
+                        child = group(child)

                    elif name[0] == "i":
                        amount = self.indent_amounts[name]
@ -329,10 +353,10 @@ class Matcher:
                        child = cons(NewLine(replace), child)

                    elif name[0] == "f":
-                        child = cons(child, ForceBreak())
+                        child = cons(child, ForceBreak(False))

                    elif name[0] == "d":
-                        child = cons(ForceBreak(), child)
+                        child = cons(ForceBreak(False), child)

                    else:
                        pass  # Reducing a transparent rule probably.
@ -347,8 +371,8 @@ class Matcher:
                    if isinstance(value, runtime.Tree):
                        child = Lazy.from_tree(value, printer)
                    else:
-                        # TODO: Consider trivia and preserve comments!
                        child = Text(value.start, value.end)
+                        child = cons(child, self.apply_trivia(value.post_trivia))

                    stack.append((action.state, child))
                    input_index += 1
@ -356,6 +380,47 @@ class Matcher:
                case parser.Error():
                    raise Exception("How did I get a parse error here??")

+    def apply_trivia(self, trivia: list[runtime.TokenValue]) -> Document:
+        had_newline = False
+        trivia_doc = None
+        for token in trivia:
+            mode = self.trivia_mode.get(token.kind, parser.TriviaMode.Ignore)
+            match mode:
+                case parser.TriviaMode.Ignore:
+                    pass
+
+                case parser.TriviaMode.NewLine:
+                    # We ignore line breaks because obviously
+                    # we expect the pretty-printer to put the
+                    # line breaks in where they belong *but*
+                    # we track if they happened to influence
+                    # the layout.
+                    had_newline = True
+
+                case parser.TriviaMode.LineComment:
+                    if had_newline:
+                        # This line comment is all alone on
+                        # its line, so we need to maintain
+                        # that.
+                        line_break = NewLine("")
+                    else:
+                        # This line comment is attached to
+                        # something to the left, reduce it to
+                        # a space.
+                        line_break = Literal(" ")
+
+                    trivia_doc = cons(
+                        trivia_doc,
+                        line_break,
+                        Text(token.start, token.end),
+                        ForceBreak(True),  # This is probably the wrong place for this!
+                    )
+
+                case _:
+                    typing.assert_never(mode)
+
+        return trivia_doc
+

 class Printer:
    # TODO: Pre-generate the matcher tables for a grammar, to make it
@ -364,6 +429,7 @@ class Printer:
    _matchers: dict[str, Matcher]
    _nonterminals: dict[str, parser.NonTerminal]
    _indent: str
+    _trivia_mode: dict[str, parser.TriviaMode]

    def __init__(self, grammar: parser.Grammar, indent: str | None = None):
        self.grammar = grammar
@ -371,9 +437,18 @@ class Printer:
        self._matchers = {}

        if indent is None:
-            indent = getattr(self.grammar, "pretty_indent", " ")
+            indent = getattr(self.grammar, "pretty_indent", None)
+        if indent is None:
+            indent = " "
        self._indent = indent

+        trivia_mode = {}
+        for t in grammar.terminals():
+            mode = t.meta.get("trivia_mode")
+            if t.name is not None and isinstance(mode, parser.TriviaMode):
+                trivia_mode[t.name] = mode
+        self._trivia_mode = trivia_mode
+
    def indent(self) -> str:
        return self._indent

@ -535,6 +610,7 @@ class Printer:
            parse_table,
            indent_amounts,
            final_newlines,
+            self._trivia_mode,
        )

    def rule_to_matcher(self, rule: parser.NonTerminal) -> Matcher: