From d5ccd5b147c4e6814d55d448dc63639b2be6b33a Mon Sep 17 00:00:00 2001
From: John Doty <john@d0ty.me>
Date: Sat, 14 Sep 2024 17:14:07 -0700
Subject: [PATCH] Really messing around with trivia, it's not good yet

It's really not clear how to track it and how to compose it with
groups yet. Really very difficult.
---
 grammar.py       |  21 ++++-----
 harness.py       |   4 +-
 parser/parser.py |  24 ++++++++++
 parser/wadler.py | 112 +++++++++++++++++++++++++++++++++++++++--------
 4 files changed, 131 insertions(+), 30 deletions(-)

diff --git a/grammar.py b/grammar.py
index d7118ac..8f5585d 100644
--- a/grammar.py
+++ b/grammar.py
@@ -5,6 +5,7 @@ from parser import (
     Re,
     Rule,
     Terminal,
+    TriviaMode,
     alt,
     br,
     group,
@@ -23,7 +24,7 @@ class FineGrammar(Grammar):
     # generator = parser.GenerateLR1
     start = "File"
 
-    trivia = ["BLANKS", "COMMENT"]
+    trivia = ["BLANKS", "LINE_BREAKS", "COMMENT"]
 
     pretty_indent = "  "
 
@@ -76,12 +77,10 @@ class FineGrammar(Grammar):
     def class_declaration(self) -> Rule:
         return seq(
             group(
-                group(
-                    self.CLASS,
-                    sp,
-                    mark(self.IDENTIFIER, field="name", highlight=highlight.entity.name.type),
-                    sp,
-                ),
+                self.CLASS,
+                sp,
+                mark(self.IDENTIFIER, field="name", highlight=highlight.entity.name.type),
+                sp,
                 self.LCURLY,
             ),
             indent(nl, mark(opt(self.class_body), field="body")),
@@ -193,7 +192,7 @@ class FineGrammar(Grammar):
     def block(self) -> Rule:
         return alt(
             group(self.LCURLY, nl, self.RCURLY),
-            seq(self.LCURLY, indent(nl, self.block_body), nl, self.RCURLY),
+            group(self.LCURLY, indent(sp, self.block_body), sp, self.RCURLY),
         )
 
     @rule("BlockBody")
@@ -201,7 +200,7 @@ class FineGrammar(Grammar):
         return alt(
             self.expression,
             self._statement_list,
-            seq(self._statement_list, nl, self.expression),
+            seq(self._statement_list, br, self.expression),
         )
 
     @rule
@@ -420,10 +419,12 @@ class FineGrammar(Grammar):
     def field_value(self) -> Rule:
         return self.IDENTIFIER | group(self.IDENTIFIER, self.COLON, indent(sp, self.expression))
 
-    BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus())
+    BLANKS = Terminal(Re.set(" ", "\t").plus())
+    LINE_BREAKS = Terminal(Re.set("\r", "\n").plus(), trivia_mode=TriviaMode.NewLine)
     COMMENT = Terminal(
         Re.seq(Re.literal("//"), Re.set("\n").invert().star()),
         highlight=highlight.comment.line,
+        trivia_mode=TriviaMode.LineComment,
     )
 
     ARROW = Terminal("->", highlight=highlight.keyword.operator)
diff --git a/harness.py b/harness.py
index a4d6862..f7b5ce8 100644
--- a/harness.py
+++ b/harness.py
@@ -543,12 +543,12 @@ class Harness:
 
             case wadler.Text(start, end):
                 if self.source is not None:
-                    append(f"< {self.source[start:end]}")
+                    append(f"< {repr(self.source[start:end])}")
                 else:
                     append(f"< ??? {start}:{end}")
 
             case wadler.Literal(text):
-                append(f"' {text}")
+                append(f"literal {repr(text)}")
 
             case wadler.Group():
                 append("group")
diff --git a/parser/parser.py b/parser/parser.py
index 6eafd88..bff4034 100644
--- a/parser/parser.py
+++ b/parser/parser.py
@@ -2724,11 +2724,35 @@ sp = newline(" ")
 
 
 def forced_break() -> Rule:
+    """Indicate that the line MUST break right here, for whatever reason."""
     return mark(Nothing, format=FormatMeta(forced_break=True))
 
 
 br = forced_break()
 
+
+class TriviaMode(enum.Enum):
+    """Indicate how a particular bit of trivia is to be handled during
+    pretty-printing. Attach this to a "trivia_mode" property on a Terminal
+    definition.
+
+    - Ignore means that the trivia should be ignored. (This is the default.)
+
+    - NewLine means that the trivia is a line break. This is important for
+      other modes, specifically...
+
+    - LineComment means that the trivia is a line comment. If a line comment
+      is alone on a line, then a forced break is inserted so that it remains
+      alone on its line after formatting, otherwise it is attached to whatever
+      is to its left by a single space. A LineComment is *always* followed by
+      a forced break.
+    """
+
+    Ignore = 0
+    NewLine = 1
+    LineComment = 2
+
+
 ###############################################################################
 # Finally, the base class for grammars
 ###############################################################################
diff --git a/parser/wadler.py b/parser/wadler.py
index 3d88851..9278deb 100644
--- a/parser/wadler.py
+++ b/parser/wadler.py
@@ -17,13 +17,6 @@ class Cons:
     right: "Document"
 
 
-def cons(left: "Document", right: "Document") -> "Document":
-    if left and right:
-        return Cons(left, right)
-    else:
-        return left or right
-
-
 @dataclasses.dataclass(frozen=True)
 class NewLine:
     replace: str
@@ -31,7 +24,7 @@ class NewLine:
 
 @dataclasses.dataclass(frozen=True)
 class ForceBreak:
-    pass
+    silent: bool
 
 
 @dataclasses.dataclass(frozen=True)
@@ -79,6 +72,21 @@ class Lazy:
 Document = None | Text | Literal | NewLine | ForceBreak | Cons | Indent | Group | Marker | Lazy
 
 
+def cons(*documents: Document) -> Document:
+    result = None
+    for doc in documents:
+        if result is None:
+            result = doc
+        elif doc is not None:
+            result = Cons(result, doc)
+
+    return result
+
+
+def group(document: Document) -> Document:
+    return Group(document)
+
+
 ############################################################################
 # Layouts
 ############################################################################
@@ -125,7 +133,13 @@ def layout_document(doc: Document, width: int, indent: str) -> DocumentLayout:
             return Chunk(doc=doc, indent=self.indent + and_indent, flat=self.flat)
 
     column = 0
-    chunks: list[Chunk] = [Chunk(doc=doc, indent=0, flat=False)]
+    chunks: list[Chunk] = [
+        Chunk(
+            doc=doc,
+            indent=0,
+            flat=False,  # NOTE: Assume flat until we know how to break.
+        )
+    ]
 
     def fits(chunk: Chunk) -> bool:
         remaining = width - column
@@ -216,10 +230,11 @@ def layout_document(doc: Document, width: int, indent: str) -> DocumentLayout:
                     output.append("\n" + (chunk.indent * indent))
                     column = chunk.indent * len(indent)
 
-            case ForceBreak():
+            case ForceBreak(silent):
                 # TODO: Custom newline expansion, custom indent segments.
-                output.append("\n" + (chunk.indent * indent))
-                column = chunk.indent * len(indent)
+                if not silent:
+                    output.append("\n" + (chunk.indent * indent))
+                    column = chunk.indent * len(indent)
 
             case Cons(left, right):
                 chunks.append(chunk.with_document(right))
@@ -276,11 +291,18 @@ class Matcher:
     table: parser.ParseTable
     indent_amounts: dict[str, int]
     newline_replace: dict[str, str]
+    trivia_mode: dict[str, parser.TriviaMode]
 
     def match(self, printer: "Printer", items: list[runtime.Tree | runtime.TokenValue]) -> Document:
         stack: list[tuple[int, Document]] = [(0, None)]
         table = self.table
 
+        # eof_trivia = []
+        # if len(items) > 0:
+        #     item = items[-1]
+        #     if isinstance(item, runtime.TokenValue):
+        #         eof_trivia = item.post_trivia
+
         input = [(child_to_name(i), i) for i in items] + [
             (
                 "$",
@@ -302,7 +324,9 @@ class Matcher:
 
             match action:
                 case parser.Accept():
-                    return stack[-1][1]
+                    result = stack[-1][1]
+                    # result = cons(result, self.apply_trivia(eof_trivia))
+                    return result
 
                 case parser.Reduce(name=name, count=size):
                     child: Document = None
@@ -314,7 +338,7 @@ class Matcher:
                         del stack[-size:]
 
                     if name[0] == "g":
-                        child = Group(child)
+                        child = group(child)
 
                     elif name[0] == "i":
                         amount = self.indent_amounts[name]
@@ -329,10 +353,10 @@ class Matcher:
                         child = cons(NewLine(replace), child)
 
                     elif name[0] == "f":
-                        child = cons(child, ForceBreak())
+                        child = cons(child, ForceBreak(False))
 
                     elif name[0] == "d":
-                        child = cons(ForceBreak(), child)
+                        child = cons(ForceBreak(False), child)
 
                     else:
                         pass  # Reducing a transparent rule probably.
@@ -347,8 +371,8 @@ class Matcher:
                     if isinstance(value, runtime.Tree):
                         child = Lazy.from_tree(value, printer)
                     else:
-                        # TODO: Consider trivia and preserve comments!
                         child = Text(value.start, value.end)
+                        child = cons(child, self.apply_trivia(value.post_trivia))
 
                     stack.append((action.state, child))
                     input_index += 1
@@ -356,6 +380,47 @@ class Matcher:
                 case parser.Error():
                     raise Exception("How did I get a parse error here??")
 
+    def apply_trivia(self, trivia: list[runtime.TokenValue]) -> Document:
+        had_newline = False
+        trivia_doc = None
+        for token in trivia:
+            mode = self.trivia_mode.get(token.kind, parser.TriviaMode.Ignore)
+            match mode:
+                case parser.TriviaMode.Ignore:
+                    pass
+
+                case parser.TriviaMode.NewLine:
+                    # We ignore line breaks because obviously
+                    # we expect the pretty-printer to put the
+                    # line breaks in where they belong *but*
+                    # we track if they happened to influence
+                    # the layout.
+                    had_newline = True
+
+                case parser.TriviaMode.LineComment:
+                    if had_newline:
+                        # This line comment is all alone on
+                        # its line, so we need to maintain
+                        # that.
+                        line_break = NewLine("")
+                    else:
+                        # This line comment is attached to
+                        # something to the left, reduce it to
+                        # a space.
+                        line_break = Literal(" ")
+
+                    trivia_doc = cons(
+                        trivia_doc,
+                        line_break,
+                        Text(token.start, token.end),
+                        ForceBreak(True),  # This is probably the wrong place for this!
+                    )
+
+                case _:
+                    typing.assert_never(mode)
+
+        return trivia_doc
+
 
 class Printer:
     # TODO: Pre-generate the matcher tables for a grammar, to make it
@@ -364,6 +429,7 @@ class Printer:
     _matchers: dict[str, Matcher]
     _nonterminals: dict[str, parser.NonTerminal]
     _indent: str
+    _trivia_mode: dict[str, parser.TriviaMode]
 
     def __init__(self, grammar: parser.Grammar, indent: str | None = None):
         self.grammar = grammar
@@ -371,9 +437,18 @@ class Printer:
         self._matchers = {}
 
         if indent is None:
-            indent = getattr(self.grammar, "pretty_indent", " ")
+            indent = getattr(self.grammar, "pretty_indent", None)
+        if indent is None:
+            indent = " "
         self._indent = indent
 
+        trivia_mode = {}
+        for t in grammar.terminals():
+            mode = t.meta.get("trivia_mode")
+            if t.name is not None and isinstance(mode, parser.TriviaMode):
+                trivia_mode[t.name] = mode
+        self._trivia_mode = trivia_mode
+
     def indent(self) -> str:
         return self._indent
 
@@ -535,6 +610,7 @@ class Printer:
             parse_table,
             indent_amounts,
             final_newlines,
+            self._trivia_mode,
         )
 
     def rule_to_matcher(self, rule: parser.NonTerminal) -> Matcher: