Experiment with trivia handling in the pretty-printer; it's not good yet

It's not yet clear how to track trivia or how to compose it with groups. This is proving very difficult.
2024-09-14 17:14:07 -07:00 · 2024-09-14 17:14:07 -07:00 · d5ccd5b147
commit d5ccd5b147
parent 71b59302fa
4 changed files with 131 additions and 30 deletions
--- a/grammar.py
+++ b/grammar.py
@ -5,6 +5,7 @@ from parser import (
    Re,
    Rule,
    Terminal,
    TriviaMode,
    alt,
    br,
    group,
@ -23,7 +24,7 @@ class FineGrammar(Grammar):
    # generator = parser.GenerateLR1
    start = "File"
-    trivia = ["BLANKS", "COMMENT"]
+    trivia = ["BLANKS", "LINE_BREAKS", "COMMENT"]
    pretty_indent = "  "
@ -75,13 +76,11 @@ class FineGrammar(Grammar):
    @rule("ClassDeclaration")
    def class_declaration(self) -> Rule:
        return seq(
            group(
            group(
                self.CLASS,
                sp,
                mark(self.IDENTIFIER, field="name", highlight=highlight.entity.name.type),
                sp,
                ),
                self.LCURLY,
            ),
            indent(nl, mark(opt(self.class_body), field="body")),
@ -193,7 +192,7 @@ class FineGrammar(Grammar):
    def block(self) -> Rule:
        return alt(
            group(self.LCURLY, nl, self.RCURLY),
-            seq(self.LCURLY, indent(nl, self.block_body), nl, self.RCURLY),
+            group(self.LCURLY, indent(sp, self.block_body), sp, self.RCURLY),
        )
    @rule("BlockBody")
@ -201,7 +200,7 @@ class FineGrammar(Grammar):
        return alt(
            self.expression,
            self._statement_list,
-            seq(self._statement_list, nl, self.expression),
+            seq(self._statement_list, br, self.expression),
        )
    @rule
@ -420,10 +419,12 @@ class FineGrammar(Grammar):
    def field_value(self) -> Rule:
        """Match a field value: a bare identifier, or ``identifier : expression``."""
        # Shorthand form: just the identifier.
        shorthand = self.IDENTIFIER
        # Explicit form: identifier, colon, then the (indented) expression.
        explicit = group(self.IDENTIFIER, self.COLON, indent(sp, self.expression))
        return shorthand | explicit
-    BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus())
+    BLANKS = Terminal(Re.set(" ", "\t").plus())
    LINE_BREAKS = Terminal(Re.set("\r", "\n").plus(), trivia_mode=TriviaMode.NewLine)
    COMMENT = Terminal(
        Re.seq(Re.literal("//"), Re.set("\n").invert().star()),
        highlight=highlight.comment.line,
        trivia_mode=TriviaMode.LineComment,
    )
    ARROW = Terminal("->", highlight=highlight.keyword.operator)
--- a/harness.py
+++ b/harness.py
@ -543,12 +543,12 @@ class Harness:
            case wadler.Text(start, end):
                if self.source is not None:
-                    append(f"< {self.source[start:end]}")
+                    append(f"< {repr(self.source[start:end])}")
                else:
                    append(f"< ??? {start}:{end}")
            case wadler.Literal(text):
-                append(f"' {text}")
+                append(f"literal {repr(text)}")
            case wadler.Group():
                append("group")
--- a/parser/parser.py
+++ b/parser/parser.py
@ -2724,11 +2724,35 @@ sp = newline(" ")
 def forced_break() -> Rule:
    """Build the rule saying the line MUST break at this exact position.

    The result is ``Nothing`` marked with formatting metadata whose
    ``forced_break`` flag is set.
    """
    break_meta = FormatMeta(forced_break=True)
    return mark(Nothing, format=break_meta)
 br = forced_break()
 class TriviaMode(enum.Enum):
    """Indicate how a particular bit of trivia is to be handled during
    pretty-printing. Attach this to a "trivia_mode" property on a Terminal
    definition.

    - Ignore means that the trivia should be ignored. (This is the default.)

    - NewLine means that the trivia is a line break. This is important for
      other modes, specifically...

    - LineComment means that the trivia is a line comment. If a line comment
      is alone on a line, then a forced break is inserted so that it remains
      alone on its line after formatting, otherwise it is attached to whatever
      is to its left by a single space. A LineComment is *always* followed by
      a forced break.
    """
    # Trivia that is dropped during pretty-printing (the default).
    Ignore = 0
    # Trivia that is a line break.
    NewLine = 1
    # Trivia that is a line comment.
    LineComment = 2
 ###############################################################################
 # Finally, the base class for grammars
 ###############################################################################
--- a/parser/wadler.py
+++ b/parser/wadler.py
@ -17,13 +17,6 @@ class Cons:
    right: "Document"
 def cons(left: "Document", right: "Document") -> "Document":
    """Join two documents into a ``Cons``; if either is falsy, return the other.

    The check is truthiness-based: when both sides are truthy the result is
    ``Cons(left, right)``, otherwise it is ``left or right``.
    """
    if left and right:
        return Cons(left, right)
    else:
        return left or right
@dataclasses.dataclass(frozen=True)
 class NewLine:
    replace: str
@ -31,7 +24,7 @@ class NewLine:
@dataclasses.dataclass(frozen=True)
 class ForceBreak:
-    pass
+    silent: bool
@dataclasses.dataclass(frozen=True)
@ -79,6 +72,21 @@ class Lazy:
 Document = None | Text | Literal | NewLine | ForceBreak | Cons | Indent | Group | Marker | Lazy
 def cons(*documents: Document) -> Document:
    """Concatenate any number of documents, left to right.

    ``None`` entries are skipped. Returns ``None`` when nothing remains, the
    single remaining document unchanged when there is exactly one, and
    otherwise a left-nested chain of ``Cons`` nodes.
    """
    present = [doc for doc in documents if doc is not None]
    if not present:
        return None
    combined = present[0]
    for following in present[1:]:
        combined = Cons(combined, following)
    return combined
 def group(document: Document) -> Document:
    """Wrap ``document`` in a ``Group`` node and return it."""
    return Group(document)
 ############################################################################
 # Layouts
 ############################################################################
@ -125,7 +133,13 @@ def layout_document(doc: Document, width: int, indent: str) -> DocumentLayout:
            return Chunk(doc=doc, indent=self.indent + and_indent, flat=self.flat)
    column = 0
-    chunks: list[Chunk] = [Chunk(doc=doc, indent=0, flat=False)]
+    chunks: list[Chunk] = [
        Chunk(
            doc=doc,
            indent=0,
            flat=False,  # NOTE: Assume flat until we know how to break.
        )
    ]
    def fits(chunk: Chunk) -> bool:
        remaining = width - column
@ -216,8 +230,9 @@ def layout_document(doc: Document, width: int, indent: str) -> DocumentLayout:
                    output.append("\n" + (chunk.indent * indent))
                    column = chunk.indent * len(indent)
-            case ForceBreak():
+            case ForceBreak(silent):
                # TODO: Custom newline expansion, custom indent segments.
                if not silent:
                    output.append("\n" + (chunk.indent * indent))
                    column = chunk.indent * len(indent)
@ -276,11 +291,18 @@ class Matcher:
    table: parser.ParseTable
    indent_amounts: dict[str, int]
    newline_replace: dict[str, str]
    trivia_mode: dict[str, parser.TriviaMode]
    def match(self, printer: "Printer", items: list[runtime.Tree | runtime.TokenValue]) -> Document:
        stack: list[tuple[int, Document]] = [(0, None)]
        table = self.table
        # eof_trivia = []
        # if len(items) > 0:
        #     item = items[-1]
        #     if isinstance(item, runtime.TokenValue):
        #         eof_trivia = item.post_trivia
        input = [(child_to_name(i), i) for i in items] + [
            (
                "$",
@ -302,7 +324,9 @@ class Matcher:
            match action:
                case parser.Accept():
-                    return stack[-1][1]
+                    result = stack[-1][1]
                    # result = cons(result, self.apply_trivia(eof_trivia))
                    return result
                case parser.Reduce(name=name, count=size):
                    child: Document = None
@ -314,7 +338,7 @@ class Matcher:
                        del stack[-size:]
                    if name[0] == "g":
-                        child = Group(child)
+                        child = group(child)
                    elif name[0] == "i":
                        amount = self.indent_amounts[name]
@ -329,10 +353,10 @@ class Matcher:
                        child = cons(NewLine(replace), child)
                    elif name[0] == "f":
-                        child = cons(child, ForceBreak())
+                        child = cons(child, ForceBreak(False))
                    elif name[0] == "d":
-                        child = cons(ForceBreak(), child)
+                        child = cons(ForceBreak(False), child)
                    else:
                        pass  # Reducing a transparent rule probably.
@ -347,8 +371,8 @@ class Matcher:
                    if isinstance(value, runtime.Tree):
                        child = Lazy.from_tree(value, printer)
                    else:
                        # TODO: Consider trivia and preserve comments!
                        child = Text(value.start, value.end)
                        child = cons(child, self.apply_trivia(value.post_trivia))
                    stack.append((action.state, child))
                    input_index += 1
@ -356,6 +380,47 @@ class Matcher:
                case parser.Error():
                    raise Exception("How did I get a parse error here??")
    def apply_trivia(self, trivia: list[runtime.TokenValue]) -> Document:
        """Turn a run of trivia tokens into a document fragment.

        Each token's kind is looked up in ``self.trivia_mode``; kinds with no
        entry are treated as ``TriviaMode.Ignore``. Only line comments
        contribute to the result. Line breaks are not emitted, but they are
        remembered so that a comment which follows one is kept on its own
        line.

        Returns ``None`` when the trivia contains no line comments.
        """
        saw_line_break = False
        result: Document = None

        for item in trivia:
            mode = self.trivia_mode.get(item.kind, parser.TriviaMode.Ignore)

            if mode is parser.TriviaMode.Ignore:
                continue

            if mode is parser.TriviaMode.NewLine:
                # The pretty-printer decides where line breaks go, so the
                # break itself is dropped; we only record that one occurred
                # because it changes how a following comment is attached.
                saw_line_break = True
                continue

            if mode is parser.TriviaMode.LineComment:
                # A comment preceded by a line break was alone on its line
                # and must stay that way; otherwise it trails whatever is to
                # its left, separated by a single space.
                separator = NewLine("") if saw_line_break else Literal(" ")
                result = cons(
                    result,
                    separator,
                    Text(item.start, item.end),
                    ForceBreak(True),  # This is probably the wrong place for this!
                )
                continue

            typing.assert_never(mode)

        return result
 class Printer:
    # TODO: Pre-generate the matcher tables for a grammar, to make it
@ -364,6 +429,7 @@ class Printer:
    _matchers: dict[str, Matcher]
    _nonterminals: dict[str, parser.NonTerminal]
    _indent: str
    _trivia_mode: dict[str, parser.TriviaMode]
    def __init__(self, grammar: parser.Grammar, indent: str | None = None):
        self.grammar = grammar
@ -371,9 +437,18 @@ class Printer:
        self._matchers = {}
        if indent is None:
-            indent = getattr(self.grammar, "pretty_indent", " ")
+            indent = getattr(self.grammar, "pretty_indent", None)
        if indent is None:
            indent = " "
        self._indent = indent
        trivia_mode = {}
        for t in grammar.terminals():
            mode = t.meta.get("trivia_mode")
            if t.name is not None and isinstance(mode, parser.TriviaMode):
                trivia_mode[t.name] = mode
        self._trivia_mode = trivia_mode
    def indent(self) -> str:
        """Return the indent string stored on this printer (``self._indent``)."""
        return self._indent
@ -535,6 +610,7 @@ class Printer:
            parse_table,
            indent_amounts,
            final_newlines,
            self._trivia_mode,
        )
    def rule_to_matcher(self, rule: parser.NonTerminal) -> Matcher: