From d5ccd5b147c4e6814d55d448dc63639b2be6b33a Mon Sep 17 00:00:00 2001 From: John Doty Date: Sat, 14 Sep 2024 17:14:07 -0700 Subject: [PATCH] Really messing around with trivia, it's not good yet It's really not clear how to track it and how to compose it with groups yet. Really very difficult. --- grammar.py | 21 ++++----- harness.py | 4 +- parser/parser.py | 24 ++++++++++ parser/wadler.py | 112 +++++++++++++++++++++++++++++++++++++++-------- 4 files changed, 131 insertions(+), 30 deletions(-) diff --git a/grammar.py b/grammar.py index d7118ac..8f5585d 100644 --- a/grammar.py +++ b/grammar.py @@ -5,6 +5,7 @@ from parser import ( Re, Rule, Terminal, + TriviaMode, alt, br, group, @@ -23,7 +24,7 @@ class FineGrammar(Grammar): # generator = parser.GenerateLR1 start = "File" - trivia = ["BLANKS", "COMMENT"] + trivia = ["BLANKS", "LINE_BREAKS", "COMMENT"] pretty_indent = " " @@ -76,12 +77,10 @@ class FineGrammar(Grammar): def class_declaration(self) -> Rule: return seq( group( - group( - self.CLASS, - sp, - mark(self.IDENTIFIER, field="name", highlight=highlight.entity.name.type), - sp, - ), + self.CLASS, + sp, + mark(self.IDENTIFIER, field="name", highlight=highlight.entity.name.type), + sp, self.LCURLY, ), indent(nl, mark(opt(self.class_body), field="body")), @@ -193,7 +192,7 @@ class FineGrammar(Grammar): def block(self) -> Rule: return alt( group(self.LCURLY, nl, self.RCURLY), - seq(self.LCURLY, indent(nl, self.block_body), nl, self.RCURLY), + group(self.LCURLY, indent(sp, self.block_body), sp, self.RCURLY), ) @rule("BlockBody") @@ -201,7 +200,7 @@ class FineGrammar(Grammar): return alt( self.expression, self._statement_list, - seq(self._statement_list, nl, self.expression), + seq(self._statement_list, br, self.expression), ) @rule @@ -420,10 +419,12 @@ class FineGrammar(Grammar): def field_value(self) -> Rule: return self.IDENTIFIER | group(self.IDENTIFIER, self.COLON, indent(sp, self.expression)) - BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus()) + BLANKS = Terminal(Re.set(" ", "\t").plus()) + LINE_BREAKS = Terminal(Re.set("\r", "\n").plus(), trivia_mode=TriviaMode.NewLine) COMMENT = Terminal( Re.seq(Re.literal("//"), Re.set("\n").invert().star()), highlight=highlight.comment.line, + trivia_mode=TriviaMode.LineComment, ) ARROW = Terminal("->", highlight=highlight.keyword.operator) diff --git a/harness.py b/harness.py index a4d6862..f7b5ce8 100644 --- a/harness.py +++ b/harness.py @@ -543,12 +543,12 @@ class Harness: case wadler.Text(start, end): if self.source is not None: - append(f"< {self.source[start:end]}") + append(f"< {repr(self.source[start:end])}") else: append(f"< ??? {start}:{end}") case wadler.Literal(text): - append(f"' {text}") + append(f"literal {repr(text)}") case wadler.Group(): append("group") diff --git a/parser/parser.py b/parser/parser.py index 6eafd88..bff4034 100644 --- a/parser/parser.py +++ b/parser/parser.py @@ -2724,11 +2724,35 @@ sp = newline(" ") def forced_break() -> Rule: + """Indicate that the line MUST break right here, for whatever reason.""" return mark(Nothing, format=FormatMeta(forced_break=True)) br = forced_break() + +class TriviaMode(enum.Enum): + """Indicate how a particular bit of trivia is to be handled during + pretty-printing. Attach this to a "trivia_mode" property on a Terminal + definition. + + - Ignore means that the trivia should be ignored. (This is the default.) + + - NewLine means that the trivia is a line break. This is important for + other modes, specifically... + + - LineComment means that the trivia is a line comment. If a line comment + is alone on a line, then a forced break is inserted so that it remains + alone on its line after formatting, otherwise it is attached to whatever + is to its left by a single space. A LineComment is *always* followed by + a forced break. + """ + + Ignore = 0 + NewLine = 1 + LineComment = 2 + + ############################################################################### # Finally, the base class for grammars ############################################################################### diff --git a/parser/wadler.py b/parser/wadler.py index 3d88851..9278deb 100644 --- a/parser/wadler.py +++ b/parser/wadler.py @@ -17,13 +17,6 @@ class Cons: right: "Document" -def cons(left: "Document", right: "Document") -> "Document": - if left and right: - return Cons(left, right) - else: - return left or right - - @dataclasses.dataclass(frozen=True) class NewLine: replace: str @@ -31,7 +24,7 @@ class NewLine: @dataclasses.dataclass(frozen=True) class ForceBreak: - pass + silent: bool @dataclasses.dataclass(frozen=True) @@ -79,6 +72,21 @@ class Lazy: Document = None | Text | Literal | NewLine | ForceBreak | Cons | Indent | Group | Marker | Lazy +def cons(*documents: Document) -> Document: + result = None + for doc in documents: + if result is None: + result = doc + elif doc is not None: + result = Cons(result, doc) + + return result + + +def group(document: Document) -> Document: + return Group(document) + + ############################################################################ # Layouts ############################################################################ @@ -125,7 +133,13 @@ def layout_document(doc: Document, width: int, indent: str) -> DocumentLayout: return Chunk(doc=doc, indent=self.indent + and_indent, flat=self.flat) column = 0 - chunks: list[Chunk] = [Chunk(doc=doc, indent=0, flat=False)] + chunks: list[Chunk] = [ + Chunk( + doc=doc, + indent=0, + flat=False, # NOTE: Assume flat until we know how to break. + ) + ] def fits(chunk: Chunk) -> bool: remaining = width - column @@ -216,10 +230,11 @@ def layout_document(doc: Document, width: int, indent: str) -> DocumentLayout: output.append("\n" + (chunk.indent * indent)) column = chunk.indent * len(indent) - case ForceBreak(): + case ForceBreak(silent): # TODO: Custom newline expansion, custom indent segments. - output.append("\n" + (chunk.indent * indent)) - column = chunk.indent * len(indent) + if not silent: + output.append("\n" + (chunk.indent * indent)) + column = chunk.indent * len(indent) case Cons(left, right): chunks.append(chunk.with_document(right)) @@ -276,11 +291,18 @@ class Matcher: table: parser.ParseTable indent_amounts: dict[str, int] newline_replace: dict[str, str] + trivia_mode: dict[str, parser.TriviaMode] def match(self, printer: "Printer", items: list[runtime.Tree | runtime.TokenValue]) -> Document: stack: list[tuple[int, Document]] = [(0, None)] table = self.table + # eof_trivia = [] + # if len(items) > 0: + # item = items[-1] + # if isinstance(item, runtime.TokenValue): + # eof_trivia = item.post_trivia + input = [(child_to_name(i), i) for i in items] + [ ( "$", @@ -302,7 +324,9 @@ class Matcher: match action: case parser.Accept(): - return stack[-1][1] + result = stack[-1][1] + # result = cons(result, self.apply_trivia(eof_trivia)) + return result case parser.Reduce(name=name, count=size): child: Document = None @@ -314,7 +338,7 @@ class Matcher: del stack[-size:] if name[0] == "g": - child = Group(child) + child = group(child) elif name[0] == "i": amount = self.indent_amounts[name] @@ -329,10 +353,10 @@ class Matcher: child = cons(NewLine(replace), child) elif name[0] == "f": - child = cons(child, ForceBreak()) + child = cons(child, ForceBreak(False)) elif name[0] == "d": - child = cons(ForceBreak(), child) + child = cons(ForceBreak(False), child) else: pass # Reducing a transparent rule probably. @@ -347,8 +371,8 @@ class Matcher: if isinstance(value, runtime.Tree): child = Lazy.from_tree(value, printer) else: - # TODO: Consider trivia and preserve comments! child = Text(value.start, value.end) + child = cons(child, self.apply_trivia(value.post_trivia)) stack.append((action.state, child)) input_index += 1 @@ -356,6 +380,47 @@ class Matcher: case parser.Error(): raise Exception("How did I get a parse error here??") + def apply_trivia(self, trivia: list[runtime.TokenValue]) -> Document: + had_newline = False + trivia_doc = None + for token in trivia: + mode = self.trivia_mode.get(token.kind, parser.TriviaMode.Ignore) + match mode: + case parser.TriviaMode.Ignore: + pass + + case parser.TriviaMode.NewLine: + # We ignore line breaks because obviously + # we expect the pretty-printer to put the + # line breaks in where they belong *but* + # we track if they happened to influence + # the layout. + had_newline = True + + case parser.TriviaMode.LineComment: + if had_newline: + # This line comment is all alone on + # its line, so we need to maintain + # that. + line_break = NewLine("") + else: + # This line comment is attached to + # something to the left, reduce it to + # a space. + line_break = Literal(" ") + + trivia_doc = cons( + trivia_doc, + line_break, + Text(token.start, token.end), + ForceBreak(True), # This is probably the wrong place for this! + ) + + case _: + typing.assert_never(mode) + + return trivia_doc + class Printer: # TODO: Pre-generate the matcher tables for a grammar, to make it @@ -364,6 +429,7 @@ class Printer: _matchers: dict[str, Matcher] _nonterminals: dict[str, parser.NonTerminal] _indent: str + _trivia_mode: dict[str, parser.TriviaMode] def __init__(self, grammar: parser.Grammar, indent: str | None = None): self.grammar = grammar @@ -371,9 +437,18 @@ class Printer: self._matchers = {} if indent is None: - indent = getattr(self.grammar, "pretty_indent", " ") + indent = getattr(self.grammar, "pretty_indent", None) + if indent is None: + indent = " " self._indent = indent + trivia_mode = {} + for t in grammar.terminals(): + mode = t.meta.get("trivia_mode") + if t.name is not None and isinstance(mode, parser.TriviaMode): + trivia_mode[t.name] = mode + self._trivia_mode = trivia_mode + def indent(self) -> str: return self._indent @@ -535,6 +610,7 @@ class Printer: parse_table, indent_amounts, final_newlines, + self._trivia_mode, ) def rule_to_matcher(self, rule: parser.NonTerminal) -> Matcher: