Experimenting with trivia handling; it's not good yet

It's not yet clear how to track trivia or how to compose it with
groups. This is proving very difficult.
This commit is contained in:
John Doty 2024-09-14 17:14:07 -07:00
parent 71b59302fa
commit d5ccd5b147
4 changed files with 131 additions and 30 deletions

View file

@ -5,6 +5,7 @@ from parser import (
Re, Re,
Rule, Rule,
Terminal, Terminal,
TriviaMode,
alt, alt,
br, br,
group, group,
@ -23,7 +24,7 @@ class FineGrammar(Grammar):
# generator = parser.GenerateLR1 # generator = parser.GenerateLR1
start = "File" start = "File"
trivia = ["BLANKS", "COMMENT"] trivia = ["BLANKS", "LINE_BREAKS", "COMMENT"]
pretty_indent = " " pretty_indent = " "
@ -75,13 +76,11 @@ class FineGrammar(Grammar):
@rule("ClassDeclaration") @rule("ClassDeclaration")
def class_declaration(self) -> Rule: def class_declaration(self) -> Rule:
return seq( return seq(
group(
group( group(
self.CLASS, self.CLASS,
sp, sp,
mark(self.IDENTIFIER, field="name", highlight=highlight.entity.name.type), mark(self.IDENTIFIER, field="name", highlight=highlight.entity.name.type),
sp, sp,
),
self.LCURLY, self.LCURLY,
), ),
indent(nl, mark(opt(self.class_body), field="body")), indent(nl, mark(opt(self.class_body), field="body")),
@ -193,7 +192,7 @@ class FineGrammar(Grammar):
def block(self) -> Rule: def block(self) -> Rule:
return alt( return alt(
group(self.LCURLY, nl, self.RCURLY), group(self.LCURLY, nl, self.RCURLY),
seq(self.LCURLY, indent(nl, self.block_body), nl, self.RCURLY), group(self.LCURLY, indent(sp, self.block_body), sp, self.RCURLY),
) )
@rule("BlockBody") @rule("BlockBody")
@ -201,7 +200,7 @@ class FineGrammar(Grammar):
return alt( return alt(
self.expression, self.expression,
self._statement_list, self._statement_list,
seq(self._statement_list, nl, self.expression), seq(self._statement_list, br, self.expression),
) )
@rule @rule
@ -420,10 +419,12 @@ class FineGrammar(Grammar):
def field_value(self) -> Rule: def field_value(self) -> Rule:
return self.IDENTIFIER | group(self.IDENTIFIER, self.COLON, indent(sp, self.expression)) return self.IDENTIFIER | group(self.IDENTIFIER, self.COLON, indent(sp, self.expression))
BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus()) BLANKS = Terminal(Re.set(" ", "\t").plus())
LINE_BREAKS = Terminal(Re.set("\r", "\n").plus(), trivia_mode=TriviaMode.NewLine)
COMMENT = Terminal( COMMENT = Terminal(
Re.seq(Re.literal("//"), Re.set("\n").invert().star()), Re.seq(Re.literal("//"), Re.set("\n").invert().star()),
highlight=highlight.comment.line, highlight=highlight.comment.line,
trivia_mode=TriviaMode.LineComment,
) )
ARROW = Terminal("->", highlight=highlight.keyword.operator) ARROW = Terminal("->", highlight=highlight.keyword.operator)

View file

@ -543,12 +543,12 @@ class Harness:
case wadler.Text(start, end): case wadler.Text(start, end):
if self.source is not None: if self.source is not None:
append(f"< {self.source[start:end]}") append(f"< {repr(self.source[start:end])}")
else: else:
append(f"< ??? {start}:{end}") append(f"< ??? {start}:{end}")
case wadler.Literal(text): case wadler.Literal(text):
append(f"' {text}") append(f"literal {repr(text)}")
case wadler.Group(): case wadler.Group():
append("group") append("group")

View file

@ -2724,11 +2724,35 @@ sp = newline(" ")
def forced_break() -> Rule: def forced_break() -> Rule:
"""Indicate that the line MUST break right here, for whatever reason."""
return mark(Nothing, format=FormatMeta(forced_break=True)) return mark(Nothing, format=FormatMeta(forced_break=True))
br = forced_break() br = forced_break()
class TriviaMode(enum.Enum):
    """How the pretty-printer should treat one kind of trivia token.

    Set this as the "trivia_mode" property of a Terminal definition.

    Members:

    - Ignore: the trivia is dropped. This is the default when a terminal
      does not specify a mode.

    - NewLine: the trivia is a line break. It matters because the other
      modes look at whether a line break was seen, in particular...

    - LineComment: the trivia is a line comment. A comment that is alone
      on its line gets a forced break so it stays alone on its line after
      formatting; otherwise it is attached to whatever is on its left by
      a single space. A LineComment is *always* followed by a forced
      break.
    """

    Ignore = 0
    NewLine = 1
    LineComment = 2
############################################################################### ###############################################################################
# Finally, the base class for grammars # Finally, the base class for grammars
############################################################################### ###############################################################################

View file

@ -17,13 +17,6 @@ class Cons:
right: "Document" right: "Document"
def cons(left: "Document", right: "Document") -> "Document":
if left and right:
return Cons(left, right)
else:
return left or right
@dataclasses.dataclass(frozen=True) @dataclasses.dataclass(frozen=True)
class NewLine: class NewLine:
replace: str replace: str
@ -31,7 +24,7 @@ class NewLine:
@dataclasses.dataclass(frozen=True) @dataclasses.dataclass(frozen=True)
class ForceBreak: class ForceBreak:
pass silent: bool
@dataclasses.dataclass(frozen=True) @dataclasses.dataclass(frozen=True)
@ -79,6 +72,21 @@ class Lazy:
Document = None | Text | Literal | NewLine | ForceBreak | Cons | Indent | Group | Marker | Lazy Document = None | Text | Literal | NewLine | ForceBreak | Cons | Indent | Group | Marker | Lazy
def cons(*documents: Document) -> Document:
    """Concatenate documents left-to-right, skipping any that are None.

    Returns None when every argument is None (or there are no arguments),
    the single remaining document when only one is not None, and otherwise
    a left-nested chain of Cons nodes: Cons(Cons(a, b), c).
    """
    present = [piece for piece in documents if piece is not None]
    if not present:
        return None

    # Fold the remaining documents onto the first one, left to right.
    combined = present[0]
    for piece in present[1:]:
        combined = Cons(combined, piece)
    return combined
def group(document: Document) -> Document:
    """Wrap `document` in a Group node and return it."""
    wrapped = Group(document)
    return wrapped
############################################################################ ############################################################################
# Layouts # Layouts
############################################################################ ############################################################################
@ -125,7 +133,13 @@ def layout_document(doc: Document, width: int, indent: str) -> DocumentLayout:
return Chunk(doc=doc, indent=self.indent + and_indent, flat=self.flat) return Chunk(doc=doc, indent=self.indent + and_indent, flat=self.flat)
column = 0 column = 0
chunks: list[Chunk] = [Chunk(doc=doc, indent=0, flat=False)] chunks: list[Chunk] = [
Chunk(
doc=doc,
indent=0,
flat=False, # NOTE: Assume flat until we know how to break.
)
]
def fits(chunk: Chunk) -> bool: def fits(chunk: Chunk) -> bool:
remaining = width - column remaining = width - column
@ -216,8 +230,9 @@ def layout_document(doc: Document, width: int, indent: str) -> DocumentLayout:
output.append("\n" + (chunk.indent * indent)) output.append("\n" + (chunk.indent * indent))
column = chunk.indent * len(indent) column = chunk.indent * len(indent)
case ForceBreak(): case ForceBreak(silent):
# TODO: Custom newline expansion, custom indent segments. # TODO: Custom newline expansion, custom indent segments.
if not silent:
output.append("\n" + (chunk.indent * indent)) output.append("\n" + (chunk.indent * indent))
column = chunk.indent * len(indent) column = chunk.indent * len(indent)
@ -276,11 +291,18 @@ class Matcher:
table: parser.ParseTable table: parser.ParseTable
indent_amounts: dict[str, int] indent_amounts: dict[str, int]
newline_replace: dict[str, str] newline_replace: dict[str, str]
trivia_mode: dict[str, parser.TriviaMode]
def match(self, printer: "Printer", items: list[runtime.Tree | runtime.TokenValue]) -> Document: def match(self, printer: "Printer", items: list[runtime.Tree | runtime.TokenValue]) -> Document:
stack: list[tuple[int, Document]] = [(0, None)] stack: list[tuple[int, Document]] = [(0, None)]
table = self.table table = self.table
# eof_trivia = []
# if len(items) > 0:
# item = items[-1]
# if isinstance(item, runtime.TokenValue):
# eof_trivia = item.post_trivia
input = [(child_to_name(i), i) for i in items] + [ input = [(child_to_name(i), i) for i in items] + [
( (
"$", "$",
@ -302,7 +324,9 @@ class Matcher:
match action: match action:
case parser.Accept(): case parser.Accept():
return stack[-1][1] result = stack[-1][1]
# result = cons(result, self.apply_trivia(eof_trivia))
return result
case parser.Reduce(name=name, count=size): case parser.Reduce(name=name, count=size):
child: Document = None child: Document = None
@ -314,7 +338,7 @@ class Matcher:
del stack[-size:] del stack[-size:]
if name[0] == "g": if name[0] == "g":
child = Group(child) child = group(child)
elif name[0] == "i": elif name[0] == "i":
amount = self.indent_amounts[name] amount = self.indent_amounts[name]
@ -329,10 +353,10 @@ class Matcher:
child = cons(NewLine(replace), child) child = cons(NewLine(replace), child)
elif name[0] == "f": elif name[0] == "f":
child = cons(child, ForceBreak()) child = cons(child, ForceBreak(False))
elif name[0] == "d": elif name[0] == "d":
child = cons(ForceBreak(), child) child = cons(ForceBreak(False), child)
else: else:
pass # Reducing a transparent rule probably. pass # Reducing a transparent rule probably.
@ -347,8 +371,8 @@ class Matcher:
if isinstance(value, runtime.Tree): if isinstance(value, runtime.Tree):
child = Lazy.from_tree(value, printer) child = Lazy.from_tree(value, printer)
else: else:
# TODO: Consider trivia and preserve comments!
child = Text(value.start, value.end) child = Text(value.start, value.end)
child = cons(child, self.apply_trivia(value.post_trivia))
stack.append((action.state, child)) stack.append((action.state, child))
input_index += 1 input_index += 1
@ -356,6 +380,47 @@ class Matcher:
case parser.Error(): case parser.Error():
raise Exception("How did I get a parse error here??") raise Exception("How did I get a parse error here??")
def apply_trivia(self, trivia: list[runtime.TokenValue]) -> Document:
    """Build the document fragment for a run of trivia tokens.

    Each token's mode is looked up by its kind in `self.trivia_mode`;
    kinds with no entry are treated as TriviaMode.Ignore.

    Returns None when no token in `trivia` is a line comment; otherwise
    the concatenation, for each line comment in order, of a separator,
    the comment text, and a silent ForceBreak.
    """
    saw_line_break = False
    doc: Document = None

    for token in trivia:
        mode = self.trivia_mode.get(token.kind, parser.TriviaMode.Ignore)

        if mode is parser.TriviaMode.Ignore:
            continue

        if mode is parser.TriviaMode.NewLine:
            # Line breaks themselves are dropped, since the pretty-printer
            # is expected to put line breaks where they belong, *but* we
            # remember that one occurred because it influences how a
            # following comment is laid out.
            saw_line_break = True
            continue

        if mode is parser.TriviaMode.LineComment:
            # A comment preceded by a line break was alone on its line,
            # so keep it that way; otherwise it is attached to whatever
            # is on its left, and the gap is reduced to a single space.
            separator = NewLine("") if saw_line_break else Literal(" ")
            doc = cons(
                doc,
                separator,
                Text(token.start, token.end),
                ForceBreak(True),  # This is probably the wrong place for this!
            )
            continue

        # Every TriviaMode member must be handled above.
        typing.assert_never(mode)

    return doc
class Printer: class Printer:
# TODO: Pre-generate the matcher tables for a grammar, to make it # TODO: Pre-generate the matcher tables for a grammar, to make it
@ -364,6 +429,7 @@ class Printer:
_matchers: dict[str, Matcher] _matchers: dict[str, Matcher]
_nonterminals: dict[str, parser.NonTerminal] _nonterminals: dict[str, parser.NonTerminal]
_indent: str _indent: str
_trivia_mode: dict[str, parser.TriviaMode]
def __init__(self, grammar: parser.Grammar, indent: str | None = None): def __init__(self, grammar: parser.Grammar, indent: str | None = None):
self.grammar = grammar self.grammar = grammar
@ -371,9 +437,18 @@ class Printer:
self._matchers = {} self._matchers = {}
if indent is None: if indent is None:
indent = getattr(self.grammar, "pretty_indent", " ") indent = getattr(self.grammar, "pretty_indent", None)
if indent is None:
indent = " "
self._indent = indent self._indent = indent
trivia_mode = {}
for t in grammar.terminals():
mode = t.meta.get("trivia_mode")
if t.name is not None and isinstance(mode, parser.TriviaMode):
trivia_mode[t.name] = mode
self._trivia_mode = trivia_mode
def indent(self) -> str: def indent(self) -> str:
return self._indent return self._indent
@ -535,6 +610,7 @@ class Printer:
parse_table, parse_table,
indent_amounts, indent_amounts,
final_newlines, final_newlines,
self._trivia_mode,
) )
def rule_to_matcher(self, rule: parser.NonTerminal) -> Matcher: def rule_to_matcher(self, rule: parser.NonTerminal) -> Matcher: