[wadler] Prettier handling of trivia

Split the rules for pre- and post-trivia, work out when each one
applies, and handle runs of multiple line breaks (in an unsatisfying
way, I guess). Otherwise this lays the groundwork for thinking about
trivia more carefully.

Also, we no longer generate lazy "Text" nodes; token text is now copied
out of the source into "Literal" nodes instead. I made this change because
I thought I might want to actually look at the newlines in the source. I
don't yet, but I *can* now. (I can also detect EOF, so there's that.)
This commit is contained in:
John Doty 2024-09-19 16:39:32 -07:00
parent c31d527077
commit 8a17cfd586
5 changed files with 159 additions and 108 deletions

View file

@ -24,7 +24,7 @@ class FineGrammar(Grammar):
# generator = parser.GenerateLR1 # generator = parser.GenerateLR1
start = "File" start = "File"
trivia = ["BLANKS", "LINE_BREAKS", "COMMENT"] trivia = ["BLANKS", "LINE_BREAK", "COMMENT"]
pretty_indent = " " pretty_indent = " "
@ -426,7 +426,7 @@ class FineGrammar(Grammar):
return self.IDENTIFIER | group(self.IDENTIFIER, self.COLON, indent(sp, self.expression)) return self.IDENTIFIER | group(self.IDENTIFIER, self.COLON, indent(sp, self.expression))
BLANKS = Terminal(Re.set(" ", "\t").plus()) BLANKS = Terminal(Re.set(" ", "\t").plus())
LINE_BREAKS = Terminal(Re.set("\r", "\n").plus(), trivia_mode=TriviaMode.NewLine) LINE_BREAK = Terminal(Re.set("\r", "\n"), trivia_mode=TriviaMode.NewLine)
COMMENT = Terminal( COMMENT = Terminal(
Re.seq(Re.literal("//"), Re.set("\n").invert().star()), Re.seq(Re.literal("//"), Re.set("\n").invert().star()),
highlight=highlight.comment.line, highlight=highlight.comment.line,

View file

@ -371,7 +371,7 @@ class Harness:
printer = self.load_printer() printer = self.load_printer()
if self.tree is not None: if self.tree is not None:
self.document = printer.convert_tree_to_document(self.tree) self.document = printer.convert_tree_to_document(self.tree, self.source)
else: else:
self.document = None self.document = None
@ -541,12 +541,6 @@ class Harness:
append(f"indent {doc.amount}") append(f"indent {doc.amount}")
self.format_document(lines, doc.doc, indent + 1) self.format_document(lines, doc.doc, indent + 1)
case wadler.Text(start, end):
if self.source is not None:
append(f"< {repr(self.source[start:end])}")
else:
append(f"< ??? {start}:{end}")
case wadler.Literal(text): case wadler.Literal(text):
append(f"literal {repr(text)}") append(f"literal {repr(text)}")

View file

@ -2109,6 +2109,10 @@ class Re:
UNICODE_MAX_CP = 1114112 UNICODE_MAX_CP = 1114112
def _str_repr(x: int) -> str:
return repr(chr(x))[1:-1]
@dataclasses.dataclass @dataclasses.dataclass
class ReSet(Re): class ReSet(Re):
values: list[Span] values: list[Span]
@ -2165,12 +2169,12 @@ class ReSet(Re):
if len(self.values) == 1: if len(self.values) == 1:
span = self.values[0] span = self.values[0]
if len(span) == 1: if len(span) == 1:
return chr(span.lower) return _str_repr(span.lower)
ranges = [] ranges = []
for span in self.values: for span in self.values:
start = chr(span.lower) start = _str_repr(span.lower)
end = chr(span.upper - 1) end = _str_repr(span.upper - 1)
if start == end: if start == end:
ranges.append(start) ranges.append(start)
else: else:
@ -2736,7 +2740,7 @@ class TriviaMode(enum.Enum):
pretty-printing. Attach this to a "trivia_mode" property on a Terminal pretty-printing. Attach this to a "trivia_mode" property on a Terminal
definition. definition.
- Ignore means that the trivia should be ignored. (This is the default.) - Blank means that the trivia represents blank space. (This is the default.)
- NewLine means that the trivia is a line break. This is important for - NewLine means that the trivia is a line break. This is important for
other modes, specifically... other modes, specifically...
@ -2748,7 +2752,7 @@ class TriviaMode(enum.Enum):
a forced break. a forced break.
""" """
Ignore = 0 Blank = 0
NewLine = 1 NewLine = 1
LineComment = 2 LineComment = 2

View file

@ -32,12 +32,6 @@ class Indent:
doc: "Document" doc: "Document"
@dataclasses.dataclass(frozen=True)
class Text:
start: int
end: int
@dataclasses.dataclass(frozen=True) @dataclasses.dataclass(frozen=True)
class Literal: class Literal:
text: str text: str
@ -69,13 +63,11 @@ class Lazy:
return self.value return self.value
@classmethod @classmethod
def from_tree(cls, tree: runtime.Tree, printer: "Printer") -> "Lazy": def from_tree(cls, tree: runtime.Tree, src: str, printer: "Printer") -> "Lazy":
return Lazy(lambda: printer.convert_tree_to_document(tree)) return Lazy(lambda: printer.convert_tree_to_document(tree, src))
Document = ( Document = None | Literal | NewLine | ForceBreak | Cons | Indent | Group | Trivia | Marker | Lazy
None | Text | Literal | NewLine | ForceBreak | Cons | Indent | Group | Trivia | Marker | Lazy
)
def cons(*documents: Document) -> Document: def cons(*documents: Document) -> Document:
@ -207,9 +199,6 @@ def layout_document(doc: Document, width: int, indent: str) -> DocumentLayout:
case None: case None:
pass pass
case Text(start, end):
remaining -= end - start
case Literal(text): case Literal(text):
remaining -= len(text) remaining -= len(text)
@ -268,10 +257,6 @@ def layout_document(doc: Document, width: int, indent: str) -> DocumentLayout:
case None: case None:
pass pass
case Text(start, end):
output.append((start, end))
column += end - start
case Literal(text): case Literal(text):
output.append(text) output.append(text)
column += len(text) column += len(text)
@ -337,7 +322,7 @@ def resolve_document(doc: Document) -> Document:
case Trivia(child): case Trivia(child):
return Trivia(resolve_document(child)) return Trivia(resolve_document(child))
case Text() | Literal() | NewLine() | ForceBreak() | Indent() | None: case Literal() | NewLine() | ForceBreak() | Indent() | None:
return doc return doc
case _: case _:
@ -358,7 +343,12 @@ class Matcher:
newline_replace: dict[str, str] newline_replace: dict[str, str]
trivia_mode: dict[str, parser.TriviaMode] trivia_mode: dict[str, parser.TriviaMode]
def match(self, printer: "Printer", items: list[runtime.Tree | runtime.TokenValue]) -> Document: def match(
self,
printer: "Printer",
items: list[runtime.Tree | runtime.TokenValue],
src: str,
) -> Document:
stack: list[tuple[int, Document]] = [(0, None)] stack: list[tuple[int, Document]] = [(0, None)]
table = self.table table = self.table
@ -434,10 +424,13 @@ class Matcher:
value = current_token[1] value = current_token[1]
if isinstance(value, runtime.Tree): if isinstance(value, runtime.Tree):
child = Lazy.from_tree(value, printer) child = Lazy.from_tree(value, src, printer)
else: else:
child = Text(value.start, value.end) child = cons(
child = cons(child, self.apply_trivia(value.post_trivia)) trivia(self.apply_pre_trivia(value.pre_trivia, src)),
Literal(src[value.start : value.end]),
trivia(self.apply_post_trivia(value.post_trivia, src)),
)
stack.append((action.state, child)) stack.append((action.state, child))
input_index += 1 input_index += 1
@ -445,46 +438,100 @@ class Matcher:
case parser.Error(): case parser.Error():
raise Exception("How did I get a parse error here??") raise Exception("How did I get a parse error here??")
def apply_trivia(self, trivia_tokens: list[runtime.TokenValue]) -> Document: def slice_pre_post_trivia(self, trivia_tokens: list[runtime.TokenValue], src: str) -> tuple[
has_newline = False list[tuple[parser.TriviaMode, runtime.TokenValue]],
list[tuple[parser.TriviaMode, runtime.TokenValue]],
]:
tokens = [
(self.trivia_mode.get(token.kind, parser.TriviaMode.Blank), token)
for token in trivia_tokens
]
for index, (mode, token) in enumerate(tokens):
if token.start == 0:
# Everything is pre-trivia if we're at the start of the file.
return (tokens, [])
if mode == parser.TriviaMode.NewLine:
# This is the first newline; it belongs with the post-trivia.
return (tokens[index + 1 :], tokens[: index + 1])
# If we never found a new line then it's all post-trivia.
return ([], tokens)
def apply_pre_trivia(self, trivia_tokens: list[runtime.TokenValue], src: str) -> Document:
pre_trivia, _ = self.slice_pre_post_trivia(trivia_tokens, src)
if len(pre_trivia) == 0:
return None
at_start_of_file = pre_trivia[0][1].start == 0
trivia_doc = None trivia_doc = None
for token in trivia_tokens: new_line_count = 0
mode = self.trivia_mode.get(token.kind, parser.TriviaMode.Ignore) for mode, token in pre_trivia:
match mode: match mode:
case parser.TriviaMode.Ignore: case parser.TriviaMode.LineComment:
trivia_doc = cons(
trivia_doc,
Literal(src[token.start : token.end]),
ForceBreak(False),
)
new_line_count = 0 # There will be a newline after this.
at_start_of_file = False
case parser.TriviaMode.Blank:
pass pass
case parser.TriviaMode.NewLine: case parser.TriviaMode.NewLine:
# We ignore line breaks because obviously new_line_count += 1
# we expect the pretty-printer to put the if new_line_count == 2 and not at_start_of_file:
# line breaks in where they belong *but*
# we track if they happened to influence
# the layout.
has_newline = True
case parser.TriviaMode.LineComment:
if has_newline:
# This line comment is all alone on
# its line, so we need to maintain
# that.
line_break = NewLine("")
else:
# This line comment is attached to
# something to the left, reduce it to
# a space.
line_break = Literal(" ")
trivia_doc = cons( trivia_doc = cons(
trivia_doc, trivia_doc,
line_break, ForceBreak(False),
Text(token.start, token.end), ForceBreak(False),
ForceBreak(True), # This is probably the wrong place for this!
) )
case _: case _:
typing.assert_never(mode) typing.assert_never(mode)
return trivia(trivia_doc) return trivia_doc
def apply_post_trivia(self, trivia_tokens: list[runtime.TokenValue], src: str) -> Document:
_, post_trivia = self.slice_pre_post_trivia(trivia_tokens, src)
if len(post_trivia) == 0:
return None
trivia_doc = None
for mode, token in post_trivia:
match mode:
case parser.TriviaMode.Blank:
pass
case parser.TriviaMode.NewLine:
# Anything after a line break is not processed as post
# trivia.
break
case parser.TriviaMode.LineComment:
# Because this is post-trivia, we know there's something
# to our left, and we can force the space.
trivia_doc = cons(
Literal(" "),
Literal(src[token.start : token.end]),
ForceBreak(True), # And the line needs to end.
)
break
case _:
typing.assert_never(mode)
if len(trivia_tokens) > 0 and trivia_tokens[-1].end == len(src):
# As a special case, if we're post trivia at the end of the file
# then we also need to be pre-trivia too, for the hypothetical EOF
# token that we never see.
trivia_doc = cons(trivia_doc, self.apply_pre_trivia(trivia_tokens, src))
return trivia_doc
class Printer: class Printer:
@ -686,19 +733,19 @@ class Printer:
return result return result
def convert_tree_to_document(self, tree: runtime.Tree) -> Document: def convert_tree_to_document(self, tree: runtime.Tree, src: str) -> Document:
name = tree.name name = tree.name
assert name is not None, "Cannot format a tree if it still has transparent nodes inside" assert name is not None, "Cannot format a tree if it still has transparent nodes inside"
rule = self.lookup_nonterminal(name) rule = self.lookup_nonterminal(name)
matcher = self.rule_to_matcher(rule) matcher = self.rule_to_matcher(rule)
m = matcher.match(self, list(tree.children)) m = matcher.match(self, list(tree.children), src)
if m is None: if m is None:
raise ValueError( raise ValueError(
f"Could not match a valid tree for {tree.name} with {len(tree.children)} children:\n{tree.format()}" f"Could not match a valid tree for {tree.name} with {len(tree.children)} children:\n{tree.format()}"
) )
return resolve_document(m) return resolve_document(m)
def format_tree(self, tree: runtime.Tree, width: int) -> DocumentLayout: def format_tree(self, tree: runtime.Tree, src: str, width: int) -> DocumentLayout:
doc = self.convert_tree_to_document(tree) doc = self.convert_tree_to_document(tree, src)
return layout_document(doc, width, self._indent) return layout_document(doc, width, self._indent)

View file

@ -14,6 +14,7 @@ from parser.parser import (
sp, sp,
nl, nl,
br, br,
TriviaMode,
) )
import parser.runtime as runtime import parser.runtime as runtime
@ -72,6 +73,7 @@ class JsonGrammar(Grammar):
) )
BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus()) BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus())
LCURLY = Terminal("{") LCURLY = Terminal("{")
RCURLY = Terminal("}") RCURLY = Terminal("}")
COMMA = Terminal(",") COMMA = Terminal(",")
@ -118,8 +120,6 @@ def flatten_document(doc: wadler.Document, src: str) -> list:
return ["<forced break>"] return ["<forced break>"]
case wadler.Indent(): case wadler.Indent():
return [[f"<indent {doc.amount}>", flatten_document(doc.doc, src)]] return [[f"<indent {doc.amount}>", flatten_document(doc.doc, src)]]
case wadler.Text(start, end):
return [src[start:end]]
case wadler.Literal(text): case wadler.Literal(text):
return [text] return [text]
case wadler.Group(): case wadler.Group():
@ -149,7 +149,7 @@ def test_convert_tree_to_document():
assert tree is not None assert tree is not None
printer = wadler.Printer(JSON) printer = wadler.Printer(JSON)
doc = flatten_document(printer.convert_tree_to_document(tree), text) doc = flatten_document(printer.convert_tree_to_document(tree, text), text)
assert doc == [ assert doc == [
[ [
@ -212,7 +212,7 @@ def test_layout_basic():
assert tree is not None assert tree is not None
printer = wadler.Printer(JSON) printer = wadler.Printer(JSON)
result = printer.format_tree(tree, 50).apply_to_source(text) result = printer.format_tree(tree, text, 50).apply_to_source(text)
assert ( assert (
result result
@ -226,10 +226,9 @@ def test_layout_basic():
) )
def test_forced_break(): class TG(Grammar):
class TG(Grammar):
start = "root" start = "root"
trivia = ["BLANKS"] trivia = ["BLANKS", "LINE_BREAK", "COMMENT"]
@rule @rule
def root(self): def root(self):
@ -256,8 +255,15 @@ def test_forced_break():
OK = Terminal("ok") OK = Terminal("ok")
BREAK = Terminal("break") BREAK = Terminal("break")
BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus()) BLANKS = Terminal(Re.set(" ", "\t").plus())
LINE_BREAK = Terminal(Re.set("\r", "\n"), trivia_mode=TriviaMode.NewLine)
COMMENT = Terminal(
Re.seq(Re.literal(";"), Re.set("\n").invert().star()),
trivia_mode=TriviaMode.LineComment,
)
def test_forced_break():
g = TG() g = TG()
g_lexer = g.compile_lexer() g_lexer = g.compile_lexer()
g_parser = runtime.Parser(g.build_table()) g_parser = runtime.Parser(g.build_table())
@ -269,7 +275,7 @@ def test_forced_break():
assert tree is not None assert tree is not None
printer = wadler.Printer(g) printer = wadler.Printer(g)
result = printer.format_tree(tree, 200).apply_to_source(text) result = printer.format_tree(tree, text, 200).apply_to_source(text)
assert ( assert (
result result