[wadler] Prettier handling of trivia
Split the rules for pre- and post-trivia and work out when each one applies. Multiple consecutive line breaks are handled (in an unsatisfying way, I guess), but this otherwise lays the groundwork for thinking about it better. Also, we no longer generate lazy "Text" nodes, because I thought I might want to actually look at the newlines in the source; I don't yet, but I *can* now. (I can also detect EOF, so there's that.)
This commit is contained in:
parent
c31d527077
commit
8a17cfd586
5 changed files with 159 additions and 108 deletions
|
|
@ -24,7 +24,7 @@ class FineGrammar(Grammar):
|
|||
# generator = parser.GenerateLR1
|
||||
start = "File"
|
||||
|
||||
trivia = ["BLANKS", "LINE_BREAKS", "COMMENT"]
|
||||
trivia = ["BLANKS", "LINE_BREAK", "COMMENT"]
|
||||
|
||||
pretty_indent = " "
|
||||
|
||||
|
|
@ -426,7 +426,7 @@ class FineGrammar(Grammar):
|
|||
return self.IDENTIFIER | group(self.IDENTIFIER, self.COLON, indent(sp, self.expression))
|
||||
|
||||
BLANKS = Terminal(Re.set(" ", "\t").plus())
|
||||
LINE_BREAKS = Terminal(Re.set("\r", "\n").plus(), trivia_mode=TriviaMode.NewLine)
|
||||
LINE_BREAK = Terminal(Re.set("\r", "\n"), trivia_mode=TriviaMode.NewLine)
|
||||
COMMENT = Terminal(
|
||||
Re.seq(Re.literal("//"), Re.set("\n").invert().star()),
|
||||
highlight=highlight.comment.line,
|
||||
|
|
|
|||
|
|
@ -371,7 +371,7 @@ class Harness:
|
|||
|
||||
printer = self.load_printer()
|
||||
if self.tree is not None:
|
||||
self.document = printer.convert_tree_to_document(self.tree)
|
||||
self.document = printer.convert_tree_to_document(self.tree, self.source)
|
||||
else:
|
||||
self.document = None
|
||||
|
||||
|
|
@ -541,12 +541,6 @@ class Harness:
|
|||
append(f"indent {doc.amount}")
|
||||
self.format_document(lines, doc.doc, indent + 1)
|
||||
|
||||
case wadler.Text(start, end):
|
||||
if self.source is not None:
|
||||
append(f"< {repr(self.source[start:end])}")
|
||||
else:
|
||||
append(f"< ??? {start}:{end}")
|
||||
|
||||
case wadler.Literal(text):
|
||||
append(f"literal {repr(text)}")
|
||||
|
||||
|
|
|
|||
|
|
@ -2109,6 +2109,10 @@ class Re:
|
|||
UNICODE_MAX_CP = 1114112
|
||||
|
||||
|
||||
def _str_repr(x: int) -> str:
|
||||
return repr(chr(x))[1:-1]
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class ReSet(Re):
|
||||
values: list[Span]
|
||||
|
|
@ -2165,12 +2169,12 @@ class ReSet(Re):
|
|||
if len(self.values) == 1:
|
||||
span = self.values[0]
|
||||
if len(span) == 1:
|
||||
return chr(span.lower)
|
||||
return _str_repr(span.lower)
|
||||
|
||||
ranges = []
|
||||
for span in self.values:
|
||||
start = chr(span.lower)
|
||||
end = chr(span.upper - 1)
|
||||
start = _str_repr(span.lower)
|
||||
end = _str_repr(span.upper - 1)
|
||||
if start == end:
|
||||
ranges.append(start)
|
||||
else:
|
||||
|
|
@ -2736,7 +2740,7 @@ class TriviaMode(enum.Enum):
|
|||
pretty-printing. Attach this to a "trivia_mode" property on a Terminal
|
||||
definition.
|
||||
|
||||
- Ignore means that the trivia should be ignored. (This is the default.)
|
||||
- Blank means that the trivia represents blank space. (This is the default.)
|
||||
|
||||
- NewLine means that the trivia is a line break. This is important for
|
||||
other modes, specifically...
|
||||
|
|
@ -2748,7 +2752,7 @@ class TriviaMode(enum.Enum):
|
|||
a forced break.
|
||||
"""
|
||||
|
||||
Ignore = 0
|
||||
Blank = 0
|
||||
NewLine = 1
|
||||
LineComment = 2
|
||||
|
||||
|
|
|
|||
157
parser/wadler.py
157
parser/wadler.py
|
|
@ -32,12 +32,6 @@ class Indent:
|
|||
doc: "Document"
|
||||
|
||||
|
||||
@dataclasses.dataclass(frozen=True)
|
||||
class Text:
|
||||
start: int
|
||||
end: int
|
||||
|
||||
|
||||
@dataclasses.dataclass(frozen=True)
|
||||
class Literal:
|
||||
text: str
|
||||
|
|
@ -69,13 +63,11 @@ class Lazy:
|
|||
return self.value
|
||||
|
||||
@classmethod
|
||||
def from_tree(cls, tree: runtime.Tree, printer: "Printer") -> "Lazy":
|
||||
return Lazy(lambda: printer.convert_tree_to_document(tree))
|
||||
def from_tree(cls, tree: runtime.Tree, src: str, printer: "Printer") -> "Lazy":
|
||||
return Lazy(lambda: printer.convert_tree_to_document(tree, src))
|
||||
|
||||
|
||||
Document = (
|
||||
None | Text | Literal | NewLine | ForceBreak | Cons | Indent | Group | Trivia | Marker | Lazy
|
||||
)
|
||||
Document = None | Literal | NewLine | ForceBreak | Cons | Indent | Group | Trivia | Marker | Lazy
|
||||
|
||||
|
||||
def cons(*documents: Document) -> Document:
|
||||
|
|
@ -207,9 +199,6 @@ def layout_document(doc: Document, width: int, indent: str) -> DocumentLayout:
|
|||
case None:
|
||||
pass
|
||||
|
||||
case Text(start, end):
|
||||
remaining -= end - start
|
||||
|
||||
case Literal(text):
|
||||
remaining -= len(text)
|
||||
|
||||
|
|
@ -268,10 +257,6 @@ def layout_document(doc: Document, width: int, indent: str) -> DocumentLayout:
|
|||
case None:
|
||||
pass
|
||||
|
||||
case Text(start, end):
|
||||
output.append((start, end))
|
||||
column += end - start
|
||||
|
||||
case Literal(text):
|
||||
output.append(text)
|
||||
column += len(text)
|
||||
|
|
@ -337,7 +322,7 @@ def resolve_document(doc: Document) -> Document:
|
|||
case Trivia(child):
|
||||
return Trivia(resolve_document(child))
|
||||
|
||||
case Text() | Literal() | NewLine() | ForceBreak() | Indent() | None:
|
||||
case Literal() | NewLine() | ForceBreak() | Indent() | None:
|
||||
return doc
|
||||
|
||||
case _:
|
||||
|
|
@ -358,7 +343,12 @@ class Matcher:
|
|||
newline_replace: dict[str, str]
|
||||
trivia_mode: dict[str, parser.TriviaMode]
|
||||
|
||||
def match(self, printer: "Printer", items: list[runtime.Tree | runtime.TokenValue]) -> Document:
|
||||
def match(
|
||||
self,
|
||||
printer: "Printer",
|
||||
items: list[runtime.Tree | runtime.TokenValue],
|
||||
src: str,
|
||||
) -> Document:
|
||||
stack: list[tuple[int, Document]] = [(0, None)]
|
||||
table = self.table
|
||||
|
||||
|
|
@ -434,10 +424,13 @@ class Matcher:
|
|||
value = current_token[1]
|
||||
|
||||
if isinstance(value, runtime.Tree):
|
||||
child = Lazy.from_tree(value, printer)
|
||||
child = Lazy.from_tree(value, src, printer)
|
||||
else:
|
||||
child = Text(value.start, value.end)
|
||||
child = cons(child, self.apply_trivia(value.post_trivia))
|
||||
child = cons(
|
||||
trivia(self.apply_pre_trivia(value.pre_trivia, src)),
|
||||
Literal(src[value.start : value.end]),
|
||||
trivia(self.apply_post_trivia(value.post_trivia, src)),
|
||||
)
|
||||
|
||||
stack.append((action.state, child))
|
||||
input_index += 1
|
||||
|
|
@ -445,46 +438,100 @@ class Matcher:
|
|||
case parser.Error():
|
||||
raise Exception("How did I get a parse error here??")
|
||||
|
||||
def apply_trivia(self, trivia_tokens: list[runtime.TokenValue]) -> Document:
|
||||
has_newline = False
|
||||
def slice_pre_post_trivia(
    self, trivia_tokens: list[runtime.TokenValue], src: str
) -> tuple[
    list[tuple[parser.TriviaMode, runtime.TokenValue]],
    list[tuple[parser.TriviaMode, runtime.TokenValue]],
]:
    """Split a run of trivia tokens into ``(pre_trivia, post_trivia)``.

    Each token is paired with its mode from ``self.trivia_mode``; kinds that
    have no entry there are treated as ``TriviaMode.Blank``.

    - If a token starts at offset 0, the whole run is pre-trivia.
    - Otherwise everything up to and including the first line break is
      post-trivia, and whatever follows it is pre-trivia.
    - If there is no line break at all, the whole run is post-trivia.

    ``src`` is accepted for symmetry with the other trivia helpers; this
    method does not read it.
    """
    classified = []
    for entry in trivia_tokens:
        entry_mode = self.trivia_mode.get(entry.kind, parser.TriviaMode.Blank)
        classified.append((entry_mode, entry))

    # ``split_at`` is one past the current position, i.e. the slice point
    # that keeps the current token on the post-trivia side.
    for split_at, (entry_mode, entry) in enumerate(classified, start=1):
        if entry.start == 0:
            # Everything is pre-trivia if we're at the start of the file.
            return (classified, [])

        if entry_mode == parser.TriviaMode.NewLine:
            # The first line break closes out the post-trivia; it stays
            # with the post-trivia half.
            return (classified[split_at:], classified[:split_at])

    # No line break anywhere: the whole run is post-trivia.
    return ([], classified)
|
||||
|
||||
def apply_pre_trivia(self, trivia_tokens: list[runtime.TokenValue], src: str) -> Document:
|
||||
pre_trivia, _ = self.slice_pre_post_trivia(trivia_tokens, src)
|
||||
if len(pre_trivia) == 0:
|
||||
return None
|
||||
|
||||
at_start_of_file = pre_trivia[0][1].start == 0
|
||||
|
||||
trivia_doc = None
|
||||
for token in trivia_tokens:
|
||||
mode = self.trivia_mode.get(token.kind, parser.TriviaMode.Ignore)
|
||||
new_line_count = 0
|
||||
for mode, token in pre_trivia:
|
||||
match mode:
|
||||
case parser.TriviaMode.Ignore:
|
||||
case parser.TriviaMode.LineComment:
|
||||
trivia_doc = cons(
|
||||
trivia_doc,
|
||||
Literal(src[token.start : token.end]),
|
||||
ForceBreak(False),
|
||||
)
|
||||
new_line_count = 0 # There will be a newline after this.
|
||||
at_start_of_file = False
|
||||
|
||||
case parser.TriviaMode.Blank:
|
||||
pass
|
||||
|
||||
case parser.TriviaMode.NewLine:
|
||||
# We ignore line breaks because obviously
|
||||
# we expect the pretty-printer to put the
|
||||
# line breaks in where they belong *but*
|
||||
# we track if they happened to influence
|
||||
# the layout.
|
||||
has_newline = True
|
||||
|
||||
case parser.TriviaMode.LineComment:
|
||||
if has_newline:
|
||||
# This line comment is all alone on
|
||||
# its line, so we need to maintain
|
||||
# that.
|
||||
line_break = NewLine("")
|
||||
else:
|
||||
# This line comment is attached to
|
||||
# something to the left, reduce it to
|
||||
# a space.
|
||||
line_break = Literal(" ")
|
||||
|
||||
new_line_count += 1
|
||||
if new_line_count == 2 and not at_start_of_file:
|
||||
trivia_doc = cons(
|
||||
trivia_doc,
|
||||
line_break,
|
||||
Text(token.start, token.end),
|
||||
ForceBreak(True), # This is probably the wrong place for this!
|
||||
ForceBreak(False),
|
||||
ForceBreak(False),
|
||||
)
|
||||
|
||||
case _:
|
||||
typing.assert_never(mode)
|
||||
|
||||
return trivia(trivia_doc)
|
||||
return trivia_doc
|
||||
|
||||
def apply_post_trivia(self, trivia_tokens: list[runtime.TokenValue], src: str) -> Document:
    """Build the document for the post-trivia half of ``trivia_tokens``.

    Only the post-trivia slice reported by ``slice_pre_post_trivia`` is
    examined. Blank tokens are skipped, scanning stops at the first line
    break, and a line comment is emitted as a space, the comment text taken
    from ``src``, and a forced break.

    Returns ``None`` when there is no post-trivia at all. When the trivia
    run reaches the end of ``src``, the pre-trivia document for the same
    tokens is appended as well.
    """
    _, trailing = self.slice_pre_post_trivia(trivia_tokens, src)
    if not trailing:
        return None

    trivia_doc = None
    for mode, token in trailing:
        if mode == parser.TriviaMode.Blank:
            # Blank space contributes nothing to the output.
            continue

        if mode == parser.TriviaMode.NewLine:
            # Anything after a line break is not processed as post trivia.
            break

        if mode == parser.TriviaMode.LineComment:
            # Because this is post-trivia, we know there's something to our
            # left, and we can force the space.
            comment_text = src[token.start : token.end]
            trivia_doc = cons(
                Literal(" "),
                Literal(comment_text),
                ForceBreak(True),  # And the line needs to end.
            )
            break

        typing.assert_never(mode)

    ends_at_eof = len(trivia_tokens) > 0 and trivia_tokens[-1].end == len(src)
    if ends_at_eof:
        # As a special case, if we're post trivia at the end of the file
        # then we also need to be pre-trivia too, for the hypothetical EOF
        # token that we never see.
        trivia_doc = cons(trivia_doc, self.apply_pre_trivia(trivia_tokens, src))

    return trivia_doc
|
||||
|
||||
|
||||
class Printer:
|
||||
|
|
@ -686,19 +733,19 @@ class Printer:
|
|||
|
||||
return result
|
||||
|
||||
def convert_tree_to_document(self, tree: runtime.Tree) -> Document:
|
||||
def convert_tree_to_document(self, tree: runtime.Tree, src: str) -> Document:
|
||||
name = tree.name
|
||||
assert name is not None, "Cannot format a tree if it still has transparent nodes inside"
|
||||
|
||||
rule = self.lookup_nonterminal(name)
|
||||
matcher = self.rule_to_matcher(rule)
|
||||
m = matcher.match(self, list(tree.children))
|
||||
m = matcher.match(self, list(tree.children), src)
|
||||
if m is None:
|
||||
raise ValueError(
|
||||
f"Could not match a valid tree for {tree.name} with {len(tree.children)} children:\n{tree.format()}"
|
||||
)
|
||||
return resolve_document(m)
|
||||
|
||||
def format_tree(self, tree: runtime.Tree, width: int) -> DocumentLayout:
|
||||
doc = self.convert_tree_to_document(tree)
|
||||
def format_tree(self, tree: runtime.Tree, src: str, width: int) -> DocumentLayout:
|
||||
doc = self.convert_tree_to_document(tree, src)
|
||||
return layout_document(doc, width, self._indent)
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@ from parser.parser import (
|
|||
sp,
|
||||
nl,
|
||||
br,
|
||||
TriviaMode,
|
||||
)
|
||||
|
||||
import parser.runtime as runtime
|
||||
|
|
@ -72,6 +73,7 @@ class JsonGrammar(Grammar):
|
|||
)
|
||||
|
||||
BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus())
|
||||
|
||||
LCURLY = Terminal("{")
|
||||
RCURLY = Terminal("}")
|
||||
COMMA = Terminal(",")
|
||||
|
|
@ -118,8 +120,6 @@ def flatten_document(doc: wadler.Document, src: str) -> list:
|
|||
return ["<forced break>"]
|
||||
case wadler.Indent():
|
||||
return [[f"<indent {doc.amount}>", flatten_document(doc.doc, src)]]
|
||||
case wadler.Text(start, end):
|
||||
return [src[start:end]]
|
||||
case wadler.Literal(text):
|
||||
return [text]
|
||||
case wadler.Group():
|
||||
|
|
@ -149,7 +149,7 @@ def test_convert_tree_to_document():
|
|||
assert tree is not None
|
||||
|
||||
printer = wadler.Printer(JSON)
|
||||
doc = flatten_document(printer.convert_tree_to_document(tree), text)
|
||||
doc = flatten_document(printer.convert_tree_to_document(tree, text), text)
|
||||
|
||||
assert doc == [
|
||||
[
|
||||
|
|
@ -212,7 +212,7 @@ def test_layout_basic():
|
|||
assert tree is not None
|
||||
|
||||
printer = wadler.Printer(JSON)
|
||||
result = printer.format_tree(tree, 50).apply_to_source(text)
|
||||
result = printer.format_tree(tree, text, 50).apply_to_source(text)
|
||||
|
||||
assert (
|
||||
result
|
||||
|
|
@ -226,10 +226,9 @@ def test_layout_basic():
|
|||
)
|
||||
|
||||
|
||||
def test_forced_break():
|
||||
class TG(Grammar):
|
||||
class TG(Grammar):
|
||||
start = "root"
|
||||
trivia = ["BLANKS"]
|
||||
trivia = ["BLANKS", "LINE_BREAK", "COMMENT"]
|
||||
|
||||
@rule
|
||||
def root(self):
|
||||
|
|
@ -256,8 +255,15 @@ def test_forced_break():
|
|||
OK = Terminal("ok")
|
||||
BREAK = Terminal("break")
|
||||
|
||||
BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus())
|
||||
BLANKS = Terminal(Re.set(" ", "\t").plus())
|
||||
LINE_BREAK = Terminal(Re.set("\r", "\n"), trivia_mode=TriviaMode.NewLine)
|
||||
COMMENT = Terminal(
|
||||
Re.seq(Re.literal(";"), Re.set("\n").invert().star()),
|
||||
trivia_mode=TriviaMode.LineComment,
|
||||
)
|
||||
|
||||
|
||||
def test_forced_break():
|
||||
g = TG()
|
||||
g_lexer = g.compile_lexer()
|
||||
g_parser = runtime.Parser(g.build_table())
|
||||
|
|
@ -269,7 +275,7 @@ def test_forced_break():
|
|||
assert tree is not None
|
||||
|
||||
printer = wadler.Printer(g)
|
||||
result = printer.format_tree(tree, 200).apply_to_source(text)
|
||||
result = printer.format_tree(tree, text, 200).apply_to_source(text)
|
||||
|
||||
assert (
|
||||
result
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue