Actual pretty-printing!

Now we're cooking with gas, ALTHOUGH we still have to deal with the fact
that we're gluing everything together where there *should* be spaces.

Many more improvements to come.
This commit is contained in:
John Doty 2024-09-11 11:08:02 -07:00
parent 5d88b459b9
commit d6dd54f4df
3 changed files with 201 additions and 21 deletions

View file

@ -2673,29 +2673,48 @@ highlight = _Highlight()
###############################################################################
# Pretty-printing metadata support
# Formatting (pretty-printing) metadata support
###############################################################################
@dataclasses.dataclass
class FormatMeta(SyntaxMeta):
    """Formatting (pretty-printing) hints attached to a rule via mark().

    The fields are set by the newline(), indent() and group() helpers.
    """

    # Text to insert before a permitted line break; None means this rule does
    # not mark a line break at all (callers test `newline is not None`).
    newline: str | None = None
    # Number of "indentation" values to indent by; None means no indent.
    indent: int | None = None
    # True when the marked rules should be kept on a single line if possible.
    group: bool = False
def group(*rules: Rule) -> Rule:
    """Mark the given rules as a pretty-printing group.

    The rules are combined into a sequence, and the printer is asked to keep
    the resulting text on one line when possible. Parsing is unaffected.
    """
    grouped = seq(*rules)
    return mark(grouped, format=FormatMeta(group=True))
def indent(*rules: Rule, amount: int | None = None) -> Rule:
    """Indicates a new level of indentation during pretty-printing.

    The provided rules are otherwise treated as if they were in a sequence.
    This rule has no effect on parsing otherwise.

    The specified amount is the number of "indentation" values to indent the
    lines with. It defaults to 1.
    """
    if amount is None:
        amount = 1
    return mark(seq(*rules), format=FormatMeta(indent=amount))
def newline(text: str | None = None) -> Rule:
    """Indicate that, during pretty-printing, the line can be broken here.

    Has no effect on parsing.

    If text is provided, the text will be inserted before the line break. This
    allows for e.g. trailing commas in lists and whatnot to make things look
    prettier, when supported. When text is omitted the empty string is
    recorded, so the resulting metadata always has a non-None `newline`.
    """
    if text is None:
        text = ""
    return mark(Nothing, format=FormatMeta(newline=text))
###############################################################################

View file

@ -58,6 +58,145 @@ class Lazy:
Document = None | Text | NewLine | Cons | Indent | Group | Lazy
class DocumentLayout:
segments: list[str | tuple[int, int]]
def __init__(self, segments):
self.segments = segments
def apply_to_source(self, original: str) -> str:
result = ""
for segment in self.segments:
if isinstance(segment, str):
result += segment
else:
start, end = segment
result += original[start:end]
return result
def layout_document(doc: Document, width: int) -> DocumentLayout:
"""Lay out a document to fit within the given width.
The result of this function is a layout which can trivially be converted
into a string given the original document.
"""
@dataclasses.dataclass
class Chunk:
doc: Document
indent: int
flat: bool
def with_document(self, doc: Document, and_indent: int = 0) -> "Chunk":
return Chunk(doc=doc, indent=self.indent + and_indent, flat=self.flat)
column = 0
chunks: list[Chunk] = [Chunk(doc=doc, indent=0, flat=False)]
def fits(chunk: Chunk) -> bool:
remaining = width - column
if remaining <= 0:
return False
stack = list(chunks)
stack.append(chunk)
while len(stack) > 0:
chunk = stack.pop()
match chunk.doc:
case None:
pass
case Text(start, end):
remaining -= end - start
case NewLine():
if chunk.flat:
# These are newlines that have been rendered flat,
# they are spaces I guess? TODO: Consider alternate
# forms, something that "goes here instead of
# newline", like maybe the empty string or... what?
remaining -= 1
else:
# These are newlines that are real, so it must have
# all fit.
return True
case Cons(left, right):
stack.append(chunk.with_document(right))
stack.append(chunk.with_document(left))
case Lazy():
stack.append(chunk.with_document(chunk.doc.resolve()))
case Indent(amount, child):
stack.append(chunk.with_document(child, and_indent=amount))
case Group(child):
# The difference between this approach and Justin's twist
# is that we consider the flat variable in Newline(),
# above, rather than here in Group. This makes us more
# like Wadler's original formulation, I guess. The
# grouping is an implicit transform over alternatives
# represented by newline. (If we have other kinds of
# alternatives we'll have to work those out elsewhere as
# well.)
stack.append(chunk.with_document(child))
case _:
typing.assert_never(chunk.doc)
if remaining < 0:
return False
return True # Everything must fit, so great!
output: list[str | tuple[int, int]] = []
while len(chunks) > 0:
chunk = chunks.pop()
match chunk.doc:
case None:
pass
case Text(start, end):
output.append((start, end))
column += end - start
case NewLine():
if chunk.flat:
# TODO: Custom newline flat mode. See also the
# corresponding comment in the "fits" function.
output.append(" ")
column += 1
else:
# TODO: Custom newline expansion, custom indent segments.
output.append("\n" + (chunk.indent * " "))
column = chunk.indent
case Cons(left, right):
chunks.append(chunk.with_document(right))
chunks.append(chunk.with_document(left))
case Indent(amount, doc):
chunks.append(chunk.with_document(doc, and_indent=amount))
case Lazy():
chunks.append(chunk.with_document(chunk.doc.resolve()))
case Group(child):
candidate = Chunk(doc=child, indent=chunk.indent, flat=True)
if chunk.flat or fits(candidate):
chunks.append(candidate)
else:
chunks.append(Chunk(doc=child, indent=chunk.indent, flat=False))
case _:
typing.assert_never(chunk)
return DocumentLayout(output)
def resolve_document(doc: Document) -> Document:
match doc:
case Cons(left, right):
@ -75,12 +214,9 @@ def resolve_document(doc: Document) -> Document:
return doc
def layout_document(doc: Document) -> typing.Generator[str, None, None]:
    """Placeholder layout entry point; always raises NotImplementedError.

    NOTE(review): this stub has the same name as the width-aware
    layout_document(doc, width) and, if both are present in one module, the
    later definition wins. It looks like the pre-change side of the diff --
    confirm it is really gone from the file.
    """
    # The argument is deliberately discarded; nothing is implemented yet.
    del doc
    raise NotImplementedError()
def child_to_name(child: runtime.Tree | runtime.TokenValue) -> str:
# TODO: RECONSIDER THE EXISTENCE OF THIS FUNCTION
# The naming condition is important but
if isinstance(child, runtime.Tree):
return f"tree_{child.name}"
else:
@ -230,7 +366,7 @@ class Printer:
generated_grammar.append((rule_name, tx_children))
tx_children = [rule_name]
if pretty.newline:
if pretty.newline is not None:
if not done_newline:
generated_grammar.append(("newline", []))
done_newline = True
@ -272,6 +408,6 @@ class Printer:
)
return resolve_document(m)
def format_tree(self, tree: runtime.Tree, width: int) -> DocumentLayout:
    """Convert `tree` to a document and lay it out to fit within `width`.

    Returns the DocumentLayout produced by layout_document(); call its
    apply_to_source() with the original text to obtain the formatted string.
    """
    doc = self.convert_tree_to_document(tree)
    return layout_document(doc, width)

View file

@ -1,6 +1,6 @@
import typing
from parser.parser import Grammar, Re, Terminal, rule, opt, group, newline, alt
from parser.parser import Grammar, Re, Terminal, rule, opt, group, newline, alt, indent
import parser.runtime as runtime
import parser.wadler as wadler
@ -29,13 +29,13 @@ class JsonGrammar(Grammar):
@rule
def object(self):
    # A group: '{', optionally the indented pairs, a permitted line break,
    # then '}'. The newline() before RCURLY lets the closing brace move to
    # its own line when the group is broken.
    return group(self.LCURLY + opt(indent(self._object_pairs)) + newline() + self.RCURLY)
@rule
def _object_pairs(self):
    # Each pair is preceded by a permitted line break (rather than followed
    # by one); the closing break is supplied by the enclosing `object` rule.
    return alt(
        newline() + self.object_pair,
        newline() + self.object_pair + self.COMMA + self._object_pairs,
    )
@rule
@ -44,13 +44,13 @@ class JsonGrammar(Grammar):
@rule
def array(self):
    # A group: '[', optionally the indented items, a permitted line break,
    # then ']'. The newline() before RSQUARE lets the closing bracket move
    # to its own line when the group is broken.
    return group(self.LSQUARE + opt(indent(self._array_items)) + newline() + self.RSQUARE)
@rule
def _array_items(self):
    # Each item is preceded by a permitted line break (rather than followed
    # by one); the closing break is supplied by the enclosing `array` rule.
    return alt(
        newline() + self.value,
        newline() + self.value + self.COMMA + self._array_items,
    )
BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus())
@ -112,7 +112,7 @@ def flatten_document(doc: wadler.Document, src: str) -> list:
typing.assert_never(doc)
def test_basic_printer():
def test_convert_tree_to_document():
text = '{"a": true, "b":[1,2,3]}'
tokens = runtime.GenericTokenStream(text, JSON_LEXER)
tree, errors = JSON_PARSER.parse(tokens)
@ -148,3 +148,28 @@ def test_basic_printer():
"}",
]
]
def test_layout_basic():
    """Parse a small JSON document, lay it out at width 10, check the text."""
    source = '{"a": true, "b":[1,2,3]}'
    stream = runtime.GenericTokenStream(source, JSON_LEXER)
    tree, errors = JSON_PARSER.parse(stream)
    assert [] == errors
    assert tree is not None

    layout = wadler.Printer(JSON).format_tree(tree, 10)
    formatted = layout.apply_to_source(source)

    # NOTE(review): the expected text is reproduced exactly as shown; any
    # leading whitespace inside the literal may have been lost in this
    # rendering -- confirm against the repository copy.
    expected = """
{
"a":true,
"b":[
1,
2,
3
]
}
""".strip()
    assert formatted == expected